[PATCH] x86_64 merge: arch + asm
author Andi Kleen <ak@muc.de>
Wed, 13 Feb 2002 04:17:35 +0000 (20:17 -0800)
committer Linus Torvalds <torvalds@home.transmeta.com>
Wed, 13 Feb 2002 04:17:35 +0000 (20:17 -0800)
This adds the x86_64 arch and asm directories and a Documentation/x86_64 directory.

It took a bit longer because I first had to make preemption and thread_info
work and also found some other bugs while doing this. The port has been
tested for a long time on UP.

I'm not sure what I should describe.  A lot is based on i386 with
a lot of cleanups. I wrote a paper about it for last year's OLS that describes
most of the changes (ftp://ftp.firstfloor.org/pub/ak/x86_64.ps.gz). It is
a bit outdated now, but should give a good overview.

The 32bit emulation is currently cut'n'pasted from other ports and hacked
together. I hope to clean that up in the future by merging the generic
core of this with other 64bit archs.

Thanks,
-Andi

192 files changed:
Documentation/x86_64/mm.txt [new file with mode: 0644]
arch/x86_64/Config.help [new file with mode: 0644]
arch/x86_64/Makefile [new file with mode: 0644]
arch/x86_64/boot/Makefile [new file with mode: 0644]
arch/x86_64/boot/bootsect.S [new file with mode: 0644]
arch/x86_64/boot/compressed/Makefile [new file with mode: 0644]
arch/x86_64/boot/compressed/head.S [new file with mode: 0644]
arch/x86_64/boot/compressed/misc.c [new file with mode: 0644]
arch/x86_64/boot/compressed/miscsetup.h [new file with mode: 0644]
arch/x86_64/boot/install.sh [new file with mode: 0644]
arch/x86_64/boot/setup.S [new file with mode: 0644]
arch/x86_64/boot/tools/build.c [new file with mode: 0644]
arch/x86_64/boot/video.S [new file with mode: 0644]
arch/x86_64/config.in [new file with mode: 0644]
arch/x86_64/defconfig [new file with mode: 0644]
arch/x86_64/ia32/Makefile [new file with mode: 0644]
arch/x86_64/ia32/ia32_binfmt.c [new file with mode: 0644]
arch/x86_64/ia32/ia32_ioctl.c [new file with mode: 0644]
arch/x86_64/ia32/ia32_signal.c [new file with mode: 0644]
arch/x86_64/ia32/ia32entry.S [new file with mode: 0644]
arch/x86_64/ia32/ptrace32.c [new file with mode: 0644]
arch/x86_64/ia32/socket32.c [new file with mode: 0644]
arch/x86_64/ia32/sys_ia32.c [new file with mode: 0644]
arch/x86_64/kernel/Makefile [new file with mode: 0644]
arch/x86_64/kernel/apic.c [new file with mode: 0644]
arch/x86_64/kernel/bluesmoke.c [new file with mode: 0644]
arch/x86_64/kernel/cpuid.c [new file with mode: 0644]
arch/x86_64/kernel/early_printk.c [new file with mode: 0644]
arch/x86_64/kernel/entry.S [new file with mode: 0644]
arch/x86_64/kernel/head.S [new file with mode: 0644]
arch/x86_64/kernel/head64.c [new file with mode: 0644]
arch/x86_64/kernel/i387.c [new file with mode: 0644]
arch/x86_64/kernel/i8259.c [new file with mode: 0644]
arch/x86_64/kernel/init_task.c [new file with mode: 0644]
arch/x86_64/kernel/io_apic.c [new file with mode: 0644]
arch/x86_64/kernel/ioport.c [new file with mode: 0644]
arch/x86_64/kernel/irq.c [new file with mode: 0644]
arch/x86_64/kernel/ldt.c [new file with mode: 0644]
arch/x86_64/kernel/mpparse.c [new file with mode: 0644]
arch/x86_64/kernel/msr.c [new file with mode: 0644]
arch/x86_64/kernel/mtrr.c [new file with mode: 0644]
arch/x86_64/kernel/nmi.c [new file with mode: 0644]
arch/x86_64/kernel/pci-dma.c [new file with mode: 0644]
arch/x86_64/kernel/pci-irq.c [new file with mode: 0644]
arch/x86_64/kernel/pci-pc.c [new file with mode: 0644]
arch/x86_64/kernel/pci-x86_64.c [new file with mode: 0644]
arch/x86_64/kernel/pci-x86_64.h [new file with mode: 0644]
arch/x86_64/kernel/process.c [new file with mode: 0644]
arch/x86_64/kernel/ptrace.c [new file with mode: 0644]
arch/x86_64/kernel/semaphore.c [new file with mode: 0644]
arch/x86_64/kernel/setup.c [new file with mode: 0644]
arch/x86_64/kernel/setup64.c [new file with mode: 0644]
arch/x86_64/kernel/signal.c [new file with mode: 0644]
arch/x86_64/kernel/smp.c [new file with mode: 0644]
arch/x86_64/kernel/smpboot.c [new file with mode: 0644]
arch/x86_64/kernel/sys_x86_64.c [new file with mode: 0644]
arch/x86_64/kernel/syscall.c [new file with mode: 0644]
arch/x86_64/kernel/time.c [new file with mode: 0644]
arch/x86_64/kernel/trampoline.S [new file with mode: 0644]
arch/x86_64/kernel/traps.c [new file with mode: 0644]
arch/x86_64/kernel/vsyscall.c [new file with mode: 0644]
arch/x86_64/kernel/x8664_ksyms.c [new file with mode: 0644]
arch/x86_64/lib/Makefile [new file with mode: 0644]
arch/x86_64/lib/checksum_copy.S [new file with mode: 0644]
arch/x86_64/lib/dec_and_lock.c [new file with mode: 0644]
arch/x86_64/lib/delay.c [new file with mode: 0644]
arch/x86_64/lib/generic-checksum.c [new file with mode: 0644]
arch/x86_64/lib/getuser.S [new file with mode: 0644]
arch/x86_64/lib/iodebug.c [new file with mode: 0644]
arch/x86_64/lib/mmx.c [new file with mode: 0644]
arch/x86_64/lib/old-checksum.c [new file with mode: 0644]
arch/x86_64/lib/putuser.S [new file with mode: 0644]
arch/x86_64/lib/rwsem_thunk.S [new file with mode: 0644]
arch/x86_64/lib/usercopy.c [new file with mode: 0644]
arch/x86_64/mm/Makefile [new file with mode: 0644]
arch/x86_64/mm/extable.c [new file with mode: 0644]
arch/x86_64/mm/fault.c [new file with mode: 0644]
arch/x86_64/mm/init.c [new file with mode: 0644]
arch/x86_64/mm/ioremap.c [new file with mode: 0644]
arch/x86_64/tools/Makefile [new file with mode: 0644]
arch/x86_64/tools/offset.c [new file with mode: 0644]
arch/x86_64/tools/offset.sed [new file with mode: 0644]
arch/x86_64/vmlinux.lds [new file with mode: 0644]
include/asm-x86_64/a.out.h [new file with mode: 0644]
include/asm-x86_64/apic.h [new file with mode: 0644]
include/asm-x86_64/apicdef.h [new file with mode: 0644]
include/asm-x86_64/atomic.h [new file with mode: 0644]
include/asm-x86_64/bitops.h [new file with mode: 0644]
include/asm-x86_64/boot.h [new file with mode: 0644]
include/asm-x86_64/bootsetup.h [new file with mode: 0644]
include/asm-x86_64/bugs.h [new file with mode: 0644]
include/asm-x86_64/byteorder.h [new file with mode: 0644]
include/asm-x86_64/cache.h [new file with mode: 0644]
include/asm-x86_64/calling.h [new file with mode: 0644]
include/asm-x86_64/checksum.h [new file with mode: 0644]
include/asm-x86_64/cpufeature.h [new file with mode: 0644]
include/asm-x86_64/current.h [new file with mode: 0644]
include/asm-x86_64/debugreg.h [new file with mode: 0644]
include/asm-x86_64/delay.h [new file with mode: 0644]
include/asm-x86_64/desc.h [new file with mode: 0644]
include/asm-x86_64/div64.h [new file with mode: 0644]
include/asm-x86_64/dma.h [new file with mode: 0644]
include/asm-x86_64/e820.h [new file with mode: 0644]
include/asm-x86_64/elf.h [new file with mode: 0644]
include/asm-x86_64/errno.h [new file with mode: 0644]
include/asm-x86_64/fcntl.h [new file with mode: 0644]
include/asm-x86_64/fixmap.h [new file with mode: 0644]
include/asm-x86_64/floppy.h [new file with mode: 0644]
include/asm-x86_64/hardirq.h [new file with mode: 0644]
include/asm-x86_64/hdreg.h [new file with mode: 0644]
include/asm-x86_64/hw_irq.h [new file with mode: 0644]
include/asm-x86_64/i387.h [new file with mode: 0644]
include/asm-x86_64/ia32.h [new file with mode: 0644]
include/asm-x86_64/ia32_unistd.h [new file with mode: 0644]
include/asm-x86_64/ide.h [new file with mode: 0644]
include/asm-x86_64/init.h [new file with mode: 0644]
include/asm-x86_64/io.h [new file with mode: 0644]
include/asm-x86_64/io_apic.h [new file with mode: 0644]
include/asm-x86_64/ioctl.h [new file with mode: 0644]
include/asm-x86_64/ioctls.h [new file with mode: 0644]
include/asm-x86_64/ipc.h [new file with mode: 0644]
include/asm-x86_64/ipcbuf.h [new file with mode: 0644]
include/asm-x86_64/irq.h [new file with mode: 0644]
include/asm-x86_64/kdebug.h [new file with mode: 0644]
include/asm-x86_64/keyboard.h [new file with mode: 0644]
include/asm-x86_64/kmap_types.h [new file with mode: 0644]
include/asm-x86_64/ldt.h [new file with mode: 0644]
include/asm-x86_64/linux_logo.h [new file with mode: 0644]
include/asm-x86_64/locks.h [new file with mode: 0644]
include/asm-x86_64/mc146818rtc.h [new file with mode: 0644]
include/asm-x86_64/mman.h [new file with mode: 0644]
include/asm-x86_64/mmu.h [new file with mode: 0644]
include/asm-x86_64/mmu_context.h [new file with mode: 0644]
include/asm-x86_64/mmx.h [new file with mode: 0644]
include/asm-x86_64/module.h [new file with mode: 0644]
include/asm-x86_64/mpspec.h [new file with mode: 0644]
include/asm-x86_64/msgbuf.h [new file with mode: 0644]
include/asm-x86_64/msr.h [new file with mode: 0644]
include/asm-x86_64/mtrr.h [new file with mode: 0644]
include/asm-x86_64/namei.h [new file with mode: 0644]
include/asm-x86_64/page.h [new file with mode: 0644]
include/asm-x86_64/param.h [new file with mode: 0644]
include/asm-x86_64/parport.h [new file with mode: 0644]
include/asm-x86_64/pci.h [new file with mode: 0644]
include/asm-x86_64/pda.h [new file with mode: 0644]
include/asm-x86_64/pgalloc.h [new file with mode: 0644]
include/asm-x86_64/pgtable.h [new file with mode: 0644]
include/asm-x86_64/poll.h [new file with mode: 0644]
include/asm-x86_64/posix_types.h [new file with mode: 0644]
include/asm-x86_64/prctl.h [new file with mode: 0644]
include/asm-x86_64/processor.h [new file with mode: 0644]
include/asm-x86_64/ptrace.h [new file with mode: 0644]
include/asm-x86_64/resource.h [new file with mode: 0644]
include/asm-x86_64/rwlock.h [new file with mode: 0644]
include/asm-x86_64/rwsem.h [new file with mode: 0644]
include/asm-x86_64/scatterlist.h [new file with mode: 0644]
include/asm-x86_64/segment.h [new file with mode: 0644]
include/asm-x86_64/semaphore.h [new file with mode: 0644]
include/asm-x86_64/sembuf.h [new file with mode: 0644]
include/asm-x86_64/serial.h [new file with mode: 0644]
include/asm-x86_64/setup.h [new file with mode: 0644]
include/asm-x86_64/shmbuf.h [new file with mode: 0644]
include/asm-x86_64/shmparam.h [new file with mode: 0644]
include/asm-x86_64/sigcontext.h [new file with mode: 0644]
include/asm-x86_64/siginfo.h [new file with mode: 0644]
include/asm-x86_64/signal.h [new file with mode: 0644]
include/asm-x86_64/smp.h [new file with mode: 0644]
include/asm-x86_64/smplock.h [new file with mode: 0644]
include/asm-x86_64/socket.h [new file with mode: 0644]
include/asm-x86_64/socket32.h [new file with mode: 0644]
include/asm-x86_64/sockios.h [new file with mode: 0644]
include/asm-x86_64/softirq.h [new file with mode: 0644]
include/asm-x86_64/spinlock.h [new file with mode: 0644]
include/asm-x86_64/stat.h [new file with mode: 0644]
include/asm-x86_64/statfs.h [new file with mode: 0644]
include/asm-x86_64/string.h [new file with mode: 0644]
include/asm-x86_64/system.h [new file with mode: 0644]
include/asm-x86_64/termbits.h [new file with mode: 0644]
include/asm-x86_64/termios.h [new file with mode: 0644]
include/asm-x86_64/thread_info.h [new file with mode: 0644]
include/asm-x86_64/timex.h [new file with mode: 0644]
include/asm-x86_64/tlb.h [new file with mode: 0644]
include/asm-x86_64/types.h [new file with mode: 0644]
include/asm-x86_64/uaccess.h [new file with mode: 0644]
include/asm-x86_64/ucontext.h [new file with mode: 0644]
include/asm-x86_64/unaligned.h [new file with mode: 0644]
include/asm-x86_64/unistd.h [new file with mode: 0644]
include/asm-x86_64/user.h [new file with mode: 0644]
include/asm-x86_64/user32.h [new file with mode: 0644]
include/asm-x86_64/vga.h [new file with mode: 0644]
include/asm-x86_64/vsyscall.h [new file with mode: 0644]
include/asm-x86_64/xor.h [new file with mode: 0644]

diff --git a/Documentation/x86_64/mm.txt b/Documentation/x86_64/mm.txt
new file mode 100644 (file)
index 0000000..4f7ee73
--- /dev/null
@@ -0,0 +1,148 @@
+The paging design used on the x86-64 linux kernel port in 2.4.x provides:
+
+o      per process virtual address space limit of 512 Gigabytes
+o      top of userspace stack located at address 0x0000007fffffffff
+o      PAGE_OFFSET = 0xffff800000000000
+o      start of the kernel = 0xffffffff80000000
+o      global RAM per system 2^64-PAGE_OFFSET-sizeof(kernel) = 128 Terabytes - 2 Gigabytes
+o      no need for any common code change
+o      no need to use highmem to handle the 128 Terabytes of RAM
+
+Description:
+
+       Userspace can see and modify only the 3rd/2nd/1st level
+       pagetables (pgd_offset() implicitly walks the 1st slot of the 4th
+       level pagetable and returns an entry into the 3rd level pagetable).
+       This is where the per-process 512 Gigabytes limit comes from.
+
+       The common code pgd is the PDPE, the pmd is the PDE, the
+       pte is the PTE. The PML4E remains invisible to the common
+       code.
+
+       The kernel uses the entire first 47 bits of the negative half
+       of the virtual address space to build the direct mapping, using
+       a 2 Mbyte page size. Kernel virtual addresses always have bit
+       47 set to 1 (and, due to sign extension, bits 48-63 are set to 1
+       as well). This is where the 128 Terabytes - 2 Gigabytes global
+       limit of RAM comes from.
+
+       Since the per-process limit is 512 Gigabytes (due to the common
+       code's 3 level pagetable limitation), the highest virtual address
+       mapped into userspace is 0x7fffffffff, and it makes sense to use it
+       as the top of the userspace stack to allow the stack to grow as
+       much as possible.
+
+       Setting PAGE_OFFSET to 2^39 (just after the last userspace
+       virtual address) wouldn't make much difference compared to
+       setting PAGE_OFFSET to 0xffff800000000000, because there is a
+       hole in the virtual address space. The last byte mapped by the
+       255th slot in the 4th level pagetable is at virtual address
+       0x00007fffffffffff and the first byte mapped by the 256th slot in the
+       4th level pagetable is at address 0xffff800000000000. Due to this
+       hole we can't trivially build a direct mapping across all the
+       512 slots of the 4th level pagetable, so we simply use only the
+       second (negative) half of the 4th level pagetable for that purpose
+       (which provides 128 Terabytes of contiguous virtual addresses).
+       Strictly speaking we could also build a direct mapping across the hole
+       using some DISCONTIGMEM trick, but we don't need such a large
+       direct mapping right now.
+
+Future:
+
+       During 2.5.x we can probably break the 512 Gigabytes per-process
+       limit by removing from the common code any knowledge about the
+       architecture-dependent physical layout of the virtual to physical
+       mapping.
+
+       Once the 512 Gigabytes limit is removed, the top of the userspace
+       stack will most probably be moved to virtual address
+       0x00007fffffffffff. Nothing will break in userspace due to that
+       move, just as nothing breaks on IA32 when compiling the kernel
+       with CONFIG_2G.
+
+Linus agreed not to break common code and to live with the 512 Gigabytes
+per-process limitation for the 2.4.x timeframe, and he has given me and Andi
+some very useful hints... (thanks! :)
+
+Thanks also to H. Peter Anvin for his interesting and useful suggestions on
+the x86-64-discuss lists!
+
+Other memory management related issues follow:
+
+PAGE_SIZE:
+
+       If somebody is wondering why we still have such a small 4k
+       pagesize these days (16 or 32 kbytes would of course be much
+       better for performance), it is because PAGE_SIZE has to remain 4k
+       for 32bit apps in order to provide a 100% backwards compatible
+       IA32 API (we can't allow silent fs corruption, or at best a loss
+       of coherency with the page cache, by allocating MAP_SHARED areas
+       in MAP_ANONYMOUS memory with a do_mmap_fake). It could be possible
+       to have a dynamic page size between 32bit and 64bit apps, but it
+       would need extremely intrusive changes in the common code, first
+       of all in the page cache, and we certainly don't want to depend on
+       them right now, even if the hardware would support that.
+
+PAGETABLE SIZE:
+
+       In turn we can't afford to have pagetables larger than 4k, because
+       we might not be able to allocate them due to physical memory
+       fragmentation, and failing to allocate the kernel stack is a minor
+       issue compared to failing the allocation of a pagetable. If we
+       fail the allocation of a pagetable, the only things we can do are
+       to sched_yield while polling the freelist (deadlock prone) or to
+       segfault the task (and not even the sighandler would be sure to run).
+
+KERNEL STACK:
+
+       1st stage:
+
+       The kernel stack will at first be allocated with an order 2
+       allocation (16k) (stack utilization on a 64bit platform isn't
+       exactly double that of a 32bit platform, because the local
+       variables may not all be 64bit wide, but it is not much less). This
+       will make things even worse than they are right now on IA32 with
+       respect to failing fork/clone due to memory fragmentation.
+
+       2nd stage:
+
+       We'll benchmark whether reserving one register as the task_struct
+       pointer improves performance of the kernel (instead of
+       recalculating the task_struct pointer from the stack
+       pointer each time). My guess is that recalculating will be faster,
+       but it is worth a try.
+
+               If reserving one register for the task_struct pointer
+               turns out to be faster, we can also split the task_struct
+               and the kernel stack. The task_struct can be a slab
+               allocation or a PAGE_SIZEd allocation, and the kernel
+               stack can then be an order 1 allocation. This is risky,
+               since 8k on a 64bit platform effectively provides less
+               room than 7k does on a 32bit platform, but we could try it
+               out. This would reduce the fragmentation problem by an
+               order of magnitude, making it equal to current IA32.
+
+               We must also consider that x86-64 seems to provide a
+               per-irq stack in hardware, which could allow us to remove
+               the irq handler footprint from the regular per-process
+               stack, and so could let us live with a smaller kernel
+               stack than the other Linux architectures.
+
+       3rd stage:
+
+       Before going into production, if we still have the order 2
+       allocation, we can add a sysctl that allows the kernel stack to be
+       allocated with vmalloc during memory fragmentation. This has to
+       remain turned off during benchmarks :) but it should be ok in real
+       life.
+
+Order of PAGE_CACHE_SIZE and other allocations:
+
+       In the long run we can increase PAGE_CACHE_SIZE to an
+       order 2 allocation, and the slab/buffercache etc. could
+       also all be done with order 2 allocations. To make the above
+       work we would have to change lots of common code, so it can be done
+       only once the basic port is in a production state. Having
+       a larger PAGE_CACHE_SIZE would of course be a benefit also for
+       IA32 and other architectures.
+
+Andrea <andrea@suse.de> SuSE
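
As a quick sanity check of the numbers in mm.txt above, a few lines of plain C
re-derive the 512 Gigabyte per-process limit, the top-of-stack address and the
128 Terabyte direct-mapping window from the constants quoted in the text. This
is only an illustrative sketch, not code from the patch:

/* Recompute the x86-64 address-space numbers quoted in mm.txt above. */
#include <stdio.h>
#include <stdint.h>

int main(void)
{
	/* 3-level common-code pagetables: 9 + 9 + 9 index bits + 12 page-offset bits */
	unsigned user_bits = 9 + 9 + 9 + 12;             /* 39 */
	uint64_t user_limit = 1ULL << user_bits;         /* 512 GB per process */
	uint64_t user_top = user_limit - 1;              /* 0x0000007fffffffff */

	/* The direct mapping lives in the negative half: bit 47 set, sign-extended. */
	uint64_t page_offset = 0xffff800000000000ULL;
	uint64_t direct_map = (uint64_t)0 - page_offset; /* 2^64 - PAGE_OFFSET = 128 TB */

	printf("per-process limit: %llu GB\n", (unsigned long long)(user_limit >> 30));
	printf("top of user stack: 0x%llx\n", (unsigned long long)user_top);
	printf("direct mapping:    %llu TB\n", (unsigned long long)(direct_map >> 40));
	return 0;
}
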
diff --git a/arch/x86_64/Config.help b/arch/x86_64/Config.help
new file mode 100644 (file)
index 0000000..eb09405
--- /dev/null
@@ -0,0 +1,531 @@
+CONFIG_SMP
+  This enables support for systems with more than one CPU. If you have
+  a system with only one CPU, like most personal computers, say N. If
+  you have a system with more than one CPU, say Y.
+
+  If you say N here, the kernel will run on single and multiprocessor
+  machines, but will use only one CPU of a multiprocessor machine. If
+  you say Y here, the kernel will run on many, but not all,
+  singleprocessor machines. On a singleprocessor machine, the kernel
+  will run faster if you say N here.
+
+  Note that if you say Y here and choose architecture "586" or
+  "Pentium" under "Processor family", the kernel will not work on 486
+  architectures. Similarly, multiprocessor kernels for the "PPro"
+  architecture may not work on all Pentium based boards.
+
+  People using multiprocessor machines who say Y here should also say
+  Y to "Enhanced Real Time Clock Support", below. The "Advanced Power
+  Management" code will be disabled if you say Y here.
+
+  See also the <file:Documentation/smp.tex>,
+  <file:Documentation/smp.txt>, <file:Documentation/i386/IO-APIC.txt>,
+  <file:Documentation/nmi_watchdog.txt> and the SMP-HOWTO available at
+  <http://www.linuxdoc.org/docs.html#howto>.
+
+  If you don't know what to do here, say N.
+
+CONFIG_X86
+  This is Linux's home port.  Linux was originally native to the Intel
+  386, and runs on all the later x86 processors including the Intel
+  486, 586, Pentiums, and various instruction-set-compatible chips by
+  AMD, Cyrix, and others.
+
+CONFIG_X86_64
+  Port to the x86-64 architecture. x86-64 is a 64bit extension of the
+  classical 32bit x86 architecture. For details see http://www.x86-64.org
+
+CONFIG_X86_UP_IOAPIC
+  An IO-APIC (I/O Advanced Programmable Interrupt Controller) is an
+  SMP-capable replacement for PC-style interrupt controllers. Most
+  SMP systems and a small number of uniprocessor systems have one.
+  If you have a single-CPU system with an IO-APIC, you can say Y here
+  to use it. If you say Y here even though your machine doesn't have
+  an IO-APIC, then the kernel will still run with no slowdown at all.
+
+  If you have a system with several CPUs, you do not need to say Y
+  here: the IO-APIC will be used automatically.
+
+CONFIG_X86_UP_APIC
+  A local APIC (Advanced Programmable Interrupt Controller) is an
+  integrated interrupt controller in the CPU. If you have a single-CPU
+  system which has a processor with a local APIC, you can say Y here to
+  enable and use it. If you say Y here even though your machine doesn't
+  have a local APIC, then the kernel will still run with no slowdown at
+  all. The local APIC supports CPU-generated self-interrupts (timer,
+  performance counters), and the NMI watchdog which detects hard lockups.
+
+  If you have a system with several CPUs, you do not need to say Y
+  here: the local APIC will be used automatically.
+
+CONFIG_IDE
+  If you say Y here, your kernel will be able to manage low cost mass
+  storage units such as ATA/(E)IDE and ATAPI units. The most common
+  cases are IDE hard drives and ATAPI CD-ROM drives.
+
+  If your system is pure SCSI and doesn't use these interfaces, you
+  can say N here.
+
+  Integrated Disk Electronics (IDE aka ATA-1) is a connecting standard
+  for mass storage units such as hard disks. It was designed by
+  Western Digital and Compaq Computer in 1984. It was then named
+  ST506. Quite a number of disks use the IDE interface.
+
+  AT Attachment (ATA) is the superset of the IDE specifications.
+  ST506 was also called ATA-1.
+
+  Fast-IDE is ATA-2 (also named Fast ATA), Enhanced IDE (EIDE) is
+  ATA-3. It provides support for larger disks (up to 8.4GB by means of
+  the LBA standard), more disks (4 instead of 2) and for other mass
+  storage units such as tapes and cdrom. UDMA/33 (aka UltraDMA/33) is
+  ATA-4 and provides faster (and more CPU friendly) transfer modes
+  than the PIO (Programmed Input/Output) modes of previous ATA/IDE
+  standards, by means of fast DMA controllers.
+
+  ATA Packet Interface (ATAPI) is a protocol used by EIDE tape and
+  CD-ROM drives, similar in many respects to the SCSI protocol.
+
+  SMART IDE (Self Monitoring, Analysis and Reporting Technology) was
+  designed in order to prevent data corruption and disk crash by
+  detecting hardware pre-failure conditions (heat, access time, and
+  the like...). Disks built since June 1995 may follow this standard.
+  The kernel itself doesn't manage this; however, there are quite a
+  number of user programs such as smart that can query the status of
+  a disk's SMART parameters.
+
+  If you want to compile this driver as a module ( = code which can be
+  inserted in and removed from the running kernel whenever you want),
+  say M here and read <file:Documentation/modules.txt>. The module
+  will be called ide.o.
+
+  For further information, please read <file:Documentation/ide.txt>.
+
+  If unsure, say Y.
+
+CONFIG_ISA
+  Find out whether you have ISA slots on your motherboard.  ISA is the
+  name of a bus system, i.e. the way the CPU talks to the other stuff
+  inside your box.  Other bus systems are PCI, EISA, MicroChannel
+  (MCA) or VESA.  ISA is an older system, now being displaced by PCI;
+  newer boards don't support it.  If you have ISA, say Y, otherwise N.
+
+CONFIG_PCI
+  Find out whether you have a PCI motherboard. PCI is the name of a
+  bus system, i.e. the way the CPU talks to the other stuff inside
+  your box. Other bus systems are ISA, EISA, MicroChannel (MCA) or
+  VESA. If you have PCI, say Y, otherwise N.
+
+  The PCI-HOWTO, available from
+  <http://www.linuxdoc.org/docs.html#howto>, contains valuable
+  information about which PCI hardware does work under Linux and which
+  doesn't.
+
+CONFIG_HOTPLUG_PCI
+  Say Y here if you have a motherboard with a PCI Hotplug controller.
+  This allows you to add and remove PCI cards while the machine is
+  powered up and running.  The file system pcihpfs must be mounted
+  in order to interact with any PCI Hotplug controllers.
+
+  This code is also available as a module ( = code which can be
+  inserted in and removed from the running kernel whenever you want).
+  The module will be called pci_hotplug.o. If you want to compile it
+  as a module, say M here and read <file:Documentation/modules.txt>.
+
+  When in doubt, say N.
+
+CONFIG_HOTPLUG
+  Say Y here if you want to plug devices into your computer while
+  the system is running, and be able to use them quickly.  In many
+  cases, the devices can likewise be unplugged at any time too.
+
+  One well known example of this is PCMCIA- or PC-cards, credit-card
+  size devices such as network cards, modems or hard drives which are
+  plugged into slots found on all modern laptop computers.  Another
+  example, used on modern desktops as well as laptops, is USB.
+
+  Enable HOTPLUG and KMOD, and build a modular kernel.  Get agent
+  software (at <http://linux-hotplug.sourceforge.net/>) and install it.
+  Then your kernel will automatically call out to a user mode "policy
+  agent" (/sbin/hotplug) to load modules and set up software needed
+  to use devices as you hotplug them.
+
+CONFIG_PCMCIA
+  Say Y here if you want to attach PCMCIA- or PC-cards to your Linux
+  computer.  These are credit-card size devices such as network cards,
+  modems or hard drives often used with laptop computers.  There are
+  actually two varieties of these cards: the older 16 bit PCMCIA cards
+  and the newer 32 bit CardBus cards.  If you want to use CardBus
+  cards, you need to say Y here and also to "CardBus support" below.
+
+  To use your PC-cards, you will need supporting software from David
+  Hinds' pcmcia-cs package (see the file <file:Documentation/Changes>
+  for location).  Please also read the PCMCIA-HOWTO, available from
+  <http://www.linuxdoc.org/docs.html#howto>.
+
+  This driver is also available as a module ( = code which can be
+  inserted in and removed from the running kernel whenever you want).
+  When compiled this way, there will be modules called pcmcia_core.o
+  and ds.o.  If you want to compile it as a module, say M here and
+  read <file:Documentation/modules.txt>.
+
+CONFIG_KCORE_ELF
+  If you enabled support for the /proc file system then the file
+  /proc/kcore will contain the kernel core image. This can be used
+  in gdb:
+
+  $ cd /usr/src/linux ; gdb vmlinux /proc/kcore
+
+  You have two choices here: ELF and A.OUT. Selecting ELF will make
+  /proc/kcore appear in ELF core format as defined by the Executable
+  and Linking Format specification. Selecting A.OUT will choose the
+  old "a.out" format which may be necessary for some old versions
+  of binutils or on some architectures.
+
+  This is especially useful if you have compiled the kernel with the
+  "-g" option to preserve debugging information. It is mainly used
+  for examining kernel data structures on the live kernel so if you
+  don't understand what this means or are not a kernel hacker, just
+  leave it at its default value ELF.
+
+CONFIG_BINFMT_ELF
+  ELF (Executable and Linkable Format) is a format for libraries and
+  executables used across different architectures and operating
+  systems. Saying Y here will enable your kernel to run ELF binaries
+  and enlarge it by about 13 KB. ELF support under Linux has now all
+  but replaced the traditional Linux a.out formats (QMAGIC and ZMAGIC)
+  because it is portable (this does *not* mean that you will be able
+  to run executables from different architectures or operating systems
+  however) and makes building run-time libraries very easy. Many new
+  executables are distributed solely in ELF format. You definitely
+  want to say Y here.
+
+  Information about ELF is contained in the ELF HOWTO available from
+  <http://www.linuxdoc.org/docs.html#howto>.
+
+  If you find that after upgrading from Linux kernel 1.2 and saying Y
+  here, you still can't run any ELF binaries (they just crash), then
+  you'll have to install the newest ELF runtime libraries, including
+  ld.so (check the file <file:Documentation/Changes> for location and
+  latest version).
+
+  If you want to compile this as a module ( = code which can be
+  inserted in and removed from the running kernel whenever you want),
+  say M here and read <file:Documentation/modules.txt>.  The module
+  will be called binfmt_elf.o. Saying M or N here is dangerous because
+  some crucial programs on your system might be in ELF format.
+
+CONFIG_BINFMT_MISC
+  If you say Y here, it will be possible to plug wrapper-driven binary
+  formats into the kernel. You will like this especially when you use
+  programs that need an interpreter to run like Java, Python or
+  Emacs-Lisp. It's also useful if you often run DOS executables under
+  the Linux DOS emulator DOSEMU (read the DOSEMU-HOWTO, available from
+  <http://www.linuxdoc.org/docs.html#howto>). Once you have
+  registered such a binary class with the kernel, you can start one of
+  those programs simply by typing in its name at a shell prompt; Linux
+  will automatically feed it to the correct interpreter.
+
+  You can do other nice things, too. Read the file
+  <file:Documentation/binfmt_misc.txt> to learn how to use this
+  feature, and <file:Documentation/java.txt> for information about how
+  to include Java support.
+
+  You must say Y to "/proc file system support" (CONFIG_PROC_FS) to
+  use this part of the kernel.
+
+  You may say M here for module support and later load the module when
+  you have use for it; the module is called binfmt_misc.o. If you
+  don't know what to answer at this point, say Y.
+
+CONFIG_MK8
+  Support for AMD Clawhammer/Sledgehammer CPUs. This is currently the only
+  choice for x86-64, so you should choose it if you want an x86-64 kernel.
+  In fact you will have no other choice than to choose this.
+
+CONFIG_VGA_CONSOLE
+  Saying Y here will allow you to use Linux in text mode through a
+  display that complies with the generic VGA standard. Virtually
+  everyone wants that.
+
+  The program SVGATextMode can be used to utilize SVGA video cards to
+  their full potential in text mode. Download it from
+  <ftp://ibiblio.org/pub/Linux/utils/console/>.
+
+  Say Y.
+
+CONFIG_VIDEO_SELECT
+  This enables support for text mode selection on kernel startup. If
+  you want to take advantage of some high-resolution text mode your
+  card's BIOS offers, but the traditional Linux utilities like
+  SVGATextMode don't, you can say Y here and set the mode using the
+  "vga=" option from your boot loader (lilo or loadlin) or set
+  "vga=ask" which brings up a video mode menu on kernel startup. (Try
+  "man bootparam" or see the documentation of your boot loader about
+  how to pass options to the kernel.)
+
+  Read the file <file:Documentation/svga.txt> for more information
+  about the Video mode selection support. If unsure, say N.
+
+CONFIG_MDA_CONSOLE
+  Say Y here if you have an old MDA or monochrome Hercules graphics
+  adapter in your system acting as a second head ( = video card). You
+  will then be able to use two monitors with your Linux system. Do not
+  say Y here if your MDA card is the primary card in your system; the
+  normal VGA driver will handle it.
+
+  This driver is also available as a module ( = code which can be
+  inserted and removed from the running kernel whenever you want).
+  The module will be called mdacon.o. If you want to compile it as
+  a module, say M here and read <file:Documentation/modules.txt>.
+
+  If unsure, say N.
+
+CONFIG_SCSI
+  If you want to use a SCSI hard disk, SCSI tape drive, SCSI CD-ROM or
+  any other SCSI device under Linux, say Y and make sure that you know
+  the name of your SCSI host adapter (the card inside your computer
+  that "speaks" the SCSI protocol, also called SCSI controller),
+  because you will be asked for it.
+
+  You also need to say Y here if you want support for the parallel
+  port version of the 100 MB IOMEGA ZIP drive.
+
+  This driver is also available as a module ( = code which can be
+  inserted in and removed from the running kernel whenever you want).
+  The module will be called scsi_mod.o.  If you want to compile it as
+  a module, say M here and read <file:Documentation/modules.txt> and
+  <file:Documentation/scsi.txt>.  However, do not compile this as a
+  module if your root file system (the one containing the directory /)
+  is located on a SCSI device.
+
+CONFIG_NETDEVICES
+  You can say N here if you don't intend to connect your Linux box to
+  any other computer at all or if all your connections will be over a
+  telephone line with a modem either via UUCP (UUCP is a protocol to
+  forward mail and news between unix hosts over telephone lines; read
+  the UUCP-HOWTO, available from
+  <http://www.linuxdoc.org/docs.html#howto>) or dialing up a shell
+  account or a BBS, even using term (term is a program which gives you
+  almost full Internet connectivity if you have a regular dial up
+  shell account on some Internet connected Unix computer. Read
+  <http://www.bart.nl/~patrickr/term-howto/Term-HOWTO.html>).
+
+  You'll have to say Y if your computer contains a network card that
+  you want to use under Linux (make sure you know its name because you
+  will be asked for it and read the Ethernet-HOWTO (especially if you
+  plan to use more than one network card under Linux)) or if you want
+  to use SLIP (Serial Line Internet Protocol is the protocol used to
+  send Internet traffic over telephone lines or null modem cables) or
+  CSLIP (compressed SLIP) or PPP (Point to Point Protocol, a better
+  and newer replacement for SLIP) or PLIP (Parallel Line Internet
+  Protocol is mainly used to create a mini network by connecting the
+  parallel ports of two local machines) or AX.25/KISS (protocol for
+  sending Internet traffic over amateur radio links).
+
+  Make sure to read the NET-3-HOWTO. Eventually, you will have to read
+  Olaf Kirch's excellent and free book "Network Administrator's
+  Guide", to be found in <http://www.linuxdoc.org/docs.html#guide>. If
+  unsure, say Y.
+
+CONFIG_CD_NO_IDESCSI
+  If you have a CD-ROM drive that is neither SCSI nor IDE/ATAPI, say Y
+  here, otherwise N. Read the CD-ROM-HOWTO, available from
+  <http://www.linuxdoc.org/docs.html#howto>.
+
+  Note that the answer to this question doesn't directly affect the
+  kernel: saying N will just cause the configurator to skip all
+  the questions about these CD-ROM drives. If you are unsure what you
+  have, say Y and find out whether you have one of the following
+  drives.
+
+  For each of these drivers, a file Documentation/cdrom/{driver_name}
+  exists. Especially in cases where you do not know exactly which kind
+  of drive you have you should read there. Most of these drivers use a
+  file drivers/cdrom/{driver_name}.h where you can define your
+  interface parameters and switch some internal goodies.
+
+  All these CD-ROM drivers are also usable as a module ( = code which
+  can be inserted in and removed from the running kernel whenever you
+  want). If you want to compile them as module, say M instead of Y and
+  read <file:Documentation/modules.txt>.
+
+  If you want to use any of these CD-ROM drivers, you also have to
+  answer Y or M to "ISO 9660 CD-ROM file system support" below (this
+  answer will get "defaulted" for you if you enable any of the Linux
+  CD-ROM drivers).
+
+CONFIG_MTRR
+  On Intel P6 family processors (Pentium Pro, Pentium II and later)
+  the Memory Type Range Registers (MTRRs) may be used to control
+  processor access to memory ranges. This is most useful if you have
+  a video (VGA) card on a PCI or AGP bus. Enabling write-combining
+  allows bus write transfers to be combined into a larger transfer
+  before bursting over the PCI/AGP bus. This can increase performance
+  of image write operations 2.5 times or more. Saying Y here creates a
+  /proc/mtrr file which may be used to manipulate your processor's
+  MTRRs. Typically the X server should use this.
+
+  This code has a reasonably generic interface so that similar
+  control registers on other processors can be easily supported
+  as well.
+
+  Saying Y here also fixes a problem with buggy SMP BIOSes which only
+  set the MTRRs for the boot CPU and not for the secondary CPUs. This
+  can lead to all sorts of problems, so it's good to say Y here.
+
+  Just say Y here, all x86-64 machines support MTRRs.
+
+  See <file:Documentation/mtrr.txt> for more information.
+
+CONFIG_PM
+  "Power Management" means that parts of your computer are shut
+  off or put into a power conserving "sleep" mode if they are not
+  being used.  There are two competing standards for doing this: APM
+  and ACPI.  If you want to use either one, say Y here and then also
+  to the requisite support below.
+
+  Power Management is most important for battery powered laptop
+  computers; if you have a laptop, check out the Linux Laptop home
+  page on the WWW at
+  <http://www.cs.utexas.edu/users/kharker/linux-laptop/> and the
+  Battery Powered Linux mini-HOWTO, available from
+  <http://www.linuxdoc.org/docs.html#howto>.
+
+  Note that, even if you say N here, Linux on the x86 architecture
+  will issue the hlt instruction if nothing is to be done, thereby
+  sending the processor to sleep and saving power.
+
+CONFIG_ACPI
+  ACPI/OSPM support for Linux is currently under development. As such,
+  this support is preliminary and EXPERIMENTAL.  Configuring ACPI
+  support enables kernel interfaces that allow higher level software
+  (OSPM) to manipulate ACPI defined hardware and software interfaces,
+  including the evaluation of ACPI control methods.  If unsure, choose
+  N here.  Note, this option will enlarge your kernel by about 120K.
+
+  This support requires an ACPI compliant platform (hardware/firmware).
+  If both ACPI and Advanced Power Management (APM) support are
+  configured, whichever is loaded first shall be used.
+
+  This code DOES NOT currently provide a complete OSPM implementation
+  -- it has not yet reached APM's level of functionality.  When fully
+  implemented, Linux ACPI/OSPM will provide a more robust functional
+  replacement for legacy configuration and power management
+  interfaces, including the Plug-and-Play BIOS specification (PnP
+  BIOS), the Multi-Processor Specification (MPS), and the Advanced
+  Power Management specification (APM).
+
+  Linux support for ACPI/OSPM is based on Intel Corporation's ACPI
+  Component Architecture (ACPI CA). The latest ACPI CA source code,
+  documentation, debug builds, and implementation status information
+  can be downloaded from:
+  <http://developer.intel.com/technology/iapc/acpi/downloads.htm>.
+
+  The ACPI Sourceforge project may also be of interest:
+  <http://sf.net/projects/acpi/>
+
+CONFIG_X86_MSR
+  This device gives privileged processes access to the x86
+  Model-Specific Registers (MSRs).  It is a character device with
+  major 202 and minors 0 to 31 for /dev/cpu/0/msr to /dev/cpu/31/msr.
+  MSR accesses are directed to a specific CPU on multi-processor
+  systems.
+
+CONFIG_X86_CPUID
+  This device gives processes access to the x86 CPUID instruction to
+  be executed on a specific processor.  It is a character device
+  with major 203 and minors 0 to 31 for /dev/cpu/0/cpuid to
+  /dev/cpu/31/cpuid.
+
+CONFIG_SOUND
+  If you have a sound card in your computer, i.e. if it can say more
+  than an occasional beep, say Y.  Be sure to have all the information
+  about your sound card and its configuration down (I/O port,
+  interrupt and DMA channel), because you will be asked for it.
+
+  You want to read the Sound-HOWTO, available from
+  <http://www.linuxdoc.org/docs.html#howto>. General information about
+  the modular sound system is contained in the files
+  <file:Documentation/sound/Introduction>.  The file
+  <file:Documentation/sound/README.OSS> contains some slightly
+  outdated but still useful information as well.
+
+  If you have a PnP sound card and you want to configure it at boot
+  time using the ISA PnP tools (read
+  <http://www.roestock.demon.co.uk/isapnptools/>), then you need to
+  compile the sound card support as a module ( = code which can be
+  inserted in and removed from the running kernel whenever you want)
+  and load that module after the PnP configuration is finished.  To do
+  this, say M here and read <file:Documentation/modules.txt> as well
+  as <file:Documentation/sound/README.modules>; the module will be
+  called soundcore.o.
+
+  I'm told that even without a sound card, you can make your computer
+  say more than an occasional beep, by programming the PC speaker.
+  Kernel patches and supporting utilities to do that are in the pcsp
+  package, available at <ftp://ftp.infradead.org/pub/pcsp/>.
+
+CONFIG_PREEMPT
+  This option reduces the latency of the kernel when reacting to
+  real-time or interactive events by allowing a low priority process to
+  be preempted even if it is in kernel mode executing a system call.
+  This allows applications to run more reliably even when the system is
+  under load. On the other hand, it may also break your drivers and add
+  priority inheritance problems to your system. Don't select it if 
+  you rely on a stable system or have slightly obscure hardware.
+  It's also not very well tested on x86-64 currently.
+  You have been warned.
+
+  Say Y here if you are feeling brave and building a kernel for a 
+  desktop, embedded or real-time system.  Say N if you are unsure. 
+
+CONFIG_MAGIC_SYSRQ
+  If you say Y here, you will have some control over the system even
+  if the system crashes for example during kernel debugging (e.g., you
+  will be able to flush the buffer cache to disk, reboot the system
+  immediately or dump some status information). This is accomplished
+  by pressing various keys while holding SysRq (Alt+PrintScreen). It
+  also works on a serial console (on PC hardware at least), if you
+  send a BREAK and then within 5 seconds a command keypress. The
+  keys are documented in <file:Documentation/sysrq.txt>. Don't say Y
+  unless you really know what this hack does.
+
+CONFIG_DEBUG_KERNEL
+  Say Y here if you are developing drivers or trying to debug and
+  identify kernel problems.
+
+CONFIG_DEBUG_SLAB
+  Say Y here to have the kernel do limited verification on memory
+  allocation as well as poisoning memory on free to catch use of freed
+  memory.
+
+CONFIG_DEBUG_IOVIRT
+  Say Y here to get warned whenever an attempt is made to do I/O on
+  obviously invalid addresses such as those generated when ioremap()
+  calls are forgotten.  Memory mapped I/O will go through an extra
+  check to catch access to unmapped ISA addresses, an access method
+  that can still be used by old drivers that are being ported from
+  2.0/2.2.
+
+CONFIG_DEBUG_SPINLOCK
+  Say Y here and build SMP to catch missing spinlock initialization
+  and certain other kinds of spinlock errors commonly made.  This is
+  best used in conjunction with the NMI watchdog so that spinlock
+  deadlocks are also debuggable.
+
+CONFIG_CHECKING
+  Enables some internal consistency checks for kernel debugging.
+  You should normally say N.
+
+CONFIG_SIMNOW
+  Disable some time consuming optional things for slow CPU simulators.
+  Say N unless you're running on a slow simulator like Simics or SimNow. 
+
+CONFIG_EARLY_PRINTK
+  Write kernel log output directly into the VGA buffer. This is useful
+  for kernel debugging when your machine crashes very early before
+  the console code is initialized. For normal operation it is not
+  recommended because it looks ugly and doesn't cooperate with 
+  klogd/syslogd or the X server. You should normally say N here, unless
+  you want to debug such a crash.
+  
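
The CONFIG_X86_MSR entry above describes a per-CPU character device for MSR
access. As a rough illustration of how such a device is typically used from
userspace (assuming the usual convention that the MSR number is passed as the
file offset and each read returns 8 bytes; MSR 0x10, the TSC, is used purely as
an example), a small sketch might look like this:

/* Read one MSR through /dev/cpu/0/msr, as described in the help text above.
 * Illustrative sketch only; assumes the MSR number is the file offset. */
#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	uint64_t value;
	int fd = open("/dev/cpu/0/msr", O_RDONLY);

	if (fd < 0) {
		perror("open /dev/cpu/0/msr");
		return 1;
	}
	/* Seek to the MSR number (0x10 = TSC) and read its 64-bit value. */
	if (pread(fd, &value, sizeof(value), 0x10) != sizeof(value)) {
		perror("pread");
		close(fd);
		return 1;
	}
	printf("MSR 0x10 = 0x%llx\n", (unsigned long long)value);
	close(fd);
	return 0;
}
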
diff --git a/arch/x86_64/Makefile b/arch/x86_64/Makefile
new file mode 100644 (file)
index 0000000..e6b6078
--- /dev/null
@@ -0,0 +1,130 @@
+#
+# x86_64/Makefile
+#
+# This file is included by the global makefile so that you can add your own
+# architecture-specific flags and dependencies. Remember to have actions
+# for "archclean" and "archdep" for cleaning up and making dependencies for
+# this architecture
+#
+# This file is subject to the terms and conditions of the GNU General Public
+# License.  See the file "COPYING" in the main directory of this archive
+# for more details.
+#
+# Copyright (C) 1994 by Linus Torvalds
+#
+# 19990713  Artur Skawina <skawina@geocities.com>
+#           Added '-march' and '-mpreferred-stack-boundary' support
+# 20000913  Pavel Machek <pavel@suse.cz>
+#          Converted for x86_64 architecture
+# 20010105  Andi Kleen, add IA32 compiler.
+#
+# $Id: Makefile,v 1.28 2001/06/29 17:47:43 aj Exp $
+
+
+#
+# boot system currently needs IA32 tools to link (to be fixed) 
+#
+# Change this to your i386 compiler/binutils
+IA32_PREFIX := /usr/bin/
+IA32_CC := $(IA32_PREFIX)gcc -O2 -fomit-frame-pointer -nostdinc -I $(HPATH)
+IA32_LD := $(IA32_PREFIX)ld
+IA32_AS := $(IA32_PREFIX)gcc -D__ASSEMBLY__ -traditional -c -nostdinc -I $(HPATH) 
+IA32_OBJCOPY := $(IA32_PREFIX)objcopy
+IA32_CPP := $(IA32_PREFIX)gcc -E
+export IA32_CC IA32_LD IA32_AS IA32_OBJCOPY IA32_CPP
+
+
+LD=$(CROSS_COMPILE)ld -m elf_x86_64
+OBJCOPY=$(CROSS_COMPILE)objcopy -O binary -R .note -R .comment -S
+LDFLAGS=-e stext
+LINKFLAGS =-T $(TOPDIR)/arch/x86_64/vmlinux.lds $(LDFLAGS)
+
+CFLAGS += $(shell if $(CC) -mno-red-zone -S -o /dev/null -xc /dev/null >/dev/null 2>&1; then echo "-mno-red-zone"; fi ) 
+CFLAGS += -mcmodel=kernel
+CFLAGS += -pipe
+# generates worse code, but makes the assembly much more readable:
+CFLAGS += -fno-reorder-blocks  
+# work around early gcc 3.1 bugs. Later snapshots should have this fixed already.
+CFLAGS += -fno-strength-reduce
+# make sure all inline functions are inlined
+CFLAGS += -finline-limit=3000
+
+#CFLAGS += -g
+
+# prevent gcc from keeping the stack 16 byte aligned (FIXME)
+#CFLAGS += -mpreferred-stack-boundary=2
+
+HEAD := arch/x86_64/kernel/head.o arch/x86_64/kernel/head64.o arch/x86_64/kernel/init_task.o
+
+SUBDIRS := arch/x86_64/tools $(SUBDIRS) arch/x86_64/kernel arch/x86_64/mm arch/x86_64/lib
+CORE_FILES := arch/x86_64/kernel/kernel.o $(CORE_FILES)
+CORE_FILES +=  arch/x86_64/mm/mm.o
+LIBS := $(TOPDIR)/arch/x86_64/lib/lib.a $(LIBS)
+
+CLEAN_FILES += include/asm-x86_64/offset.h
+
+ifdef CONFIG_IA32_EMULATION
+SUBDIRS += arch/x86_64/ia32
+CORE_FILES += arch/x86_64/ia32/ia32.o
+endif
+
+ifdef CONFIG_HOSTFS
+SUBDIRS += arch/x86_64/hostfs
+core-$(CONFIG_HOSTFS) += arch/x86_64/hostfs/hostfs.o
+endif
+
+CORE_FILES += $(core-y)
+
+arch/x86_64/tools: dummy
+       $(MAKE) linuxsubdirs SUBDIRS=arch/x86_64/tools 
+
+arch/x86_64/kernel: dummy 
+       $(MAKE) linuxsubdirs SUBDIRS=arch/x86_64/kernel
+
+arch/x86_64/mm: dummy
+       $(MAKE) linuxsubdirs SUBDIRS=arch/x86_64/mm
+
+MAKEBOOT = $(MAKE) -C arch/$(ARCH)/boot
+
+vmlinux: arch/x86_64/vmlinux.lds
+
+checkoffset: FORCE
+       make -C arch/$(ARCH)/tools $(TOPDIR)/include/asm-x86_64/offset.h
+
+FORCE: ;
+
+.PHONY: zImage bzImage compressed zlilo bzlilo zdisk bzdisk install \
+               clean archclean archmrproper archdep checkoffset
+
+bzImage: checkoffset vmlinux
+       @$(MAKEBOOT) bzImage
+
+bzImage-padded: checkoffset vmlinux
+       @$(MAKEBOOT) bzImage-padded
+
+tmp:
+       @$(MAKEBOOT) BOOTIMAGE=bzImage zlilo
+bzlilo: checkoffset vmlinux
+       @$(MAKEBOOT) BOOTIMAGE=bzImage zlilo
+
+zdisk: checkoffset vmlinux
+       @$(MAKEBOOT) BOOTIMAGE=zImage zdisk
+
+bzdisk: checkoffset vmlinux
+       @$(MAKEBOOT) BOOTIMAGE=bzImage zdisk
+
+install: checkoffset vmlinux
+       @$(MAKEBOOT) BOOTIMAGE=bzImage install
+
+archclean:
+       @$(MAKEBOOT) clean
+       $(MAKE) -C $(TOPDIR)/arch/x86_64/tools clean
+
+archmrproper:
+       rm -f $(TOPDIR)/arch/x86_64/tools/offset.h
+       rm -f $(TOPDIR)/arch/x86_64/tools/offset.tmp
+       rm -f $(TOPDIR)/include/asm-x86_64/offset.h
+
+archdep:
+       @$(MAKE) -C $(TOPDIR)/arch/x86_64/tools all
+       @$(MAKEBOOT) dep
diff --git a/arch/x86_64/boot/Makefile b/arch/x86_64/boot/Makefile
new file mode 100644 (file)
index 0000000..409c1ce
--- /dev/null
@@ -0,0 +1,92 @@
+#
+# arch/x86_64/boot/Makefile
+#
+# This file is subject to the terms and conditions of the GNU General Public
+# License.  See the file "COPYING" in the main directory of this archive
+# for more details.
+#
+# Copyright (C) 1994 by Linus Torvalds
+#
+
+BOOT_INCL =    $(TOPDIR)/include/linux/config.h \
+               $(TOPDIR)/include/linux/autoconf.h \
+               $(TOPDIR)/include/asm/boot.h
+
+zImage: $(CONFIGURE) bootsect setup compressed/vmlinux tools/build
+       $(OBJCOPY) compressed/vmlinux compressed/vmlinux.out
+       tools/build bootsect setup compressed/vmlinux.out $(ROOT_DEV) > zImage
+
+bzImage: $(CONFIGURE) bbootsect bsetup compressed/bvmlinux tools/build
+       $(OBJCOPY) compressed/bvmlinux compressed/bvmlinux.out
+       tools/build -b bbootsect bsetup compressed/bvmlinux.out $(ROOT_DEV) > bzImage
+
+bzImage-padded: bzImage
+       dd if=/dev/zero bs=1k count=70 >> bzImage
+
+compressed/vmlinux: $(TOPDIR)/vmlinux
+       @$(MAKE) -C compressed vmlinux
+
+compressed/bvmlinux: $(TOPDIR)/vmlinux
+       @$(MAKE) -C compressed bvmlinux
+
+zdisk: $(BOOTIMAGE)
+       dd bs=8192 if=$(BOOTIMAGE) of=/dev/fd0
+
+zlilo: $(CONFIGURE) $(BOOTIMAGE)
+       if [ -f $(INSTALL_PATH)/vmlinuz ]; then mv $(INSTALL_PATH)/vmlinuz $(INSTALL_PATH)/vmlinuz.old; fi
+       if [ -f $(INSTALL_PATH)/System.map ]; then mv $(INSTALL_PATH)/System.map $(INSTALL_PATH)/System.old; fi
+       cat $(BOOTIMAGE) > $(INSTALL_PATH)/vmlinuz
+       cp $(TOPDIR)/System.map $(INSTALL_PATH)/
+       if [ -x /sbin/lilo ]; then /sbin/lilo; else /etc/lilo/install; fi
+
+install: $(CONFIGURE) $(BOOTIMAGE)
+       sh -x ./install.sh $(KERNELRELEASE) $(BOOTIMAGE) $(TOPDIR)/System.map "$(INSTALL_PATH)"
+
+tools/build: tools/build.c
+       $(HOSTCC) $(HOSTCFLAGS) -o $@ $<
+
+bootsect: bootsect.o
+       $(IA32_LD) -Ttext 0x0 -s --oformat binary -o $@ $<
+
+bootsect.o: bootsect.s
+       $(IA32_AS) -o $@ $<
+
+bootsect.s: bootsect.S Makefile $(BOOT_INCL)
+       $(IA32_CPP) $(CPPFLAGS) -traditional -D__ASSEMBLY__ $(SVGA_MODE) $(RAMDISK) $< -o $@
+
+bbootsect: bbootsect.o
+       $(IA32_LD) -Ttext 0x0 -s --oformat binary $< -o $@
+
+bbootsect.o: bbootsect.s
+       $(IA32_AS) -o $@ $<
+
+bbootsect.s: bootsect.S Makefile $(BOOT_INCL)
+       $(IA32_CPP) $(CPPFLAGS) -D__BIG_KERNEL__ -D__ASSEMBLY__ -traditional $(SVGA_MODE) $(RAMDISK) $< -o $@
+
+setup: setup.o
+       $(IA32_LD) -Ttext 0x0 -s --oformat binary -e begtext -o $@ $<
+
+setup.o: setup.s
+       $(IA32_AS) -o $@ $<
+
+setup.s: setup.S video.S Makefile $(BOOT_INCL) $(TOPDIR)/include/linux/version.h $(TOPDIR)/include/linux/compile.h
+       $(IA32_CPP) $(CPPFLAGS) -traditional -D__ASSEMBLY__ $(SVGA_MODE) $(RAMDISK) $< -o $@
+
+bsetup: bsetup.o
+       $(IA32_LD) -Ttext 0x0 -s --oformat binary -e begtext -o $@ $<
+
+bsetup.o: bsetup.s
+       $(IA32_AS) -o $@ $<
+
+bsetup.s: setup.S video.S Makefile $(BOOT_INCL) $(TOPDIR)/include/linux/version.h $(TOPDIR)/include/linux/compile.h
+       $(IA32_CPP) $(CPPFLAGS) -D__BIG_KERNEL__ -D__ASSEMBLY__ -traditional $(SVGA_MODE) $(RAMDISK) $< -o $@
+
+dep:
+
+clean:
+       rm -f tools/build
+       rm -f setup bootsect zImage compressed/vmlinux.out
+       rm -f bsetup bbootsect bzImage compressed/bvmlinux.out
+       @$(MAKE) -C compressed clean
+
+
diff --git a/arch/x86_64/boot/bootsect.S b/arch/x86_64/boot/bootsect.S
new file mode 100644 (file)
index 0000000..c17f4ba
--- /dev/null
@@ -0,0 +1,416 @@
+/*
+ *     bootsect.S              Copyright (C) 1991, 1992 Linus Torvalds
+ *
+ *     modified by Drew Eckhardt
+ *     modified by Bruce Evans (bde)
+ *     modified by Chris Noe (May 1999) (as86 -> gas)
+ *
+ * 360k/720k disk support: Andrzej Krzysztofowicz <ankry@green.mif.pg.gda.pl>
+ *
+ * BIG FAT NOTE: We're in real mode using 64k segments.  Therefore segment
+ * addresses must be multiplied by 16 to obtain their respective linear
+ * addresses. To avoid confusion, linear addresses are written using leading
+ * hex while segment addresses are written as segment:offset.
+ *
+ * bde - should not jump blindly, there may be systems with only 512K low
+ * memory.  Use int 0x12 to get the top of memory, etc.
+ *
+ * It then loads 'setup' directly after itself (0x90200), and the system
+ * at 0x10000, using BIOS interrupts. 
+ *
+ * NOTE! currently system is at most (8*65536-4096) bytes long. This should 
+ * be no problem, even in the future. I want to keep it simple. This 508 kB
+ * kernel size should be enough, especially as this doesn't contain the
+ * buffer cache as in minix (and especially now that the kernel is 
+ * compressed :-)
+ *
+ * The loader has been made as simple as possible, and continuous
+ * read errors will result in an unbreakable loop. Reboot by hand. It
+ * loads pretty fast by getting whole tracks at a time whenever possible.
+ */
+
+#include <asm/boot.h>
+
+SETUPSECTS     = 4                     /* default nr of setup-sectors */
+BOOTSEG                = 0x07C0                /* original address of boot-sector */
+INITSEG                = DEF_INITSEG           /* we move boot here - out of the way */
+SETUPSEG       = DEF_SETUPSEG          /* setup starts here */
+SYSSEG         = DEF_SYSSEG            /* system loaded at 0x10000 (65536) */
+SYSSIZE                = DEF_SYSSIZE           /* system size: # of 16-byte clicks */
+                                       /* to be loaded */
+ROOT_DEV       = 0                     /* ROOT_DEV is now written by "build" */
+SWAP_DEV       = 0                     /* SWAP_DEV is now written by "build" */
+
+#ifndef SVGA_MODE
+#define SVGA_MODE ASK_VGA
+#endif
+
+#ifndef RAMDISK
+#define RAMDISK 0
+#endif
+
+#ifndef ROOT_RDONLY
+#define ROOT_RDONLY 1
+#endif
+
+.code16
+.text
+
+.global _start
+_start:
+
+# First things first. Move ourself from 0x7C00 -> 0x90000 and jump there.
+
+       movw    $BOOTSEG, %ax
+       movw    %ax, %ds                # %ds = BOOTSEG
+       movw    $INITSEG, %ax
+       movw    %ax, %es                # %ax = %es = INITSEG
+       movw    $256, %cx
+       subw    %si, %si
+       subw    %di, %di
+       cld
+       rep
+       movsw
+       ljmp    $INITSEG, $go
+
+# bde - changed 0xff00 to 0x4000 to use debugger at 0x6400 up (bde).  We
+# wouldn't have to worry about this if we checked the top of memory.  Also
+# my BIOS can be configured to put the wini drive tables in high memory
+# instead of in the vector table.  The old stack might have clobbered the
+# drive table.
+
+go:    movw    $0x4000-12, %di         # 0x4000 is an arbitrary value >=
+                                       # length of bootsect + length of
+                                       # setup + room for stack;
+                                       # 12 is disk parm size.
+       movw    %ax, %ds                # %ax and %es already contain INITSEG
+       movw    %ax, %ss
+       movw    %di, %sp                # put stack at INITSEG:0x4000-12.
+
+# Many BIOS's default disk parameter tables will not recognize
+# multi-sector reads beyond the maximum sector number specified
+# in the default diskette parameter tables - this may mean 7
+# sectors in some cases.
+#
+# Since single sector reads are slow and out of the question,
+# we must take care of this by creating new parameter tables
+# (for the first disk) in RAM.  We will set the maximum sector
+# count to 36 - the most we will encounter on an ED 2.88.  
+#
+# High doesn't hurt.  Low does.
+#
+# Segments are as follows: %cs = %ds = %es = %ss = INITSEG, %fs = 0,
+# and %gs is unused.
+
+       movw    %cx, %fs                # %fs = 0
+       movw    $0x78, %bx              # %fs:%bx is parameter table address
+       pushw   %ds
+       ldsw    %fs:(%bx), %si          # %ds:%si is source
+       movb    $6, %cl                 # copy 12 bytes
+       pushw   %di                     # %di = 0x4000-12.
+       rep                             # don't worry about cld
+       movsw                           # already done above
+       popw    %di
+       popw    %ds
+       movb    $36, 0x4(%di)           # patch sector count
+       movw    %di, %fs:(%bx)
+       movw    %es, %fs:2(%bx)
+
+# Get disk drive parameters, specifically number of sectors/track.
+
+# It seems that there is no BIOS call to get the number of sectors.
+# Guess 36 sectors if sector 36 can be read, 18 sectors if sector 18
+# can be read, 15 if sector 15 can be read.  Otherwise guess 9.
+# Note that %cx = 0 from rep movsw above.
+
+       movw    $disksizes, %si         # table of sizes to try
+probe_loop:
+       lodsb
+       cbtw                            # extend to word
+       movw    %ax, sectors
+       cmpw    $disksizes+4, %si
+       jae     got_sectors             # If all else fails, try 9
+
+       xchgw   %cx, %ax                # %cx = track and sector
+       xorw    %dx, %dx                # drive 0, head 0
+       movw    $0x0200, %bx            # address = 512, in INITSEG (%es = %cs)
+       movw    $0x0201, %ax            # service 2, 1 sector
+       int     $0x13
+       jc      probe_loop              # try next value
+
+got_sectors:
+       movb    $0x03, %ah              # read cursor pos
+       xorb    %bh, %bh
+       int     $0x10
+       movw    $9, %cx
+       movb    $0x07, %bl              # page 0, attribute 7 (normal)
+                                       # %bh is set above; int10 doesn't
+                                       # modify it
+       movw    $msg1, %bp
+       movw    $0x1301, %ax            # write string, move cursor
+       int     $0x10                   # tell the user we're loading..
+
+# Load the setup-sectors directly after the moved bootblock (at 0x90200).
+# We should know the drive geometry to do it, as setup may exceed the first
+# cylinder (for 9-sector 360K and 720K floppies).
+
+       movw    $0x0001, %ax            # set sread (sector-to-read) to 1 as
+       movw    $sread, %si             # the boot sector has already been read
+       movw    %ax, (%si)
+
+       call    kill_motor              # reset FDC
+       movw    $0x0200, %bx            # address = 512, in INITSEG
+next_step:
+       movb    setup_sects, %al
+       movw    sectors, %cx
+       subw    (%si), %cx              # (%si) = sread
+       cmpb    %cl, %al
+       jbe     no_cyl_crossing
+       movw    sectors, %ax
+       subw    (%si), %ax              # (%si) = sread
+no_cyl_crossing:
+       call    read_track
+       pushw   %ax                     # save it
+       call    set_next                # set %bx properly; it uses %ax,%cx,%dx
+       popw    %ax                     # restore
+       subb    %al, setup_sects        # rest - for next step
+       jnz     next_step
+
+       pushw   $SYSSEG
+       popw    %es                     # %es = SYSSEG
+       call    read_it
+       call    kill_motor
+       call    print_nl
+
+# After that we check which root-device to use. If the device is
+# defined (!= 0), nothing is done and the given device is used.
+# Otherwise, one of /dev/fd0H2880 (2,32), /dev/PS0 (2,28) or /dev/at0 (2,8)
+# is used, depending on the number of sectors we pretend to know we have.
+
+# Segments are as follows: %cs = %ds = %ss = INITSEG,
+#      %es = SYSSEG, %fs = 0, %gs is unused.
+
+       movw    root_dev, %ax
+       orw     %ax, %ax
+       jne     root_defined
+
+       movw    sectors, %bx
+       movw    $0x0208, %ax            # /dev/ps0 - 1.2Mb
+       cmpw    $15, %bx
+       je      root_defined
+
+       movb    $0x1c, %al              # /dev/PS0 - 1.44Mb
+       cmpw    $18, %bx
+       je      root_defined
+
+       movb    $0x20, %al              # /dev/fd0H2880 - 2.88Mb
+       cmpw    $36, %bx
+       je      root_defined
+
+       movb    $0, %al                 # /dev/fd0 - autodetect
+root_defined:
+       movw    %ax, root_dev
+
+# After that (everything loaded), we jump to the setup-routine
+# loaded directly after the bootblock:
+
+       ljmp    $SETUPSEG, $0
+
+# These variables are addressed via %si register as it gives shorter code.
+
+sread: .word 0                         # sectors read of current track
+head:  .word 0                         # current head
+track: .word 0                         # current track
+
+# This routine loads the system at address SYSSEG, making sure
+# no 64kB boundaries are crossed. We try to load it as fast as
+# possible, loading whole tracks whenever we can.
+
+read_it:
+       movw    %es, %ax                # %es = SYSSEG when called
+       testw   $0x0fff, %ax
+die:   jne     die                     # %es must be at 64kB boundary
+       xorw    %bx, %bx                # %bx is starting address within segment
+rp_read:
+#ifdef __BIG_KERNEL__                  # look in setup.S for bootsect_kludge
+       bootsect_kludge = 0x220         # 0x200 + 0x20 which is the size of the
+       lcall   *bootsect_kludge        # bootsector + bootsect_kludge offset
+#else
+       movw    %es, %ax
+       subw    $SYSSEG, %ax
+       movw    %bx, %cx
+       shr     $4, %cx
+       add     %cx, %ax                # check offset
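+                                       # %ax = (%es - SYSSEG) + %bx/16, i.e.
+                                       # the 16-byte paragraphs loaded so
+                                       # far; syssize uses the same unit.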
+#endif
+       cmpw    syssize, %ax            # have we loaded everything yet?
+       jbe     ok1_read
+
+       ret
+
+ok1_read:
+       movw    sectors, %ax
+       subw    (%si), %ax              # (%si) = sread
+       movw    %ax, %cx
+       shlw    $9, %cx
+       addw    %bx, %cx
+       jnc     ok2_read
+
+       je      ok2_read
+
+       xorw    %ax, %ax
+       subw    %bx, %ax
+       shrw    $9, %ax
+ok2_read:
+       call    read_track
+       call    set_next
+       jmp     rp_read
+
+read_track:
+       pusha
+       pusha   
+       movw    $0xe2e, %ax             # loading... message 2e = .
+       movw    $7, %bx
+       int     $0x10
+       popa            
+
+# Accessing head, track, sread via %si gives shorter code.
+
+       movw    4(%si), %dx             # 4(%si) = track
+       movw    (%si), %cx              # (%si)  = sread
+       incw    %cx
+       movb    %dl, %ch
+       movw    2(%si), %dx             # 2(%si) = head
+       movb    %dl, %dh
+       andw    $0x0100, %dx
+       movb    $2, %ah
+       pushw   %dx                     # save for error dump
+       pushw   %cx
+       pushw   %bx
+       pushw   %ax
+       int     $0x13
+       jc      bad_rt
+
+       addw    $8, %sp
+       popa
+       ret
+
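+# set_next expects %ax = number of sectors just read.  It advances
+# sread/head/track (next head when the track is done, next track when both
+# heads are done) and moves the destination %es:%bx forward by %ax*512 bytes,
+# stepping %es up by 0x1000 (64kB) and clearing %bx when the offset wraps.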
+set_next:
+       movw    %ax, %cx
+       addw    (%si), %ax              # (%si) = sread
+       cmp     sectors, %ax
+       jne     ok3_set
+       movw    $0x0001, %ax
+       xorw    %ax, 2(%si)             # change head
+       jne     ok4_set
+       incw    4(%si)                  # next track
+ok4_set:
+       xorw    %ax, %ax
+ok3_set:
+       movw    %ax, (%si)              # set sread
+       shlw    $9, %cx
+       addw    %cx, %bx
+       jnc     set_next_fin
+       movw    %es, %ax
+       addb    $0x10, %ah
+       movw    %ax, %es
+       xorw    %bx, %bx
+set_next_fin:
+       ret
+
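+# A read failed: dump the error code and the saved %dx/%cx/%bx/%ax via
+# print_all, reset the floppy controller (int 0x13, service 0), drop the
+# dump frame from the stack and retry the whole track.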
+bad_rt:
+       pushw   %ax                     # save error code
+       call    print_all               # %ah = error, %al = read
+       xorb    %ah, %ah
+       xorb    %dl, %dl
+       int     $0x13
+       addw    $10, %sp
+       popa
+       jmp read_track
+
+# print_all is for debugging purposes.  
+#
+# it will print out all of the registers.  The assumption is that this is
+# called from a routine, with a stack frame like
+#
+#      %dx 
+#      %cx
+#      %bx
+#      %ax
+#      (error)
+#      ret <- %sp
+print_all:
+       movw    $5, %cx                 # error code + 4 registers
+       movw    %sp, %bp
+print_loop:
+       pushw   %cx                     # save count remaining
+       call    print_nl                # <-- for readability
+       cmpb    $5, %cl
+       jae     no_reg                  # see if register name is needed
+       
+       movw    $0xe05 + 'A' - 1, %ax
+       subb    %cl, %al
+       int     $0x10
+       movb    $'X', %al
+       int     $0x10
+       movb    $':', %al
+       int     $0x10
+no_reg:
+       addw    $2, %bp                 # next register
+       call    print_hex               # print it
+       popw    %cx
+       loop    print_loop
+       ret
+
+print_nl:
+       movw    $0xe0d, %ax             # CR
+       int     $0x10
+       movb    $0xa, %al               # LF
+       int     $0x10
+       ret
+
+# print_hex is for debugging purposes, and prints the word
+# pointed to by %ss:%bp in hexadecimal.
+
+print_hex:
+       movw    $4, %cx                 # 4 hex digits
+       movw    (%bp), %dx              # load word into %dx
+print_digit:
+       rolw    $4, %dx                 # rotate to use low 4 bits
+       movw    $0xe0f, %ax             # %ah = request
+       andb    %dl, %al                # %al = mask for nybble
+       addb    $0x90, %al              # convert %al to ascii hex
+       daa                             # in only four instructions!
+       adc     $0x40, %al
+       daa
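+                                       # add/daa/adc/daa maps nibble 0-9 to
+                                       # '0'-'9' and 10-15 to 'A'-'F' with
+                                       # no lookup table.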
+       int     $0x10
+       loop    print_digit
+       ret
+
+# This procedure turns off the floppy drive motor, so
+# that we enter the kernel in a known state, and
+# don't have to worry about it later.
+# NOTE: Doesn't save %ax or %dx; do it yourself if you need to.
+
+kill_motor:
+       movw    $0x3f2, %dx
+       xorb    %al, %al
+       outb    %al, %dx
+       ret
+
+sectors:       .word 0
+disksizes:     .byte 36, 18, 15, 9
+msg1:          .byte 13, 10
+               .ascii "Loading"
+
+# XXX: This is a fairly snug fit.
+
+.org 497
+setup_sects:   .byte SETUPSECTS
+root_flags:    .word ROOT_RDONLY
+syssize:       .word SYSSIZE
+swap_dev:      .word SWAP_DEV
+ram_size:      .word RAMDISK
+vid_mode:      .word SVGA_MODE
+root_dev:      .word ROOT_DEV
+boot_flag:     .word 0xAA55
diff --git a/arch/x86_64/boot/compressed/Makefile b/arch/x86_64/boot/compressed/Makefile
new file mode 100644 (file)
index 0000000..0832d02
--- /dev/null
@@ -0,0 +1,43 @@
+#
+# linux/arch/i386/boot/compressed/Makefile
+#
+# create a compressed vmlinux image from the original vmlinux
+#
+
+HEAD = head.o
+SYSTEM = $(TOPDIR)/vmlinux
+
+OBJECTS = $(HEAD) misc.o
+
+IA32_CFLAGS := -O2 -DSTDC_HEADERS
+
+#
+# ZIMAGE_OFFSET is the load offset of the compression loader
+# BZIMAGE_OFFSET is the load offset of the high loaded compression loader
+#
+BZIMAGE_OFFSET = 0x100000
+
+BZLINKFLAGS = -Ttext $(BZIMAGE_OFFSET) $(ZLDFLAGS)
+
+all: vmlinux
+
+bvmlinux: piggy.o $(OBJECTS)
+       $(IA32_LD) $(BZLINKFLAGS) -o bvmlinux $(OBJECTS) piggy.o
+
+head.o: head.S
+       $(IA32_AS) -c head.S
+
+misc.o: misc.c
+       $(IA32_CC) $(IA32_CFLAGS) -c misc.c
+
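+# piggy.o wraps the gzip-compressed kernel image in an object file; the
+# generated linker script exports input_data, input_data_end and input_len,
+# which misc.c uses to locate the compressed stream.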
+piggy.o:       $(SYSTEM)
+       tmppiggy=_tmp_$$$$piggy; \
+       rm -f $$tmppiggy $$tmppiggy.gz $$tmppiggy.lnk; \
+       $(OBJCOPY) $(SYSTEM) $$tmppiggy; \
+       gzip -f -9 < $$tmppiggy > $$tmppiggy.gz; \
+       echo "SECTIONS { .data : { input_len = .; LONG(input_data_end - input_data) input_data = .; *(.data) input_data_end = .; }}" > $$tmppiggy.lnk; \
+       $(IA32_LD) -r -o piggy.o -b binary $$tmppiggy.gz -b elf32-i386 -T $$tmppiggy.lnk; \
+       rm -f $$tmppiggy $$tmppiggy.gz $$tmppiggy.lnk
+
+clean:
+       rm -f vmlinux bvmlinux _tmp_*
diff --git a/arch/x86_64/boot/compressed/head.S b/arch/x86_64/boot/compressed/head.S
new file mode 100644 (file)
index 0000000..0f7c98e
--- /dev/null
@@ -0,0 +1,142 @@
+/*
+ *  linux/boot/head.S
+ *
+ *  Copyright (C) 1991, 1992, 1993  Linus Torvalds
+ *
+ *  $Id: head.S,v 1.3 2001/04/20 00:59:28 ak Exp $                     
+ */
+
+/*
+ *  head.S contains the 32-bit startup code.
+ *
+ * NOTE!!! Startup happens at absolute address 0x00001000, which is also where
+ * the page directory will exist. The startup code will be overwritten by
+ * the page directory. [According to comments etc elsewhere on a compressed
+ * kernel it will end up at 0x1000 + 1Mb. I hope so, as I assume this. - AC]
+ *
+ * Page 0 is deliberately kept safe, since System Management Mode code in 
+ * laptops may need to access the BIOS data stored there.  This is also
+ * useful for future device drivers that access the BIOS via VM86
+ * mode.
+ */
+
+/*
+ * High loaded stuff by Hans Lermen & Werner Almesberger, Feb. 1996    
+ */
+.code32
+.text
+
+#include <linux/linkage.h>
+#include <asm/segment.h>
+
+       .code32
+       .globl startup_32
+       
+startup_32:
+       cld
+       cli
+       movl $(__KERNEL_DS),%eax
+       movl %eax,%ds
+       movl %eax,%es
+       movl %eax,%fs
+       movl %eax,%gs
+
+       lss SYMBOL_NAME(stack_start),%esp
+       xorl %eax,%eax
+1:     incl %eax               # check that A20 really IS enabled
+       movl %eax,0x000000      # loop forever if it isn't
+       cmpl %eax,0x100000
+       je 1b
+
+/*
+ * Initialize eflags.  Some BIOS's leave bits like NT set.  This would
+ * confuse the debugger if this code is traced.
+ * XXX - best to initialize before switching to protected mode.
+ */
+       pushl $0
+       popfl
+/*
+ * Clear BSS
+ */
+       xorl %eax,%eax
+       movl $ SYMBOL_NAME(_edata),%edi
+       movl $ SYMBOL_NAME(_end),%ecx
+       subl %edi,%ecx
+       cld
+       rep
+       stosb
+/*
+ * Do the decompression, and jump to the new kernel..
+ */
+       subl $16,%esp   # place for structure on the stack
+       movl %esp,%eax
+       pushl %esi      # real mode pointer as second arg
+       pushl %eax      # address of structure as first arg
+       call SYMBOL_NAME(decompress_kernel)
+       orl  %eax,%eax 
+       jnz  3f
+       addl $8,%esp
+       xorl %ebx,%ebx
+       ljmp $(__KERNEL_CS), $0x100000
+
+/*
+ * We come here, if we were loaded high.
+ * We need to move the move-in-place routine down to 0x1000
+ * and then start it with the buffer addresses in registers,
+ * which we got from the stack.
+ */
+3:
+       movl %esi,%ebx  
+       movl $move_routine_start,%esi
+       movl $0x1000,%edi
+       movl $move_routine_end,%ecx
+       subl %esi,%ecx
+       addl $3,%ecx
+       shrl $2,%ecx
+       cld
+       rep
+       movsl
+
+       popl %esi       # discard the address
+       addl $4,%esp    # real mode pointer
+       popl %esi       # low_buffer_start
+       popl %ecx       # lcount
+       popl %edx       # high_buffer_start
+       popl %eax       # hcount
+       movl $0x100000,%edi
+       cli             # make sure we don't get interrupted
+       ljmp $(__KERNEL_CS), $0x1000 # and jump to the move routine
+
+/*
+ * Routine (template) for moving the decompressed kernel in place,
+ * if we were loaded high. This _must_ be PIC code!
+ */
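+/*
+ * At this point (see the pops above): %esi = low_buffer_start,
+ * %ecx = lcount, %edx = high_buffer_start, %eax = hcount, %edi = 0x100000,
+ * and %ebx holds the real-mode pointer restored into %esi at the end.
+ */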
+move_routine_start:
+       movl %ecx,%ebp
+       shrl $2,%ecx
+       rep
+       movsl
+       movl %ebp,%ecx
+       andl $3,%ecx
+       rep
+       movsb
+       movl %edx,%esi
+       movl %eax,%ecx  # NOTE: rep movsb won't move if %ecx == 0
+       addl $3,%ecx
+       shrl $2,%ecx
+       rep
+       movsl
+       movl %ebx,%esi  # Restore setup pointer
+       xorl %ebx,%ebx
+       ljmp $(__KERNEL_CS), $0x100000
+move_routine_end:
+
+
+/* Stack for decompression */
+       .align 32
+user_stack:            
+       .fill 4096,4,0
+stack_start:   
+       .long user_stack+4096
+       .word __KERNEL_DS
+
diff --git a/arch/x86_64/boot/compressed/misc.c b/arch/x86_64/boot/compressed/misc.c
new file mode 100644 (file)
index 0000000..c0ff34e
--- /dev/null
@@ -0,0 +1,431 @@
+/*
+ * misc.c
+ * 
+ * This is a collection of several routines from gzip-1.0.3 
+ * adapted for Linux.
+ *
+ * malloc by Hannu Savolainen 1993 and Matthias Urlichs 1994
+ * puts by Nick Holloway 1993, better puts by Martin Mares 1995
+ * High loaded stuff by Hans Lermen & Werner Almesberger, Feb. 1996
+ */
+
+#include "miscsetup.h"
+#include <asm/io.h>
+
+/*
+ * gzip declarations
+ */
+
+#define OF(args)  args
+#define STATIC static
+
+#undef memset
+#undef memcpy
+#define memzero(s, n)     memset ((s), 0, (n))
+
+typedef unsigned char  uch;
+typedef unsigned short ush;
+typedef unsigned long  ulg;
+
+#define WSIZE 0x8000           /* Window size must be at least 32k, */
+                               /* and a power of two */
+
+static uch *inbuf;          /* input buffer */
+static uch window[WSIZE];    /* Sliding window buffer */
+
+static unsigned insize = 0;  /* valid bytes in inbuf */
+static unsigned inptr = 0;   /* index of next byte to be processed in inbuf */
+static unsigned outcnt = 0;  /* bytes in output buffer */
+
+/* gzip flag byte */
+#define ASCII_FLAG   0x01 /* bit 0 set: file probably ASCII text */
+#define CONTINUATION 0x02 /* bit 1 set: continuation of multi-part gzip file */
+#define EXTRA_FIELD  0x04 /* bit 2 set: extra field present */
+#define ORIG_NAME    0x08 /* bit 3 set: original file name present */
+#define COMMENT      0x10 /* bit 4 set: file comment present */
+#define ENCRYPTED    0x20 /* bit 5 set: file is encrypted */
+#define RESERVED     0xC0 /* bit 6,7:   reserved */
+
+#define get_byte()  (inptr < insize ? inbuf[inptr++] : fill_inbuf())
+               
+/* Diagnostic functions */
+#ifdef DEBUG
+#  define Assert(cond,msg) {if(!(cond)) error(msg);}
+#  define Trace(x) fprintf x
+#  define Tracev(x) {if (verbose) fprintf x ;}
+#  define Tracevv(x) {if (verbose>1) fprintf x ;}
+#  define Tracec(c,x) {if (verbose && (c)) fprintf x ;}
+#  define Tracecv(c,x) {if (verbose>1 && (c)) fprintf x ;}
+#else
+#  define Assert(cond,msg)
+#  define Trace(x)
+#  define Tracev(x)
+#  define Tracevv(x)
+#  define Tracec(c,x)
+#  define Tracecv(c,x)
+#endif
+
+static int  fill_inbuf(void);
+static void flush_window(void);
+static void error(char *m);
+static void gzip_mark(void **);
+static void gzip_release(void **);
+  
+/*
+ * This is set up by the setup-routine at boot-time
+ */
+static unsigned char *real_mode; /* Pointer to real-mode data */
+
+#define EXT_MEM_K   (*(unsigned short *)(real_mode + 0x2))
+#ifndef STANDARD_MEMORY_BIOS_CALL
+#define ALT_MEM_K   (*(unsigned long *)(real_mode + 0x1e0))
+#endif
+#define SCREEN_INFO (*(struct screen_info *)(real_mode+0))
+
+extern char input_data[];
+extern int input_len;
+
+static long bytes_out = 0;
+static uch *output_data;
+static unsigned long output_ptr = 0;
+
+static void *malloc(int size);
+static void free(void *where);
+static void error(char *m);
+static void gzip_mark(void **);
+static void gzip_release(void **);
+static void puts(const char *);
+  
+extern int end;
+static long free_mem_ptr = (long)&end;
+static long free_mem_end_ptr;
+
+#define INPLACE_MOVE_ROUTINE  0x1000
+#define LOW_BUFFER_START      0x2000
+#define LOW_BUFFER_MAX       0x90000
+#define HEAP_SIZE             0x3000
+static unsigned int low_buffer_end, low_buffer_size;
+static int high_loaded =0;
+static uch *high_buffer_start /* = (uch *)(((ulg)&end) + HEAP_SIZE)*/;
+
+static char *vidmem = (char *)0xb8000;
+static int vidport;
+static int lines, cols;
+
+#include "../../../../lib/inflate.c"
+
+static void *malloc(int size)
+{
+       void *p;
+
+       if (size <0) error("Malloc error\n");
+       if (free_mem_ptr <= 0) error("Memory error\n");
+
+       free_mem_ptr = (free_mem_ptr + 3) & ~3; /* Align */
+
+       p = (void *)free_mem_ptr;
+       free_mem_ptr += size;
+
+       if (free_mem_ptr >= free_mem_end_ptr)
+               error("\nOut of memory\n");
+
+       return p;
+}
+
+static void free(void *where)
+{      /* Don't care */
+}
+
+static void gzip_mark(void **ptr)
+{
+       *ptr = (void *) free_mem_ptr;
+}
+
+static void gzip_release(void **ptr)
+{
+       free_mem_ptr = (long) *ptr;
+}
+static void scroll(void)
+{
+       int i;
+
+       memcpy ( vidmem, vidmem + cols * 2, ( lines - 1 ) * cols * 2 );
+       for ( i = ( lines - 1 ) * cols * 2; i < lines * cols * 2; i += 2 )
+               vidmem[i] = ' ';
+}
+
+static void puts(const char *s)
+{
+       int x,y,pos;
+       char c;
+
+       x = SCREEN_INFO.orig_x;
+       y = SCREEN_INFO.orig_y;
+
+       while ( ( c = *s++ ) != '\0' ) {
+               if ( c == '\n' ) {
+                       x = 0;
+                       if ( ++y >= lines ) {
+                               scroll();
+                               y--;
+                       }
+               } else {
+                       vidmem [ ( x + cols * y ) * 2 ] = c; 
+                       if ( ++x >= cols ) {
+                               x = 0;
+                               if ( ++y >= lines ) {
+                                       scroll();
+                                       y--;
+                               }
+                       }
+               }
+       }
+
+       SCREEN_INFO.orig_x = x;
+       SCREEN_INFO.orig_y = y;
+
+       pos = (x + cols * y) * 2;       /* Update cursor position */
+       outb_p(14, vidport);
+       outb_p(0xff & (pos >> 9), vidport+1);
+       outb_p(15, vidport);
+       outb_p(0xff & (pos >> 1), vidport+1);
+}
+
+void* memset(void* s, int c, size_t n)
+{
+       int i;
+       char *ss = (char*)s;
+
+       for (i=0;i<n;i++) ss[i] = c;
+       return s;
+}
+
+void* memcpy(void* __dest, __const void* __src,
+                           size_t __n)
+{
+       int i;
+       char *d = (char *)__dest, *s = (char *)__src;
+
+       for (i=0;i<__n;i++) d[i] = s[i];
+       return __dest;
+}
+
+/* ===========================================================================
+ * Fill the input buffer. This is called only when the buffer is empty
+ * and at least one byte is really needed.
+ */
+static int fill_inbuf(void)
+{
+       if (insize != 0) {
+               error("ran out of input data\n");
+       }
+
+       inbuf = input_data;
+       insize = input_len;
+       inptr = 1;
+       return inbuf[0];
+}
+
+/* ===========================================================================
+ * Write the output window window[0..outcnt-1] and update crc and bytes_out.
+ * (Used for the decompressed data only.)
+ */
+static void flush_window_low(void)
+{
+    ulg c = crc;         /* temporary variable */
+    unsigned n;
+    uch *in, *out, ch;
+    
+    in = window;
+    out = &output_data[output_ptr]; 
+    for (n = 0; n < outcnt; n++) {
+           ch = *out++ = *in++;
+           c = crc_32_tab[((int)c ^ ch) & 0xff] ^ (c >> 8);
+    }
+    crc = c;
+    bytes_out += (ulg)outcnt;
+    output_ptr += (ulg)outcnt;
+    outcnt = 0;
+}
+
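+/*
+ * When loaded high, output first fills the low buffer (LOW_BUFFER_START up
+ * to low_buffer_end); once that is full, writing continues at
+ * high_buffer_start.  The move routine in head.S later joins both pieces
+ * at the 1MB mark.
+ */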
+static void flush_window_high(void)
+{
+    ulg c = crc;         /* temporary variable */
+    unsigned n;
+    uch *in,  ch;
+    in = window;
+    for (n = 0; n < outcnt; n++) {
+       ch = *output_data++ = *in++;
+       if ((ulg)output_data == low_buffer_end) output_data=high_buffer_start;
+       c = crc_32_tab[((int)c ^ ch) & 0xff] ^ (c >> 8);
+    }
+    crc = c;
+    bytes_out += (ulg)outcnt;
+    outcnt = 0;
+}
+
+static void flush_window(void)
+{
+       if (high_loaded) flush_window_high();
+       else flush_window_low();
+}
+
+static void error(char *x)
+{
+       puts("\n\n");
+       puts(x);
+       puts("\n\n -- System halted");
+
+       while(1);       /* Halt */
+}
+
+void setup_normal_output_buffer(void)
+{
+#ifdef STANDARD_MEMORY_BIOS_CALL
+       if (EXT_MEM_K < 1024) error("Less than 2MB of memory.\n");
+#else
+       if ((ALT_MEM_K > EXT_MEM_K ? ALT_MEM_K : EXT_MEM_K) < 1024) error("Less than 2MB of memory.\n");
+#endif
+       output_data = (char *)0x100000; /* Points to 1M */
+       free_mem_end_ptr = (long)real_mode;
+}
+
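+/*
+ * Filled in by setup_output_buffer_if_we_run_high() and
+ * close_output_buffer_if_we_run_high(); head.S uses it to move lcount bytes
+ * from low_buffer_start and hcount bytes from high_buffer_start into place
+ * at the 1MB mark.
+ */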
+struct moveparams {
+       uch *low_buffer_start;  int lcount;
+       uch *high_buffer_start; int hcount;
+};
+
+void setup_output_buffer_if_we_run_high(struct moveparams *mv)
+{
+       high_buffer_start = (uch *)(((ulg)&end) + HEAP_SIZE);
+#ifdef STANDARD_MEMORY_BIOS_CALL
+       if (EXT_MEM_K < (3*1024)) error("Less than 4MB of memory.\n");
+#else
+       if ((ALT_MEM_K > EXT_MEM_K ? ALT_MEM_K : EXT_MEM_K) < (3*1024)) error("Less than 4MB of memory.\n");
+#endif 
+       mv->low_buffer_start = output_data = (char *)LOW_BUFFER_START;
+       low_buffer_end = ((unsigned int)real_mode > LOW_BUFFER_MAX
+         ? LOW_BUFFER_MAX : (unsigned int)real_mode) & ~0xfff;
+       low_buffer_size = low_buffer_end - LOW_BUFFER_START;
+       high_loaded = 1;
+       free_mem_end_ptr = (long)high_buffer_start;
+       if ( (0x100000 + low_buffer_size) > ((ulg)high_buffer_start)) {
+               high_buffer_start = (uch *)(0x100000 + low_buffer_size);
+               mv->hcount = 0; /* signal that high_buffer need not be moved */
+       }
+       else mv->hcount = -1;
+       mv->high_buffer_start = high_buffer_start;
+}
+
+void close_output_buffer_if_we_run_high(struct moveparams *mv)
+{
+       if (bytes_out > low_buffer_size) {
+               mv->lcount = low_buffer_size;
+               if (mv->hcount)
+                       mv->hcount = bytes_out - low_buffer_size;
+       } else {
+               mv->lcount = bytes_out;
+               mv->hcount = 0;
+       }
+}
+
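+/*
+ * Classify the CPU with the classic EFLAGS AC/ID toggle tests plus CPUID:
+ * res = 3 (386), 4 (486 without CPUID), 5 (no extended CPUID),
+ * 6 (no long mode) or 7 (64-bit capable).  Long-mode support is bit 29 of
+ * %edx returned by CPUID function 0x80000001.
+ */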
+void check_cpu(void)
+{
+       int res = 0;
+       asm volatile( " \n\
+       movl $3,%%edx           # at least 386 \n\
+       pushfl                  # push EFLAGS \n\
+       popl %%eax              # get EFLAGS \n\
+       movl %%eax,%%ecx                # save original EFLAGS \n\
+       xorl $0x40000,%%eax     # flip AC bit in EFLAGS \n\
+       pushl %%eax             # copy to EFLAGS \n\
+       popfl                   # set EFLAGS \n\
+       pushfl                  # get new EFLAGS \n\
+       popl %%eax              # put it in eax \n\
+       xorl %%ecx,%%eax                # change in flags \n\
+       andl $0x40000,%%eax     # check if AC bit changed \n\
+       je 1f \n\
+\n\
+       movl $4,%%edx           # at least 486 \n\
+       movl %%ecx,%%eax \n\
+       xorl $0x200000,%%eax    # check ID flag \n\
+       pushl %%eax \n\
+       popfl                   # if we are on a straight 486DX, SX, or \n\
+       pushfl                  # 487SX we can't change it \n\
+       popl %%eax \n\
+       xorl %%ecx,%%eax \n\
+       pushl %%ecx             # restore original EFLAGS \n\
+       popfl \n\
+       andl $0x200000,%%eax \n\
+       je 1f \n\
+\n\
+       /* get vendor info */ \n\
+#      xorl %%eax,%%eax                        # call CPUID with 0 -> return vendor ID \n\
+#      cpuid \n\
+#      movl $5, %%edx \n\
+#      cmpl $0x41757468,%%ebx          # check thats amd \n\
+#      jne 1f \n\
+\n\
+       mov $0x80000000,%%eax           # Is extended cpuid supported?\n\
+       cpuid\n\
+       test $0x80000000,%%eax\n\
+       movl $5, %%edx \n\
+       jz 1f\n\
+\n\
+       movl $0x80000001,%%eax \n\
+       cpuid \n\
+       andl $0x20000000,%%edx \n\
+       movl $6, %%edx \n\
+       jz 1f \n\
+\n\
+       movl $7, %%edx \n\
+1:" : "=d" (res) : : "eax", "ebx", "ecx" );
+
+       switch (res) {
+       case 3: puts( "386" );
+               break;
+       case 4: puts( "486" );
+               break;
+       case 5: puts( "no extended cpuid" );
+               break;
+       case 6: puts( "non-64bit 586+" );
+               break;
+       case 7: puts( "64bit" );
+               break;
+       default:puts( "internal error" );
+               break;
+       }
+       if (res !=7)
+               error( "Sorry, your CPU is not capable of running a 64-bit kernel." );
+}
+
+int decompress_kernel(struct moveparams *mv, void *rmode)
+{
+       real_mode = rmode;
+
+       if (SCREEN_INFO.orig_video_mode == 7) {
+               vidmem = (char *) 0xb0000;
+               vidport = 0x3b4;
+       } else {
+               vidmem = (char *) 0xb8000;
+               vidport = 0x3d4;
+       }
+
+       lines = SCREEN_INFO.orig_video_lines;
+       cols = SCREEN_INFO.orig_video_cols;
+
+       if (free_mem_ptr < 0x100000) setup_normal_output_buffer();
+       else setup_output_buffer_if_we_run_high(mv);
+
+       makecrc();
+       puts("Checking CPU type...");
+       check_cpu();
+       puts(".\nDecompressing Linux...");
+       gunzip();
+       puts("done.\nBooting the kernel.\n");
+       if (high_loaded) close_output_buffer_if_we_run_high(mv);
+       return high_loaded;
+}
diff --git a/arch/x86_64/boot/compressed/miscsetup.h b/arch/x86_64/boot/compressed/miscsetup.h
new file mode 100644 (file)
index 0000000..85dc2d8
--- /dev/null
@@ -0,0 +1,39 @@
+#define NULL 0
+typedef unsigned int size_t; 
+
+
+struct screen_info {
+       unsigned char  orig_x;                  /* 0x00 */
+       unsigned char  orig_y;                  /* 0x01 */
+       unsigned short dontuse1;                /* 0x02 -- EXT_MEM_K sits here */
+       unsigned short orig_video_page;         /* 0x04 */
+       unsigned char  orig_video_mode;         /* 0x06 */
+       unsigned char  orig_video_cols;         /* 0x07 */
+       unsigned short unused2;                 /* 0x08 */
+       unsigned short orig_video_ega_bx;       /* 0x0a */
+       unsigned short unused3;                 /* 0x0c */
+       unsigned char  orig_video_lines;        /* 0x0e */
+       unsigned char  orig_video_isVGA;        /* 0x0f */
+       unsigned short orig_video_points;       /* 0x10 */
+
+       /* VESA graphic mode -- linear frame buffer */
+       unsigned short lfb_width;               /* 0x12 */
+       unsigned short lfb_height;              /* 0x14 */
+       unsigned short lfb_depth;               /* 0x16 */
+       unsigned long  lfb_base;                /* 0x18 */
+       unsigned long  lfb_size;                /* 0x1c */
+       unsigned short dontuse2, dontuse3;      /* 0x20 -- CL_MAGIC and CL_OFFSET here */
+       unsigned short lfb_linelength;          /* 0x24 */
+       unsigned char  red_size;                /* 0x26 */
+       unsigned char  red_pos;                 /* 0x27 */
+       unsigned char  green_size;              /* 0x28 */
+       unsigned char  green_pos;               /* 0x29 */
+       unsigned char  blue_size;               /* 0x2a */
+       unsigned char  blue_pos;                /* 0x2b */
+       unsigned char  rsvd_size;               /* 0x2c */
+       unsigned char  rsvd_pos;                /* 0x2d */
+       unsigned short vesapm_seg;              /* 0x2e */
+       unsigned short vesapm_off;              /* 0x30 */
+       unsigned short pages;                   /* 0x32 */
+                                               /* 0x34 -- 0x3f reserved for future expansion */
+};
diff --git a/arch/x86_64/boot/install.sh b/arch/x86_64/boot/install.sh
new file mode 100644 (file)
index 0000000..346ea8f
--- /dev/null
@@ -0,0 +1,39 @@
+#!/bin/sh
+#
+# arch/i386/boot/install.sh
+#
+# This file is subject to the terms and conditions of the GNU General Public
+# License.  See the file "COPYING" in the main directory of this archive
+# for more details.
+#
+# Copyright (C) 1995 by Linus Torvalds
+#
+# Adapted from code in arch/i386/boot/Makefile by H. Peter Anvin
+#
+# "make install" script for i386 architecture
+#
+# Arguments:
+#   $1 - kernel version
+#   $2 - kernel image file
+#   $3 - kernel map file
+#   $4 - default install path (blank if root directory)
+#
+
+# User may have a custom install script
+
+if [ -x /sbin/installkernel ]; then exec /sbin/installkernel "$@"; fi
+
+# Default install - same as make zlilo
+
+if [ -f $4/vmlinuz ]; then
+       mv $4/vmlinuz $4/vmlinuz.old
+fi
+
+if [ -f $4/System.map ]; then
+       mv $4/System.map $4/System.old
+fi
+
+cat $2 > $4/vmlinuz
+cp $3 $4/System.map
+
+if [ -x /sbin/lilo ]; then /sbin/lilo; else /etc/lilo/install; fi
diff --git a/arch/x86_64/boot/setup.S b/arch/x86_64/boot/setup.S
new file mode 100644 (file)
index 0000000..c40d0eb
--- /dev/null
@@ -0,0 +1,955 @@
+/*
+ *     setup.S         Copyright (C) 1991, 1992 Linus Torvalds
+ *
+ * setup.s is responsible for getting the system data from the BIOS,
+ * and putting them into the appropriate places in system memory.
+ * Both setup.s and the system have been loaded by the bootblock.
+ *
+ * This code asks the bios for memory/disk/other parameters, and
+ * puts them in a "safe" place: 0x90000-0x901FF, ie where the
+ * boot-block used to be. It is then up to the protected mode
+ * system to read them from there before the area is overwritten
+ * for buffer-blocks.
+ *
+ * Move PS/2 aux init code to psaux.c
+ * (troyer@saifr00.cfsat.Honeywell.COM) 03Oct92
+ *
+ * some changes and additional features by Christoph Niemann,
+ * March 1993/June 1994 (Christoph.Niemann@linux.org)
+ *
+ * add APM BIOS checking by Stephen Rothwell, May 1994
+ * (sfr@canb.auug.org.au)
+ *
+ * High load stuff, initrd support and position independency
+ * by Hans Lermen & Werner Almesberger, February 1996
+ * <lermen@elserv.ffm.fgan.de>, <almesber@lrc.epfl.ch>
+ *
+ * Video handling moved to video.S by Martin Mares, March 1996
+ * <mj@k332.feld.cvut.cz>
+ *
+ * Extended memory detection scheme retwiddled by orc@pell.chi.il.us (david
+ * parsons) to avoid loadlin confusion, July 1997
+ *
+ * Transcribed from Intel (as86) -> AT&T (gas) by Chris Noe, May 1999.
+ * <stiker@northlink.com>
+ *
+ * Fix to work around buggy BIOSes which don't use the carry bit correctly
+ * and/or report extended memory in CX/DX for the e801h memory size detection
+ * call.  As a result the kernel got wrong figures.  The int15/e801h docs
+ * from Ralf Brown's interrupt list seem to indicate AX/BX should be used
+ * anyway.  So to avoid breaking many machines (presumably there was a reason
+ * to originally use CX/DX instead of AX/BX), we do a kludge to see
+ * if CX/DX have been changed in the e801 call and if so use AX/BX.
+ * Michael Miller, April 2001 <michaelm@mjmm.org>
+ *
+ */
+
+#include <linux/config.h>
+#include <asm/segment.h>
+#include <linux/version.h>
+#include <linux/compile.h>
+#include <asm/boot.h>
+#include <asm/e820.h>
+#include <asm/page.h>
+
+/* Signature words to ensure LILO loaded us right */
+#define SIG1   0xAA55
+#define SIG2   0x5A5A
+
+INITSEG  = DEF_INITSEG         # 0x9000, we move boot here, out of the way
+SYSSEG   = DEF_SYSSEG          # 0x1000, system loaded at 0x10000 (65536).
+SETUPSEG = DEF_SETUPSEG                # 0x9020, this is the current segment
+                               # ... and the former contents of CS
+
+DELTA_INITSEG = SETUPSEG - INITSEG     # 0x0020
+
+.code16
+.globl begtext, begdata, begbss, endtext, enddata, endbss
+
+.text
+begtext:
+.data
+begdata:
+.bss
+begbss:
+.text
+
+start:
+       jmp     trampoline
+
+# This is the setup header, and it must start at %cs:2 (old 0x9020:2)
+
+               .ascii  "HdrS"          # header signature
+               .word   0x0203          # header version number (>= 0x0105)
+                                       # or else old loadlin-1.5 will fail
+realmode_swtch:        .word   0, 0            # default_switch, SETUPSEG
+start_sys_seg: .word   SYSSEG
+               .word   kernel_version  # pointing to kernel version string
+                                       # above section of header is compatible
+                                       # with loadlin-1.5 (header v1.5). Don't
+                                       # change it.
+
+type_of_loader:        .byte   0               # = 0, old one (LILO, Loadlin,
+                                       #      Bootlin, SYSLX, bootsect...)
+                                       # See Documentation/i386/boot.txt for
+                                       # assigned ids
+       
+# flags, unused bits must be zero (RFU) bit within loadflags
+loadflags:
+LOADED_HIGH    = 1                     # If set, the kernel is loaded high
+CAN_USE_HEAP   = 0x80                  # If set, the loader also has set
+                                       # heap_end_ptr to tell how much
+                                       # space behind setup.S can be used for
+                                       # heap purposes.
+                                       # Only the loader knows what is free
+#ifndef __BIG_KERNEL__
+               .byte   0
+#else
+               .byte   LOADED_HIGH
+#endif
+
+setup_move_size: .word  0x8000         # size to move, when setup is not
+                                       # loaded at 0x90000. We will move setup 
+                                       # to 0x90000 just before jumping
+                                       # into the kernel. However, only the
+                                       # loader knows how much data behind
+                                       # us also needs to be loaded.
+
+code32_start:                          # here loaders can put a different
+                                       # start address for 32-bit code.
+#ifndef __BIG_KERNEL__
+               .long   0x1000          #   0x1000 = default for zImage
+#else
+               .long   0x100000        # 0x100000 = default for big kernel
+#endif
+
+ramdisk_image: .long   0               # address of loaded ramdisk image
+                                       # Here the loader puts the 32-bit
+                                       # address where it loaded the image.
+                                       # This only will be read by the kernel.
+
+ramdisk_size:  .long   0               # its size in bytes
+
+bootsect_kludge:
+               .word  bootsect_helper, SETUPSEG
+
+heap_end_ptr:  .word   modelist+1024   # (Header version 0x0201 or later)
+                                       # space from here (exclusive) down to
+                                       # end of setup code can be used by setup
+                                       # for local heap purposes.
+
+pad1:          .word   0
+cmd_line_ptr:  .long 0                 # (Header version 0x0202 or later)
+                                       # If nonzero, a 32-bit pointer
+                                       # to the kernel command line.
+                                       # The command line should be
+                                       # located between the start of
+                                       # setup and the end of low
+                                       # memory (0xa0000), or it may
+                                       # get overwritten before it
+                                       # gets read.  If this field is
+                                       # used, there is no longer
+                                       # anything magical about the
+                                       # 0x90000 segment; the setup
+                                       # can be located anywhere in
+                                       # low memory 0x10000 or higher.
+
+ramdisk_max:   .long 0xffffffff
+       
+trampoline:    call    start_of_setup
+               .space  1024
+# End of setup header #####################################################
+
+start_of_setup:
+# Bootlin depends on this being done early
+       movw    $0x01500, %ax
+       movb    $0x81, %dl
+       int     $0x13
+
+#ifdef SAFE_RESET_DISK_CONTROLLER
+# Reset the disk controller.
+       movw    $0x0000, %ax
+       movb    $0x80, %dl
+       int     $0x13
+#endif
+
+# Set %ds = %cs, we know that SETUPSEG = %cs at this point
+       movw    %cs, %ax                # aka SETUPSEG
+       movw    %ax, %ds
+# Check signature at end of setup
+       cmpw    $SIG1, setup_sig1
+       jne     bad_sig
+
+       cmpw    $SIG2, setup_sig2
+       jne     bad_sig
+
+       jmp     good_sig1
+
+# Routine to print asciiz string at ds:si
+prtstr:
+       lodsb
+       andb    %al, %al
+       jz      fin
+
+       call    prtchr
+       jmp     prtstr
+
+fin:   ret
+
+# Space printing
+prtsp2:        call    prtspc          # Print double space
+prtspc:        movb    $0x20, %al      # Print single space (note: fall-thru)
+
+# Part of above routine, this one just prints ascii al
+prtchr:        pushw   %ax
+       pushw   %cx
+       xorb    %bh, %bh
+       movw    $0x01, %cx
+       movb    $0x0e, %ah
+       int     $0x10
+       popw    %cx
+       popw    %ax
+       ret
+
+beep:  movb    $0x07, %al
+       jmp     prtchr
+       
+no_sig_mess: .string   "No setup signature found ..."
+
+good_sig1:
+       jmp     good_sig
+
+# We now have to find the rest of the setup code/data
+bad_sig:
+       movw    %cs, %ax                        # SETUPSEG
+       subw    $DELTA_INITSEG, %ax             # INITSEG
+       movw    %ax, %ds
+       xorb    %bh, %bh
+       movb    (497), %bl                      # get setup sect from bootsect
+       subw    $4, %bx                         # LILO loads 4 sectors of setup
+       shlw    $8, %bx                         # convert to words (1sect=2^8 words)
+       movw    %bx, %cx
+       shrw    $3, %bx                         # convert to segment
+       addw    $SYSSEG, %bx
+       movw    %bx, %cs:start_sys_seg
+# Move rest of setup code/data to here
+       movw    $2048, %di                      # four sectors loaded by LILO
+       subw    %si, %si
+       movw    %cs, %ax                        # aka SETUPSEG
+       movw    %ax, %es
+       movw    $SYSSEG, %ax
+       movw    %ax, %ds
+       rep
+       movsw
+       movw    %cs, %ax                        # aka SETUPSEG
+       movw    %ax, %ds
+       cmpw    $SIG1, setup_sig1
+       jne     no_sig
+
+       cmpw    $SIG2, setup_sig2
+       jne     no_sig
+
+       jmp     good_sig
+
+no_sig:
+       lea     no_sig_mess, %si
+       call    prtstr
+
+no_sig_loop:
+       jmp     no_sig_loop
+
+good_sig:
+       movw    %cs, %ax                        # aka SETUPSEG
+       subw    $DELTA_INITSEG, %ax             # aka INITSEG
+       movw    %ax, %ds
+# Check if an old loader tries to load a big-kernel
+       testb   $LOADED_HIGH, %cs:loadflags     # Do we have a big kernel?
+       jz      loader_ok                       # No, no danger for old loaders.
+
+       cmpb    $0, %cs:type_of_loader          # Do we have a loader that
+                                               # can deal with us?
+       jnz     loader_ok                       # Yes, continue.
+
+       pushw   %cs                             # No, we have an old loader,
+       popw    %ds                             # die. 
+       lea     loader_panic_mess, %si
+       call    prtstr
+
+       jmp     no_sig_loop
+
+loader_panic_mess: .string "Wrong loader, giving up..."
+
+loader_ok:
+# Get memory size (extended mem, kB)
+
+       xorl    %eax, %eax
+       movl    %eax, (0x1e0)
+#ifndef STANDARD_MEMORY_BIOS_CALL
+       movb    %al, (E820NR)
+# Try three different memory detection schemes.  First, try
+# e820h, which lets us assemble a memory map, then try e801h,
+# which returns a 32-bit memory size, and finally 88h, which
+# returns 0-64m
+
+# method E820H:
+# the memory map from hell.  e820h returns memory classified into
+# a whole bunch of different types, and allows memory holes and
+# everything.  We scan through this memory map and build a list
+# of the first 32 memory areas, which we return at [E820MAP].
+# This is documented at http://www.teleport.com/~acpi/acpihtml/topic245.htm
+
+#define SMAP  0x534d4150
+
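+# Each descriptor returned by e820h is 20 bytes: a 64-bit base address, a
+# 64-bit length and a 32-bit type; up to E820MAX of them are stored at E820MAP.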
+meme820:
+       xorl    %ebx, %ebx                      # continuation counter
+       movw    $E820MAP, %di                   # point into the whitelist
+                                               # so we can have the bios
+                                               # directly write into it.
+
+jmpe820:
+       movl    $0x0000e820, %eax               # e820, upper word zeroed
+       movl    $SMAP, %edx                     # ascii 'SMAP'
+       movl    $20, %ecx                       # size of the e820rec
+       pushw   %ds                             # data record.
+       popw    %es
+       int     $0x15                           # make the call
+       jc      bail820                         # fall to e801 if it fails
+
+       cmpl    $SMAP, %eax                     # check the return is `SMAP'
+       jne     bail820                         # fall to e801 if it fails
+
+#      cmpl    $1, 16(%di)                     # is this usable memory?
+#      jne     again820
+
+       # If this is usable memory, we save it by simply advancing %di by
+       # sizeof(e820rec).
+       #
+good820:
+       movb    (E820NR), %al                   # up to 32 entries
+       cmpb    $E820MAX, %al
+       jnl     bail820
+
+       incb    (E820NR)
+       movw    %di, %ax
+       addw    $20, %ax
+       movw    %ax, %di
+again820:
+       cmpl    $0, %ebx                        # check to see if
+       jne     jmpe820                         # %ebx is set to EOF
+bail820:
+
+
+# method E801H:
+# memory size is in 1k chunksizes, to avoid confusing loadlin.
+# we store the 0xe801 memory size in a completely different place,
+# because it will most likely be longer than 16 bits.
+# (use 1e0 because that's what Larry Augustine uses in his
+# alternative new memory detection scheme, and it's sensible
+# to write everything into the same place.)
+
+meme801:
+       stc                                     # fix to work around buggy
+       xorw    %cx,%cx                         # BIOSes which don't clear/set
+       xorw    %dx,%dx                         # carry on pass/error of
+                                               # e801h memory size call
+                                               # or merely pass cx,dx through
+                                               # without changing them.
+       movw    $0xe801, %ax
+       int     $0x15
+       jc      mem88
+
+       cmpw    $0x0, %cx                       # Kludge to handle BIOSes
+       jne     e801usecxdx                     # which report their extended
+       cmpw    $0x0, %dx                       # memory in AX/BX rather than
+       jne     e801usecxdx                     # CX/DX.  The spec I have read
+       movw    %ax, %cx                        # seems to indicate AX/BX 
+       movw    %bx, %dx                        # are more reasonable anyway...
+
+e801usecxdx:
+       andl    $0xffff, %edx                   # clear sign extend
+       shll    $6, %edx                        # and go from 64k to 1k chunks
+       movl    %edx, (0x1e0)                   # store extended memory size
+       andl    $0xffff, %ecx                   # clear sign extend
+       addl    %ecx, (0x1e0)                   # and add lower memory into
+                                               # total size.
+
+# Ye Olde Traditional Methode.  Returns the memory size (up to 16mb or
+# 64mb, depending on the bios) in ax.
+mem88:
+
+#endif
+       movb    $0x88, %ah
+       int     $0x15
+       movw    %ax, (2)
+
+# Set the keyboard repeat rate to the max
+       movw    $0x0305, %ax
+       xorw    %bx, %bx
+       int     $0x16
+
+# Check for video adapter and its parameters and allow the
+# user to browse video modes.
+       call    video                           # NOTE: we need %ds pointing
+                                               # to bootsector
+
+# Get hd0 data...
+       xorw    %ax, %ax
+       movw    %ax, %ds
+       ldsw    (4 * 0x41), %si
+       movw    %cs, %ax                        # aka SETUPSEG
+       subw    $DELTA_INITSEG, %ax             # aka INITSEG
+       pushw   %ax
+       movw    %ax, %es
+       movw    $0x0080, %di
+       movw    $0x10, %cx
+       pushw   %cx
+       cld
+       rep
+       movsb
+# Get hd1 data...
+       xorw    %ax, %ax
+       movw    %ax, %ds
+       ldsw    (4 * 0x46), %si
+       popw    %cx
+       popw    %es
+       movw    $0x0090, %di
+       rep
+       movsb
+# Check that there IS a hd1 :-)
+       movw    $0x01500, %ax
+       movb    $0x81, %dl
+       int     $0x13
+       jc      no_disk1
+       
+       cmpb    $3, %ah
+       je      is_disk1
+
+no_disk1:
+       movw    %cs, %ax                        # aka SETUPSEG
+       subw    $DELTA_INITSEG, %ax             # aka INITSEG
+       movw    %ax, %es
+       movw    $0x0090, %di
+       movw    $0x10, %cx
+       xorw    %ax, %ax
+       cld
+       rep
+       stosb
+is_disk1:
+# check for Micro Channel (MCA) bus
+       movw    %cs, %ax                        # aka SETUPSEG
+       subw    $DELTA_INITSEG, %ax             # aka INITSEG
+       movw    %ax, %ds
+       xorw    %ax, %ax
+       movw    %ax, (0xa0)                     # set table length to 0
+       movb    $0xc0, %ah
+       stc
+       int     $0x15                           # moves feature table to es:bx
+       jc      no_mca
+
+       pushw   %ds
+       movw    %es, %ax
+       movw    %ax, %ds
+       movw    %cs, %ax                        # aka SETUPSEG
+       subw    $DELTA_INITSEG, %ax             # aka INITSEG
+       movw    %ax, %es
+       movw    %bx, %si
+       movw    $0xa0, %di
+       movw    (%si), %cx
+       addw    $2, %cx                         # table length is a short
+       cmpw    $0x10, %cx
+       jc      sysdesc_ok
+
+       movw    $0x10, %cx                      # we keep only first 16 bytes
+sysdesc_ok:
+       rep
+       movsb
+       popw    %ds
+no_mca:
+# Check for PS/2 pointing device
+       movw    %cs, %ax                        # aka SETUPSEG
+       subw    $DELTA_INITSEG, %ax             # aka INITSEG
+       movw    %ax, %ds
+       movw    $0, (0x1ff)                     # default is no pointing device
+       int     $0x11                           # int 0x11: equipment list
+       testb   $0x04, %al                      # check if mouse installed
+       jz      no_psmouse
+
+       movw    $0xAA, (0x1ff)                  # device present
+no_psmouse:
+
+#if defined(CONFIG_APM) || defined(CONFIG_APM_MODULE)
+# Then check for an APM BIOS...
+                                               # %ds points to the bootsector
+       movw    $0, 0x40                        # version = 0 means no APM BIOS
+       movw    $0x05300, %ax                   # APM BIOS installation check
+       xorw    %bx, %bx
+       int     $0x15
+       jc      done_apm_bios                   # Nope, no APM BIOS
+       
+       cmpw    $0x0504d, %bx                   # Check for "PM" signature
+       jne     done_apm_bios                   # No signature, no APM BIOS
+
+       andw    $0x02, %cx                      # Is 32 bit supported?
+       je      done_apm_bios                   # No 32-bit, no (good) APM BIOS
+
+       movw    $0x05304, %ax                   # Disconnect first just in case
+       xorw    %bx, %bx
+       int     $0x15                           # ignore return code
+       movw    $0x05303, %ax                   # 32 bit connect
+       xorl    %ebx, %ebx
+       xorw    %cx, %cx                        # paranoia :-)
+       xorw    %dx, %dx                        #   ...
+       xorl    %esi, %esi                      #   ...
+       xorw    %di, %di                        #   ...
+       int     $0x15
+       jc      no_32_apm_bios                  # Ack, error. 
+
+       movw    %ax,  (66)                      # BIOS code segment
+       movl    %ebx, (68)                      # BIOS entry point offset
+       movw    %cx,  (72)                      # BIOS 16 bit code segment
+       movw    %dx,  (74)                      # BIOS data segment
+       movl    %esi, (78)                      # BIOS code segment lengths
+       movw    %di,  (82)                      # BIOS data segment length
+# Redo the installation check as the 32 bit connect
+# modifies the flags returned on some BIOSs
+       movw    $0x05300, %ax                   # APM BIOS installation check
+       xorw    %bx, %bx
+       xorw    %cx, %cx                        # paranoia
+       int     $0x15
+       jc      apm_disconnect                  # error -> shouldn't happen
+
+       cmpw    $0x0504d, %bx                   # check for "PM" signature
+       jne     apm_disconnect                  # no sig -> shouldn't happen
+
+       movw    %ax, (64)                       # record the APM BIOS version
+       movw    %cx, (76)                       # and flags
+       jmp     done_apm_bios
+
+apm_disconnect:                                        # Tidy up
+       movw    $0x05304, %ax                   # Disconnect
+       xorw    %bx, %bx
+       int     $0x15                           # ignore return code
+
+       jmp     done_apm_bios
+
+no_32_apm_bios:
+       andw    $0xfffd, (76)                   # remove 32 bit support bit
+done_apm_bios:
+#endif
+
+# Now we want to move to protected mode ...
+       cmpw    $0, %cs:realmode_swtch
+       jz      rmodeswtch_normal
+
+       lcall   *%cs:realmode_swtch
+
+       jmp     rmodeswtch_end
+
+rmodeswtch_normal:
+        pushw  %cs
+       call    default_switch
+
+rmodeswtch_end:
+# we get the code32 start address and modify the below 'jmpi'
+# (loader may have changed it)
+       movl    %cs:code32_start, %eax
+       movl    %eax, %cs:code32
+
+# Now we move the system to its rightful place ... but we check if we have a
+# big-kernel. In that case we *must* not move it ...
+       testb   $LOADED_HIGH, %cs:loadflags
+       jz      do_move0                        # .. then we have a normal low
+                                               # loaded zImage
+                                               # .. or else we have a high
+                                               # loaded bzImage
+       jmp     end_move                        # ... and we skip moving
+
+do_move0:
+       movw    $0x100, %ax                     # start of destination segment
+       movw    %cs, %bp                        # aka SETUPSEG
+       subw    $DELTA_INITSEG, %bp             # aka INITSEG
+       movw    %cs:start_sys_seg, %bx          # start of source segment
+       cld
+do_move:
+       movw    %ax, %es                        # destination segment
+       incb    %ah                             # instead of add ax,#0x100
+       movw    %bx, %ds                        # source segment
+       addw    $0x100, %bx
+       subw    %di, %di
+       subw    %si, %si
+       movw    $0x800, %cx
+       rep
+       movsw
+       cmpw    %bp, %bx                        # assume start_sys_seg > 0x200,
+                                               # so we will perhaps read one
+                                               # page more than needed, but
+                                               # never overwrite INITSEG
+                                               # because destination is a
+                                               # minimum one page below source
+       jb      do_move
+
+end_move:
+# then we load the segment descriptors
+       movw    %cs, %ax                        # aka SETUPSEG
+       movw    %ax, %ds
+               
+# Check whether we need to be downward compatible with version <=201
+       cmpl    $0, cmd_line_ptr
+       jne     end_move_self           # loader uses version >=202 features
+       cmpb    $0x20, type_of_loader
+       je      end_move_self           # bootsect loader, we know of it
+
+# Boot loader doesn't support boot protocol version 2.02.
+# If we have our code not at 0x90000, we need to move it there now.
+# We also then need to move the params behind it (commandline)
+# Because we would overwrite the code on the current IP, we move
+# it in two steps, jumping high after the first one.
+       movw    %cs, %ax
+       cmpw    $SETUPSEG, %ax
+       je      end_move_self
+
+       cli                                     # make sure we really have
+                                               # interrupts disabled !
+                                               # because after this the stack
+                                               # should not be used
+       subw    $DELTA_INITSEG, %ax             # aka INITSEG
+       movw    %ss, %dx
+       cmpw    %ax, %dx
+       jb      move_self_1
+
+       addw    $INITSEG, %dx
+       subw    %ax, %dx                        # this will go into %ss after
+                                               # the move
+move_self_1:
+       movw    %ax, %ds
+       movw    $INITSEG, %ax                   # real INITSEG
+       movw    %ax, %es
+       movw    %cs:setup_move_size, %cx
+       std                                     # we have to move up, so we use
+                                               # direction down because the
+                                               # areas may overlap
+       movw    %cx, %di
+       decw    %di
+       movw    %di, %si
+       subw    $move_self_here+0x200, %cx
+       rep
+       movsb
+       ljmp    $SETUPSEG, $move_self_here
+
+move_self_here:
+       movw    $move_self_here+0x200, %cx
+       rep
+       movsb
+       movw    $SETUPSEG, %ax
+       movw    %ax, %ds
+       movw    %dx, %ss
+end_move_self:                                 # now we are at the right place
+       lidt    idt_48                          # load idt with 0,0
+       xorl    %eax, %eax                      # Compute gdt_base
+       movw    %ds, %ax                        # (Convert %ds:gdt to a linear ptr)
+       shll    $4, %eax
+       addl    $gdt, %eax
+       movl    %eax, (gdt_48+2)
+       lgdt    gdt_48                          # load gdt with whatever is
+                                               # appropriate
+
+# that was painless, now we enable a20
+       call    empty_8042
+
+       movb    $0xD1, %al                      # command write
+       outb    %al, $0x64
+       call    empty_8042
+
+       movb    $0xDF, %al                      # A20 on
+       outb    %al, $0x60
+       call    empty_8042
+
+#
+#      You must preserve the other bits here. Otherwise embarrassing things
+#      like laptops powering off on boot happen. Corrected version by Kira
+#      Brown from Linux 2.2
+#
+       inb     $0x92, %al                      # 
+       orb     $02, %al                        # "fast A20" version
+       outb    %al, $0x92                      # some chips have only this
+
+# wait until a20 really *is* enabled; it can take a fair amount of
+# time on certain systems; Toshiba Tecras are known to have this
+# problem.  The memory location used here (0x200) is the int 0x80
+# vector, which should be safe to use.
+
+       xorw    %ax, %ax                        # segment 0x0000
+       movw    %ax, %fs
+       decw    %ax                             # segment 0xffff (HMA)
+       movw    %ax, %gs
+a20_wait:
+       incw    %ax                             # unused memory location <0xfff0
+       movw    %ax, %fs:(0x200)                # we use the "int 0x80" vector
+       cmpw    %gs:(0x210), %ax                # and its corresponding HMA addr
+       je      a20_wait                        # loop until no longer aliased
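The aliasing test above relies on real-mode address wrap-around; the hedged C sketch below (editorial, not part of the patch) only restates the arithmetic: with A20 disabled the physical address is truncated to 20 bits, so 0xFFFF:0x0210 and 0x0000:0x0200 hit the same byte.

        #include <stdint.h>
        #include <stdio.h>

        /* segment:offset -> physical address, with and without the A20 line */
        static uint32_t phys(uint16_t seg, uint16_t off, int a20_enabled)
        {
                uint32_t linear = ((uint32_t)seg << 4) + off;
                return a20_enabled ? linear : (linear & 0xFFFFF);  /* wrap at 1 MB */
        }

        int main(void)
        {
                printf("A20 off: %#07x vs %#07x\n",
                       phys(0x0000, 0x200, 0), phys(0xFFFF, 0x210, 0));
                printf("A20 on:  %#07x vs %#07x\n",
                       phys(0x0000, 0x200, 1), phys(0xFFFF, 0x210, 1));
                return 0;
        }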
+
+# make sure any possible coprocessor is properly reset..
+       xorw    %ax, %ax
+       outb    %al, $0xf0
+       call    delay
+
+       outb    %al, $0xf1
+       call    delay
+
+# well, that went ok, I hope. Now we mask all interrupts - the rest
+# is done in init_IRQ().
+       movb    $0xFF, %al                      # mask all interrupts for now
+       outb    %al, $0xA1
+       call    delay
+       
+       movb    $0xFB, %al                      # mask all irq's but irq2 which
+       outb    %al, $0x21                      # is cascaded
+
+# Well, that certainly wasn't fun :-(. Hopefully it works, and we don't
+# need no steenking BIOS anyway (except for the initial loading :-).
+# The BIOS-routine wants lots of unnecessary data, and it's less
+# "interesting" anyway. This is how REAL programmers do it.
+#
+# Well, now's the time to actually move into protected mode. To make
+# things as simple as possible, we do no register set-up or anything,
+# we let the gnu-compiled 32-bit programs do that. We just jump to
+# absolute address 0x1000 (or the loader supplied one),
+# in 32-bit protected mode.
+#
+# Note that the short jump isn't strictly needed, although there are
+# reasons why it might be a good idea. It won't hurt in any case.
+       movw    $1, %ax                         # protected mode (PE) bit
+       lmsw    %ax                             # This is it!
+       jmp     flush_instr
+
+flush_instr:
+       xorw    %bx, %bx                        # Flag to indicate a boot
+       xorl    %esi, %esi                      # Pointer to real-mode code
+       movw    %cs, %si
+       subw    $DELTA_INITSEG, %si
+       shll    $4, %esi                        # Convert to 32-bit pointer
+# NOTE: For high loaded big kernels we need a
+#      jmpi    0x100000,__KERNEL_CS
+#
+#      but we haven't yet reloaded the CS register, so the default size
+#      of the target offset is still 16 bits.
+#      However, using an operand prefix (0x66), the CPU will properly
+#      take our 48-bit far pointer. (Intel 80386 Programmer's Reference
+#      Manual, Mixing 16-bit and 32-bit code, page 16-6)
+
+       .byte 0x66, 0xea                        # prefix + jmpi-opcode
+code32:        .long   0x1000                          # will be set to 0x100000
+                                               # for big kernels
+       .word   __KERNEL_CS
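The hand-assembled far jump above is just the 16-bit "ljmp ptr16:16" encoding with an operand-size prefix in front of it; the sketch below (editorial, assumes a little-endian host as on x86) shows the resulting 8-byte layout.

        #include <stdint.h>
        #include <string.h>

        /* 0x66 (operand-size prefix) + 0xEA (far jmp) + 32-bit offset
         * + 16-bit code selector */
        static void encode_ljmp32_sketch(uint8_t out[8], uint32_t offset,
                                         uint16_t selector)
        {
                out[0] = 0x66;
                out[1] = 0xea;
                memcpy(out + 2, &offset, 4);
                memcpy(out + 6, &selector, 2);
        }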
+
+# Here's a bunch of information about your current kernel..
+kernel_version:        .ascii  UTS_RELEASE
+               .ascii  " ("
+               .ascii  LINUX_COMPILE_BY
+               .ascii  "@"
+               .ascii  LINUX_COMPILE_HOST
+               .ascii  ") "
+               .ascii  UTS_VERSION
+               .byte   0
+
+# This is the default real mode switch routine.
+# to be called just before protected mode transition
+default_switch:
+       cli                                     # no interrupts allowed !
+       movb    $0x80, %al                      # disable NMI for bootup
+                                               # sequence
+       outb    %al, $0x70
+       lret
+
+# This routine only gets called if we get loaded by the simple
+# bootsect loader _and_ have a bzImage to load.
+# Because there is no place left in the 512 bytes of the boot sector,
+# we must emigrate to code space here.
+bootsect_helper:
+       cmpw    $0, %cs:bootsect_es
+       jnz     bootsect_second
+
+       movb    $0x20, %cs:type_of_loader
+       movw    %es, %ax
+       shrw    $4, %ax
+       movb    %ah, %cs:bootsect_src_base+2
+       movw    %es, %ax
+       movw    %ax, %cs:bootsect_es
+       subw    $SYSSEG, %ax
+       lret                                    # nothing else to do for now
+
+bootsect_second:
+       pushw   %cx
+       pushw   %si
+       pushw   %bx
+       testw   %bx, %bx                        # 64K full?
+       jne     bootsect_ex
+
+       movw    $0x8000, %cx                    # full 64K, INT15 moves words
+       pushw   %cs
+       popw    %es
+       movw    $bootsect_gdt, %si
+       movw    $0x8700, %ax
+       int     $0x15
+       jc      bootsect_panic                  # this, if INT15 fails
+
+       movw    %cs:bootsect_es, %es            # we reset %es to always point
+       incb    %cs:bootsect_dst_base+2         # to 0x10000
+bootsect_ex:
+       movb    %cs:bootsect_dst_base+2, %ah
+       shlb    $4, %ah                         # we now have the number of
+                                               # moved frames in %ax
+       xorb    %al, %al
+       popw    %bx
+       popw    %si
+       popw    %cx
+       lret
+
+bootsect_gdt:
+       .word   0, 0, 0, 0
+       .word   0, 0, 0, 0
+
+bootsect_src:
+       .word   0xffff
+
+bootsect_src_base:
+       .byte   0x00, 0x00, 0x01                # base = 0x010000
+       .byte   0x93                            # type byte
+       .word   0                               # limit16,base24 =0
+
+bootsect_dst:
+       .word   0xffff
+
+bootsect_dst_base:
+       .byte   0x00, 0x00, 0x10                # base = 0x100000
+       .byte   0x93                            # type byte
+       .word   0                               # limit16,base24 =0
+       .word   0, 0, 0, 0                      # BIOS CS
+       .word   0, 0, 0, 0                      # BIOS DS
+
+bootsect_es:
+       .word   0
+
+bootsect_panic:
+       pushw   %cs
+       popw    %ds
+       cld
+       leaw    bootsect_panic_mess, %si
+       call    prtstr
+       
+bootsect_panic_loop:
+       jmp     bootsect_panic_loop
+
+bootsect_panic_mess:
+       .string "INT15 refuses to access high mem, giving up."
+
+# This routine checks that the keyboard command queue is empty
+# (after emptying the output buffers)
+#
+# Some machines have delusions that the keyboard buffer is always full
+# with no keyboard attached...
+#
+# If there is no keyboard controller, we will usually get 0xff
+# to all the reads.  With each IO taking a microsecond and
+# a timeout of 100,000 iterations, this can take about half a
+# second ("delay" == outb to port 0x80). That should be ok,
+# and should also be plenty of time for a real keyboard controller
+# to empty.
+#
+
+empty_8042:
+       pushl   %ecx
+       movl    $100000, %ecx
+
+empty_8042_loop:
+       decl    %ecx
+       jz      empty_8042_end_loop
+
+       call    delay
+
+       inb     $0x64, %al                      # 8042 status port
+       testb   $1, %al                         # output buffer?
+       jz      no_output
+
+       call    delay
+       inb     $0x60, %al                      # read it
+       jmp     empty_8042_loop
+
+no_output:
+       testb   $2, %al                         # is input buffer full?
+       jnz     empty_8042_loop                 # yes - loop
+empty_8042_end_loop:
+       popl    %ecx
+       ret
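The C sketch below (editorial only) restates the polling logic of empty_8042; the inb() helper from glibc's <sys/io.h> stands in for the port reads that the real-mode code issues directly, and actually running it would require iopl()/ioperm().

        #include <sys/io.h>

        static void empty_8042_sketch(void)
        {
                int timeout;

                for (timeout = 100000; timeout > 0; timeout--) {
                        unsigned char status = inb(0x64);  /* 8042 status port */
                        if (status & 1) {                  /* output buffer full */
                                (void)inb(0x60);           /* drain the pending byte */
                                continue;
                        }
                        if (!(status & 2))                 /* input buffer empty */
                                return;                    /* controller is ready */
                }
        }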
+
+# Read the CMOS clock. Return the seconds in al
+gettime:
+       pushw   %cx
+       movb    $0x02, %ah
+       int     $0x1a
+       movb    %dh, %al                        # %dh contains the seconds
+       andb    $0x0f, %al
+       movb    %dh, %ah
+       movb    $0x04, %cl
+       shrb    %cl, %ah
+       aad
+       popw    %cx
+       ret
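The "aad" instruction above converts the BCD seconds byte returned by INT 0x1A into binary (tens digit in %ah times ten, plus the units in %al); an equivalent one-line C sketch, editorial only:

        static unsigned int bcd_to_bin(unsigned char bcd)
        {
                return (bcd >> 4) * 10 + (bcd & 0x0f);
        }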
+
+# Delay is needed after doing I/O
+delay:
+       outb    %al,$0x80
+       ret
+
+# Descriptor tables
+gdt:
+       .word   0, 0, 0, 0                      # dummy
+
+       .word   0, 0, 0, 0                      # unused
+
+       .word   0xFFFF                          # 4Gb - (0x100000*0x1000 = 4Gb)
+       .word   0                               # base address = 0
+       .word   0x9A00                          # code read/exec
+       .word   0x00CF                          # granularity = 4096, 386
+                                               #  (+5th nibble of limit)
+
+       .word   0xFFFF                          # 4Gb - (0x100000*0x1000 = 4Gb)
+       .word   0                               # base address = 0
+       .word   0x9200                          # data read/write
+       .word   0x00CF                          # granularity = 4096, 386
+                                               #  (+5th nibble of limit)
+# this is the 64-bit descriptor for code
+       .word   0xFFFF
+       .word   0
+       .word   0x9A00                          # code read/exec
+       .word   0x00AF                          # as above, but it is long mode and with D=0
+                                               # it does not seem to do the trick.
+
+idt_48:
+       .word   0                               # idt limit = 0
+       .word   0, 0                            # idt base = 0L
+gdt_48:
+       .word   0x8000                          # gdt limit=32768,
+                                               #  4096 GDT entries
+
+       .word   0, 0                            # gdt base (filled in later)
+
+# Include video setup & detection code
+
+#include "video.S"
+
+# Setup signature -- must be last
+setup_sig1:    .word   SIG1
+setup_sig2:    .word   SIG2
+
+# After this point, there is some free space which is used by the video mode
+# handling code to store the temporary mode table (not used by the kernel).
+
+modelist:
+
+.text
+endtext:
+.data
+enddata:
+.bss
+endbss:
diff --git a/arch/x86_64/boot/tools/build.c b/arch/x86_64/boot/tools/build.c
new file mode 100644 (file)
index 0000000..2c231bd
--- /dev/null
@@ -0,0 +1,189 @@
+/*
+ *  $Id: build.c,v 1.3 2001/06/26 15:14:50 pavel Exp $
+ *
+ *  Copyright (C) 1991, 1992  Linus Torvalds
+ *  Copyright (C) 1997 Martin Mares
+ */
+
+/*
+ * This file builds a disk-image from three different files:
+ *
+ * - bootsect: exactly 512 bytes of 8086 machine code, loads the rest
+ * - setup: 8086 machine code, sets up system parm
+ * - system: 80386 code for actual system
+ *
+ * It does some checking that all files are of the correct type, and
+ * just writes the result to stdout, removing headers and padding to
+ * the right amount. It also writes some system data to stderr.
+ */
+
+/*
+ * Changes by tytso to allow root device specification
+ * High loaded stuff by Hans Lermen & Werner Almesberger, Feb. 1996
+ * Cross compiling fixes by Gertjan van Wingerde, July 1996
+ * Rewritten by Martin Mares, April 1997
+ */
+
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <stdarg.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/sysmacros.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <asm/boot.h>
+
+typedef unsigned char byte;
+typedef unsigned short word;
+typedef unsigned long u32;
+
+#define DEFAULT_MAJOR_ROOT 0
+#define DEFAULT_MINOR_ROOT 0
+
+/* Minimal number of setup sectors (see also bootsect.S) */
+#define SETUP_SECTS 4
+
+byte buf[1024];
+int fd;
+int is_big_kernel;
+
+void die(const char * str, ...)
+{
+       va_list args;
+       va_start(args, str);
+       vfprintf(stderr, str, args);
+       fputc('\n', stderr);
+       exit(1);
+}
+
+void file_open(const char *name)
+{
+       if ((fd = open(name, O_RDONLY, 0)) < 0)
+               die("Unable to open `%s': %m", name);
+}
+
+void usage(void)
+{
+       die("Usage: build [-b] bootsect setup system [rootdev] [> image]");
+}
+
+int main(int argc, char ** argv)
+{
+       unsigned int i, c, sz, setup_sectors;
+       u32 sys_size;
+       byte major_root, minor_root;
+       struct stat sb;
+
+       if (argc > 2 && !strcmp(argv[1], "-b"))
+         {
+           is_big_kernel = 1;
+           argc--, argv++;
+         }
+       if ((argc < 4) || (argc > 5))
+               usage();
+       if (argc > 4) {
+               if (!strcmp(argv[4], "CURRENT")) {
+                       if (stat("/", &sb)) {
+                               perror("/");
+                               die("Couldn't stat /");
+                       }
+                       major_root = major(sb.st_dev);
+                       minor_root = minor(sb.st_dev);
+               } else if (strcmp(argv[4], "FLOPPY")) {
+                       if (stat(argv[4], &sb)) {
+                               perror(argv[4]);
+                               die("Couldn't stat root device.");
+                       }
+                       major_root = major(sb.st_rdev);
+                       minor_root = minor(sb.st_rdev);
+               } else {
+                       major_root = 0;
+                       minor_root = 0;
+               }
+       } else {
+               major_root = DEFAULT_MAJOR_ROOT;
+               minor_root = DEFAULT_MINOR_ROOT;
+       }
+       fprintf(stderr, "Root device is (%d, %d)\n", major_root, minor_root);
+
+       file_open(argv[1]);
+       i = read(fd, buf, sizeof(buf));
+       fprintf(stderr,"Boot sector %d bytes.\n",i);
+       if (i != 512)
+               die("Boot block must be exactly 512 bytes");
+       if (buf[510] != 0x55 || buf[511] != 0xaa)
+               die("Boot block hasn't got boot flag (0xAA55)");
+       buf[508] = minor_root;
+       buf[509] = major_root;
+       if (write(1, buf, 512) != 512)
+               die("Write call failed");
+       close (fd);
+
+       file_open(argv[2]);                                 /* Copy the setup code */
+       for (i=0 ; (c=read(fd, buf, sizeof(buf)))>0 ; i+=c )
+               if (write(1, buf, c) != c)
+                       die("Write call failed");
+       if (c != 0)
+               die("read-error on `setup'");
+       close (fd);
+
+       setup_sectors = (i + 511) / 512;        /* Pad unused space with zeros */
+       /* for compatibility with ancient versions of LILO. */
+       if (setup_sectors < SETUP_SECTS)
+               setup_sectors = SETUP_SECTS;
+       fprintf(stderr, "Setup is %d bytes.\n", i);
+       memset(buf, 0, sizeof(buf));
+       while (i < setup_sectors * 512) {
+               c = setup_sectors * 512 - i;
+               if (c > sizeof(buf))
+                       c = sizeof(buf);
+               if (write(1, buf, c) != c)
+                       die("Write call failed");
+               i += c;
+       }
+
+       file_open(argv[3]);
+       if (fstat (fd, &sb))
+               die("Unable to stat `%s': %m", argv[3]);
+       sz = sb.st_size;
+       fprintf (stderr, "System is %d kB\n", sz/1024);
+       sys_size = (sz + 15) / 16;
+       /* 0x28000*16 = 2.5 MB, conservative estimate for the current maximum */
+       if (sys_size > (is_big_kernel ? 0x28000 : DEF_SYSSIZE))
+               die("System is too big. Try using %smodules.",
+                       is_big_kernel ? "" : "bzImage or ");
+       if (sys_size > 0xefff)
+               fprintf(stderr,"warning: kernel is too big for standalone boot "
+                   "from floppy\n");
+       while (sz > 0) {
+               int l, n;
+
+               l = (sz > sizeof(buf)) ? sizeof(buf) : sz;
+               if ((n=read(fd, buf, l)) != l) {
+                       if (n < 0)
+                               die("Error reading %s: %m", argv[3]);
+                       else
+                               die("%s: Unexpected EOF", argv[3]);
+               }
+               if (write(1, buf, l) != l)
+                       die("Write failed");
+               sz -= l;
+       }
+       close(fd);
+
+       if (lseek(1, 497, SEEK_SET) != 497)                 /* Write sizes to the bootsector */
+               die("Output: seek failed");
+       buf[0] = setup_sectors;
+       if (write(1, buf, 1) != 1)
+               die("Write of setup sector count failed");
+       if (lseek(1, 500, SEEK_SET) != 500)
+               die("Output: seek failed");
+       buf[0] = (sys_size & 0xff);
+       buf[1] = ((sys_size >> 8) & 0xff);
+       if (write(1, buf, 2) != 2)
+               die("Write of image length failed");
+
+       return 0;                                           /* Everything is OK */
+}
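The lseek()/write() pairs at the end of main() patch the finished image in place; the helpers below (editorial, names are illustrative) read back the same boot-sector fields and may make the offsets easier to follow.

        #include <stdint.h>

        static unsigned setup_sector_count(const uint8_t *bootsect)
        {
                return bootsect[497];                          /* written at offset 497 */
        }

        static unsigned system_size_paragraphs(const uint8_t *bootsect)
        {
                return bootsect[500] | (bootsect[501] << 8);   /* 16-byte units, offset 500 */
        }

        static int has_boot_flag(const uint8_t *bootsect)
        {
                return bootsect[510] == 0x55 && bootsect[511] == 0xaa;
        }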
diff --git a/arch/x86_64/boot/video.S b/arch/x86_64/boot/video.S
new file mode 100644 (file)
index 0000000..1a49d97
--- /dev/null
@@ -0,0 +1,1934 @@
+/*     video.S
+ *
+ *     Display adapter & video mode setup, version 2.13 (14-May-99)
+ *
+ *     Copyright (C) 1995 -- 1998 Martin Mares <mj@ucw.cz>
+ *     Based on the original setup.S code (C) Linus Torvalds and Mats Anderson
+ *
+ *     Rewritten to use GNU 'as' by Chris Noe <stiker@northlink.com> May 1999
+ *
+ *     For further information, look at Documentation/svga.txt.
+ *
+ */
+
+#include <linux/config.h> /* for CONFIG_VIDEO_* */
+
+/* Enable autodetection of SVGA adapters and modes. */
+#undef CONFIG_VIDEO_SVGA
+
+/* Enable autodetection of VESA modes */
+#define CONFIG_VIDEO_VESA
+
+/* Enable compacting of mode table */
+#define CONFIG_VIDEO_COMPACT
+
+/* Retain screen contents when switching modes */
+#define CONFIG_VIDEO_RETAIN
+
+/* Enable local mode list */
+#undef CONFIG_VIDEO_LOCAL
+
+/* Force 400 scan lines for standard modes (hack to fix bad BIOS behaviour) */
+#undef CONFIG_VIDEO_400_HACK
+
+/* Hack that lets you force specific BIOS mode ID and specific dimensions */
+#undef CONFIG_VIDEO_GFX_HACK
+#define VIDEO_GFX_BIOS_AX 0x4f02       /* 800x600 on ThinkPad */
+#define VIDEO_GFX_BIOS_BX 0x0102
+#define VIDEO_GFX_DUMMY_RESOLUTION 0x6425      /* 100x37 */
+
+/* This code uses an extended set of video mode numbers. These include:
+ * Aliases for standard modes
+ *     NORMAL_VGA (-1)
+ *     EXTENDED_VGA (-2)
+ *     ASK_VGA (-3)
+ * Video modes numbered by menu position -- NOT RECOMMENDED because of lack
+ * of compatibility when extending the table. These are between 0x00 and 0xff.
+ */
+#define VIDEO_FIRST_MENU 0x0000
+
+/* Standard BIOS video modes (BIOS number + 0x0100) */
+#define VIDEO_FIRST_BIOS 0x0100
+
+/* VESA BIOS video modes (VESA number + 0x0200) */
+#define VIDEO_FIRST_VESA 0x0200
+
+/* Video7 special modes (BIOS number + 0x0900) */
+#define VIDEO_FIRST_V7 0x0900
+
+/* Special video modes */
+#define VIDEO_FIRST_SPECIAL 0x0f00
+#define VIDEO_80x25 0x0f00
+#define VIDEO_8POINT 0x0f01
+#define VIDEO_80x43 0x0f02
+#define VIDEO_80x28 0x0f03
+#define VIDEO_CURRENT_MODE 0x0f04
+#define VIDEO_80x30 0x0f05
+#define VIDEO_80x34 0x0f06
+#define VIDEO_80x60 0x0f07
+#define VIDEO_GFX_HACK 0x0f08
+#define VIDEO_LAST_SPECIAL 0x0f09
+
+/* Video modes given by resolution */
+#define VIDEO_FIRST_RESOLUTION 0x1000
+
+/* The "recalculate timings" flag */
+#define VIDEO_RECALC 0x8000
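The ranges defined above partition the 16-bit extended mode ID; the classifier below is an editorial C sketch of that partitioning (it only mirrors the defines, not the exact dispatch order used later in mode_set).

        static const char *classify_mode_sketch(unsigned int id)
        {
                if (id >= 0xfffd)       return "standard alias (NORMAL/EXTENDED/ASK_VGA)";
                if (id & 0x8000)        return "recalculate-timings flag set";
                if (id >= 0x1000)       return "mode given by resolution";
                if (id >= 0x0f00)       return "special mode";
                if (id >= 0x0900)       return "Video7 mode";
                if (id >= 0x0200)       return "VESA mode";
                if (id >= 0x0100)       return "standard BIOS mode";
                return "menu position";
        }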
+
+/* Positions of various video parameters passed to the kernel */
+/* (see also include/linux/tty.h) */
+#define PARAM_CURSOR_POS       0x00
+#define PARAM_VIDEO_PAGE       0x04
+#define PARAM_VIDEO_MODE       0x06
+#define PARAM_VIDEO_COLS       0x07
+#define PARAM_VIDEO_EGA_BX     0x0a
+#define PARAM_VIDEO_LINES      0x0e
+#define PARAM_HAVE_VGA         0x0f
+#define PARAM_FONT_POINTS      0x10
+
+#define PARAM_LFB_WIDTH                0x12
+#define PARAM_LFB_HEIGHT       0x14
+#define PARAM_LFB_DEPTH                0x16
+#define PARAM_LFB_BASE         0x18
+#define PARAM_LFB_SIZE         0x1c
+#define PARAM_LFB_LINELENGTH   0x24
+#define PARAM_LFB_COLORS       0x26
+#define PARAM_VESAPM_SEG       0x2e
+#define PARAM_VESAPM_OFF       0x30
+#define PARAM_LFB_PAGES                0x32
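The PARAM_* offsets above index the parameter block handed to the kernel; the packed struct below is an editorial sketch of the same layout (field names are illustrative, not the kernel's), with a couple of offset checks.

        #include <stdint.h>
        #include <stddef.h>

        struct video_params_sketch {
                uint16_t cursor_pos;    /* PARAM_CURSOR_POS   0x00 */
                uint8_t  pad0[2];
                uint16_t video_page;    /* PARAM_VIDEO_PAGE   0x04 */
                uint8_t  video_mode;    /* PARAM_VIDEO_MODE   0x06 */
                uint8_t  video_cols;    /* PARAM_VIDEO_COLS   0x07 */
                uint8_t  pad1[2];
                uint16_t video_ega_bx;  /* PARAM_VIDEO_EGA_BX 0x0a */
                uint8_t  pad2[2];
                uint8_t  video_lines;   /* PARAM_VIDEO_LINES  0x0e */
                uint8_t  have_vga;      /* PARAM_HAVE_VGA     0x0f */
                uint16_t font_points;   /* PARAM_FONT_POINTS  0x10 */
                uint16_t lfb_width;     /* PARAM_LFB_WIDTH    0x12 */
                uint16_t lfb_height;    /* PARAM_LFB_HEIGHT   0x14 */
                uint16_t lfb_depth;     /* PARAM_LFB_DEPTH    0x16 */
                uint32_t lfb_base;      /* PARAM_LFB_BASE     0x18 */
                uint32_t lfb_size;      /* PARAM_LFB_SIZE     0x1c */
        } __attribute__((packed));

        _Static_assert(offsetof(struct video_params_sketch, font_points) == 0x10, "layout");
        _Static_assert(offsetof(struct video_params_sketch, lfb_base) == 0x18, "layout");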
+
+
+/* Define DO_STORE according to CONFIG_VIDEO_RETAIN */
+#ifdef CONFIG_VIDEO_RETAIN
+#define DO_STORE call store_screen
+#else
+#define DO_STORE
+#endif /* CONFIG_VIDEO_RETAIN */
+
+# This is the main entry point called by setup.S
+# %ds *must* be pointing to the bootsector
+video: pushw   %ds             # We use different segments
+       pushw   %ds             # FS contains original DS
+       popw    %fs
+       pushw   %cs             # DS is equal to CS
+       popw    %ds
+       pushw   %cs             # ES is equal to CS
+       popw    %es
+       xorw    %ax, %ax
+       movw    %ax, %gs        # GS is zero
+       cld
+       call    basic_detect    # Basic adapter type testing (EGA/VGA/MDA/CGA)
+#ifdef CONFIG_VIDEO_SELECT
+       movw    %fs:(0x01fa), %ax               # User selected video mode
+       cmpw    $ASK_VGA, %ax                   # Bring up the menu
+       jz      vid2
+
+       call    mode_set                        # Set the mode
+       jc      vid1
+
+       leaw    badmdt, %si                     # Invalid mode ID
+       call    prtstr
+vid2:  call    mode_menu
+vid1:
+#ifdef CONFIG_VIDEO_RETAIN
+       call    restore_screen                  # Restore screen contents
+#endif /* CONFIG_VIDEO_RETAIN */
+#endif /* CONFIG_VIDEO_SELECT */
+       call    mode_params                     # Store mode parameters
+       popw    %ds                             # Restore original DS
+       ret
+
+# Detect if we have CGA, MDA, EGA or VGA and pass it to the kernel.
+basic_detect:
+       movb    $0, %fs:(PARAM_HAVE_VGA)
+       movb    $0x12, %ah      # Check EGA/VGA
+       movb    $0x10, %bl
+       int     $0x10
+       movw    %bx, %fs:(PARAM_VIDEO_EGA_BX)   # Identifies EGA to the kernel
+       cmpb    $0x10, %bl                      # No, it's a CGA/MDA/HGA card.
+       je      basret
+
+       incb    adapter
+       movw    $0x1a00, %ax                    # Check EGA or VGA?
+       int     $0x10
+       cmpb    $0x1a, %al                      # 1a means VGA...
+       jne     basret                          # anything else is EGA.
+       
+       incb    %fs:(PARAM_HAVE_VGA)            # We've detected a VGA
+       incb    adapter
+basret:        ret
+
+# Store the video mode parameters for later usage by the kernel.
+# This is done by asking the BIOS except for the rows/columns
+# parameters in the default 80x25 mode -- these are set directly,
+# because some very obscure BIOSes supply insane values.
+mode_params:
+#ifdef CONFIG_VIDEO_SELECT
+       cmpb    $0, graphic_mode
+       jnz     mopar_gr
+#endif
+       movb    $0x03, %ah                      # Read cursor position
+       xorb    %bh, %bh
+       int     $0x10
+       movw    %dx, %fs:(PARAM_CURSOR_POS)
+       movb    $0x0f, %ah                      # Read page/mode/width
+       int     $0x10
+       movw    %bx, %fs:(PARAM_VIDEO_PAGE)
+       movw    %ax, %fs:(PARAM_VIDEO_MODE)     # Video mode and screen width
+       cmpb    $0x7, %al                       # MDA/HGA => segment differs
+       jnz     mopar0
+
+       movw    $0xb000, video_segment
+mopar0: movw   %gs:(0x485), %ax                # Font size
+       movw    %ax, %fs:(PARAM_FONT_POINTS)    # (valid only on EGA/VGA)
+       movw    force_size, %ax                 # Forced size?
+       orw     %ax, %ax
+       jz      mopar1
+
+       movb    %ah, %fs:(PARAM_VIDEO_COLS)
+       movb    %al, %fs:(PARAM_VIDEO_LINES)
+       ret
+
+mopar1:        movb    $25, %al
+       cmpb    $0, adapter                     # If we are on CGA/MDA/HGA, the
+       jz      mopar2                          # screen must have 25 lines.
+
+       movb    %gs:(0x484), %al                # On EGA/VGA, use the EGA+ BIOS
+       incb    %al                             # location of max lines.
+mopar2: movb   %al, %fs:(PARAM_VIDEO_LINES)
+       ret
+
+#ifdef CONFIG_VIDEO_SELECT
+# Fetching of VESA frame buffer parameters
+mopar_gr:
+       leaw    modelist+1024, %di
+       movb    $0x23, %fs:(PARAM_HAVE_VGA)
+       movw    16(%di), %ax
+       movw    %ax, %fs:(PARAM_LFB_LINELENGTH)
+       movw    18(%di), %ax
+       movw    %ax, %fs:(PARAM_LFB_WIDTH)
+       movw    20(%di), %ax
+       movw    %ax, %fs:(PARAM_LFB_HEIGHT)
+       movb    25(%di), %al
+       movb    $0, %ah
+       movw    %ax, %fs:(PARAM_LFB_DEPTH)
+       movb    29(%di), %al    
+       movb    $0, %ah
+       movw    %ax, %fs:(PARAM_LFB_PAGES)
+       movl    40(%di), %eax
+       movl    %eax, %fs:(PARAM_LFB_BASE)
+       movl    31(%di), %eax
+       movl    %eax, %fs:(PARAM_LFB_COLORS)
+       movl    35(%di), %eax
+       movl    %eax, %fs:(PARAM_LFB_COLORS+4)
+
+# get video mem size
+       leaw    modelist+1024, %di
+       movw    $0x4f00, %ax
+       int     $0x10
+       xorl    %eax, %eax
+       movw    18(%di), %ax
+       movl    %eax, %fs:(PARAM_LFB_SIZE)
+# get protected mode interface information
+       movw    $0x4f0a, %ax
+       xorw    %bx, %bx
+       xorw    %di, %di
+       int     $0x10
+       cmp     $0x004f, %ax
+       jnz     no_pm
+
+       movw    %es, %fs:(PARAM_VESAPM_SEG)
+       movw    %di, %fs:(PARAM_VESAPM_OFF)
+no_pm: ret
+
+# The video mode menu
+mode_menu:
+       leaw    keymsg, %si                     # "Return/Space/Timeout" message
+       call    prtstr
+       call    flush
+nokey: call    getkt
+
+       cmpb    $0x0d, %al                      # ENTER ?
+       je      listm                           # yes - manual mode selection
+
+       cmpb    $0x20, %al                      # SPACE ?
+       je      defmd1                          # no - repeat
+
+       call    beep
+       jmp     nokey
+
+defmd1:        ret                                     # No mode chosen? Default 80x25
+
+listm: call    mode_table                      # List mode table
+listm0:        leaw    name_bann, %si                  # Print adapter name
+       call    prtstr
+       movw    card_name, %si
+       orw     %si, %si
+       jnz     an2
+
+       movb    adapter, %al
+       leaw    old_name, %si
+       orb     %al, %al
+       jz      an1
+
+       leaw    ega_name, %si
+       decb    %al
+       jz      an1
+
+       leaw    vga_name, %si
+       jmp     an1
+
+an2:   call    prtstr
+       leaw    svga_name, %si
+an1:   call    prtstr
+       leaw    listhdr, %si                    # Table header
+       call    prtstr
+       movb    $0x30, %dl                      # DL holds mode number
+       leaw    modelist, %si
+lm1:   cmpw    $ASK_VGA, (%si)                 # End?
+       jz      lm2
+
+       movb    %dl, %al                        # Menu selection number
+       call    prtchr
+       call    prtsp2
+       lodsw
+       call    prthw                           # Mode ID
+       call    prtsp2
+       movb    0x1(%si), %al
+       call    prtdec                          # Rows
+       movb    $0x78, %al                      # the letter 'x'
+       call    prtchr
+       lodsw
+       call    prtdec                          # Columns
+       movb    $0x0d, %al                      # New line
+       call    prtchr
+       movb    $0x0a, %al
+       call    prtchr
+       incb    %dl                             # Next character
+       cmpb    $0x3a, %dl
+       jnz     lm1
+
+       movb    $0x61, %dl
+       jmp     lm1
+
+lm2:   leaw    prompt, %si                     # Mode prompt
+       call    prtstr
+       leaw    edit_buf, %di                   # Editor buffer
+lm3:   call    getkey
+       cmpb    $0x0d, %al                      # Enter?
+       jz      lment
+
+       cmpb    $0x08, %al                      # Backspace?
+       jz      lmbs
+
+       cmpb    $0x20, %al                      # Printable?
+       jc      lm3
+
+       cmpw    $edit_buf+4, %di                # Enough space?
+       jz      lm3
+
+       stosb
+       call    prtchr
+       jmp     lm3
+
+lmbs:  cmpw    $edit_buf, %di                  # Backspace
+       jz      lm3
+
+       decw    %di
+       movb    $0x08, %al
+       call    prtchr
+       call    prtspc
+       movb    $0x08, %al
+       call    prtchr
+       jmp     lm3
+       
+lment: movb    $0, (%di)
+       leaw    crlft, %si
+       call    prtstr
+       leaw    edit_buf, %si
+       cmpb    $0, (%si)                       # Empty string = default mode
+       jz      lmdef
+
+       cmpb    $0, 1(%si)                      # One character = menu selection
+       jz      mnusel
+
+       cmpw    $0x6373, (%si)                  # "scan" => mode scanning
+       jnz     lmhx
+
+       cmpw    $0x6e61, 2(%si)
+       jz      lmscan
+
+lmhx:  xorw    %bx, %bx                        # Else => mode ID in hex
+lmhex: lodsb
+       orb     %al, %al
+       jz      lmuse1
+
+       subb    $0x30, %al
+       jc      lmbad
+
+       cmpb    $10, %al
+       jc      lmhx1
+
+       subb    $7, %al
+       andb    $0xdf, %al
+       cmpb    $10, %al
+       jc      lmbad
+
+       cmpb    $16, %al
+       jnc     lmbad
+
+lmhx1: shlw    $4, %bx
+       orb     %al, %bl
+       jmp     lmhex
+
+lmuse1:        movw    %bx, %ax
+       jmp     lmuse
+
+mnusel:        lodsb                                   # Menu selection
+       xorb    %ah, %ah
+       subb    $0x30, %al
+       jc      lmbad
+
+       cmpb    $10, %al
+       jc      lmuse
+       
+       cmpb    $0x61-0x30, %al
+       jc      lmbad
+       
+       subb    $0x61-0x30-10, %al
+       cmpb    $36, %al
+       jnc     lmbad
+
+lmuse: call    mode_set
+       jc      lmdef
+
+lmbad: leaw    unknt, %si
+       call    prtstr
+       jmp     lm2
+lmscan:        cmpb    $0, adapter                     # Scanning only on EGA/VGA
+       jz      lmbad
+
+       movw    $0, mt_end                      # Scanning of modes is
+       movb    $1, scanning                    # done as new autodetection.
+       call    mode_table
+       jmp     listm0
+lmdef: ret
+
+# Additional parts of mode_set... (relative jumps, you know)
+setv7:                                         # Video7 extended modes
+       DO_STORE
+       subb    $VIDEO_FIRST_V7>>8, %bh
+       movw    $0x6f05, %ax
+       int     $0x10
+       stc
+       ret
+
+_setrec:       jmp     setrec                  # Ugly...
+_set_80x25:    jmp     set_80x25
+
+# Aliases for backward compatibility.
+setalias:
+       movw    $VIDEO_80x25, %ax
+       incw    %bx
+       jz      mode_set
+
+       movb    $VIDEO_8POINT-VIDEO_FIRST_SPECIAL, %al
+       incw    %bx
+       jnz     setbad                          # Fall-through!
+
+# Setting of user mode (AX=mode ID) => CF=success
+mode_set:
+       movw    %ax, %bx
+       cmpb    $0xff, %ah
+       jz      setalias
+
+       testb   $VIDEO_RECALC>>8, %ah
+       jnz     _setrec
+
+       cmpb    $VIDEO_FIRST_RESOLUTION>>8, %ah
+       jnc     setres
+       
+       cmpb    $VIDEO_FIRST_SPECIAL>>8, %ah
+       jz      setspc
+       
+       cmpb    $VIDEO_FIRST_V7>>8, %ah
+       jz      setv7
+       
+       cmpb    $VIDEO_FIRST_VESA>>8, %ah
+       jnc     check_vesa
+       
+       orb     %ah, %ah
+       jz      setmenu
+       
+       decb    %ah
+       jz      setbios
+
+setbad:        clc
+       movb    $0, do_restore                  # The screen needn't be restored
+       ret
+
+setvesa:
+       DO_STORE
+       subb    $VIDEO_FIRST_VESA>>8, %bh
+       movw    $0x4f02, %ax                    # VESA BIOS mode set call
+       int     $0x10
+       cmpw    $0x004f, %ax                    # AL=4f if implemented
+       jnz     setbad                          # AH=0 if OK
+
+       stc
+       ret
+
+setbios:
+       DO_STORE
+       int     $0x10                           # Standard BIOS mode set call
+       pushw   %bx
+       movb    $0x0f, %ah                      # Check if really set
+       int     $0x10
+       popw    %bx
+       cmpb    %bl, %al
+       jnz     setbad
+       
+       stc
+       ret
+
+setspc:        xorb    %bh, %bh                        # Set special mode
+       cmpb    $VIDEO_LAST_SPECIAL-VIDEO_FIRST_SPECIAL, %bl
+       jnc     setbad
+       
+       addw    %bx, %bx
+       jmp     *spec_inits(%bx)
+
+setmenu:
+       orb     %al, %al                        # 80x25 is an exception
+       jz      _set_80x25
+       
+       pushw   %bx                             # Set mode chosen from menu
+       call    mode_table                      # Build the mode table
+       popw    %ax
+       shlw    $2, %ax
+       addw    %ax, %si
+       cmpw    %di, %si
+       jnc     setbad
+       
+       movw    (%si), %ax                      # Fetch mode ID
+_m_s:  jmp     mode_set
+
+setres:        pushw   %bx                             # Set mode chosen by resolution
+       call    mode_table
+       popw    %bx
+       xchgb   %bl, %bh
+setr1: lodsw
+       cmpw    $ASK_VGA, %ax                   # End of the list?
+       jz      setbad
+       
+       lodsw
+       cmpw    %bx, %ax
+       jnz     setr1
+       
+       movw    -4(%si), %ax                    # Fetch mode ID
+       jmp     _m_s
+
+check_vesa:
+       leaw    modelist+1024, %di
+       subb    $VIDEO_FIRST_VESA>>8, %bh
+       movw    %bx, %cx                        # Get mode information structure
+       movw    $0x4f01, %ax
+       int     $0x10
+       addb    $VIDEO_FIRST_VESA>>8, %bh
+       cmpw    $0x004f, %ax
+       jnz     setbad
+
+       movb    (%di), %al                      # Check capabilities.
+       andb    $0x19, %al
+       cmpb    $0x09, %al
+       jz      setvesa                         # This is a text mode
+
+       movb    (%di), %al                      # Check capabilities.
+       andb    $0x99, %al
+       cmpb    $0x99, %al
+       jnz     _setbad                         # Doh! No linear frame buffer.
+
+       subb    $VIDEO_FIRST_VESA>>8, %bh
+       orw     $0x4000, %bx                    # Use linear frame buffer
+       movw    $0x4f02, %ax                    # VESA BIOS mode set call
+       int     $0x10
+       cmpw    $0x004f, %ax                    # AL=4f if implemented
+       jnz     _setbad                         # AH=0 if OK
+
+       movb    $1, graphic_mode                # flag graphic mode
+       movb    $0, do_restore                  # no screen restore
+       stc
+       ret
+
+_setbad:       jmp     setbad                  # Ugly...
+
+# Recalculate vertical display end registers -- this fixes various
+# inconsistencies of extended modes on many adapters. Called when
+# the VIDEO_RECALC flag is set in the mode ID.
+
+setrec:        subb    $VIDEO_RECALC>>8, %ah           # Set the base mode
+       call    mode_set
+       jnc     rct3
+
+       movw    %gs:(0x485), %ax                # Font size in pixels
+       movb    %gs:(0x484), %bl                # Number of rows
+       incb    %bl
+       mulb    %bl                             # Number of visible
+       decw    %ax                             # scan lines - 1
+       movw    $0x3d4, %dx
+       movw    %ax, %bx
+       movb    $0x12, %al                      # Lower 8 bits
+       movb    %bl, %ah
+       outw    %ax, %dx
+       movb    $0x07, %al              # Bits 8 and 9 in the overflow register
+       call    inidx
+       xchgb   %al, %ah
+       andb    $0xbd, %ah
+       shrb    %bh
+       jnc     rct1
+       orb     $0x02, %ah
+rct1:  shrb    %bh
+       jnc     rct2
+       orb     $0x40, %ah
+rct2:  movb    $0x07, %al
+       outw    %ax, %dx
+       stc
+rct3:  ret
+
+# Table of routines for setting of the special modes.
+spec_inits:
+       .word   set_80x25
+       .word   set_8pixel
+       .word   set_80x43
+       .word   set_80x28
+       .word   set_current
+       .word   set_80x30
+       .word   set_80x34
+       .word   set_80x60
+       .word   set_gfx
+
+# Set the 80x25 mode. If already set, do nothing.
+set_80x25:
+       movw    $0x5019, force_size             # Override possibly broken BIOS
+use_80x25:
+#ifdef CONFIG_VIDEO_400_HACK
+       movw    $0x1202, %ax                    # Force 400 scan lines
+       movb    $0x30, %bl
+       int     $0x10
+#else
+       movb    $0x0f, %ah                      # Get current mode ID
+       int     $0x10
+       cmpw    $0x5007, %ax    # Mode 7 (80x25 mono) is the only one available
+       jz      st80            # on CGA/MDA/HGA and is also available on EGAM
+
+       cmpw    $0x5003, %ax    # Unknown mode, force 80x25 color
+       jnz     force3
+
+st80:  cmpb    $0, adapter     # CGA/MDA/HGA => mode 3/7 is always 80x25
+       jz      set80
+
+       movb    %gs:(0x0484), %al       # This is EGA+ -- beware of 80x50 etc.
+       orb     %al, %al                # Some buggy BIOSes set 0 rows
+       jz      set80
+       
+       cmpb    $24, %al                # It's hopefully correct
+       jz      set80
+#endif /* CONFIG_VIDEO_400_HACK */
+force3:        DO_STORE
+       movw    $0x0003, %ax                    # Forced set
+       int     $0x10
+set80: stc
+       ret
+
+# Set the 80x50/80x43 8-pixel mode. Simple BIOS calls.
+set_8pixel:
+       DO_STORE
+       call    use_80x25                       # The base is 80x25
+set_8pt:
+       movw    $0x1112, %ax                    # Use 8x8 font
+       xorb    %bl, %bl
+       int     $0x10
+       movw    $0x1200, %ax                    # Use alternate print screen
+       movb    $0x20, %bl
+       int     $0x10
+       movw    $0x1201, %ax                    # Turn off cursor emulation
+       movb    $0x34, %bl
+       int     $0x10
+       movb    $0x01, %ah                      # Define cursor scan lines 6-7
+       movw    $0x0607, %cx
+       int     $0x10
+set_current:
+       stc
+       ret
+
+# Set the 80x28 mode. This mode works on all VGA's, because it's a standard
+# 80x25 mode with 14-point fonts instead of 16-point.
+set_80x28:
+       DO_STORE
+       call    use_80x25                       # The base is 80x25
+set14: movw    $0x1111, %ax                    # Use 9x14 font
+       xorb    %bl, %bl
+       int     $0x10
+       movb    $0x01, %ah                      # Define cursor scan lines 11-12
+       movw    $0x0b0c, %cx
+       int     $0x10
+       stc
+       ret
+
+# Set the 80x43 mode. This mode works on all VGAs.
+# It's a 350-scanline mode with 8-pixel font.
+set_80x43:
+       DO_STORE
+       movw    $0x1201, %ax                    # Set 350 scans
+       movb    $0x30, %bl
+       int     $0x10
+       movw    $0x0003, %ax                    # Reset video mode
+       int     $0x10
+       jmp     set_8pt                         # Use 8-pixel font
+
+# Set the 80x30 mode (all VGA's). 480 scanlines, 16-pixel font.
+set_80x30:
+       call    use_80x25                       # Start with real 80x25
+       DO_STORE
+       movw    $0x3cc, %dx                     # Get CRTC port
+       inb     %dx, %al
+       movb    $0xd4, %dl
+       rorb    %al                             # Mono or color?
+       jc      set48a
+
+       movb    $0xb4, %dl
+set48a:        movw    $0x0c11, %ax            # Vertical sync end (also unlocks CR0-7)
+       call    outidx
+       movw    $0x0b06, %ax                    # Vertical total
+       call    outidx
+       movw    $0x3e07, %ax                    # (Vertical) overflow
+       call    outidx
+       movw    $0xea10, %ax                    # Vertical sync start
+       call    outidx
+       movw    $0xdf12, %ax                    # Vertical display end
+       call    outidx
+       movw    $0xe715, %ax                    # Vertical blank start
+       call    outidx
+       movw    $0x0416, %ax                    # Vertical blank end
+       call    outidx
+       pushw   %dx
+       movb    $0xcc, %dl                      # Misc output register (read)
+       inb     %dx, %al
+       movb    $0xc2, %dl                      # (write)
+       andb    $0x0d, %al      # Preserve clock select bits and color bit
+       orb     $0xe2, %al                      # Set correct sync polarity
+       outb    %al, %dx
+       popw    %dx
+       movw    $0x501e, force_size
+       stc                                     # That's all.
+       ret
+
+# Set the 80x34 mode (all VGA's). 480 scans, 14-pixel font.
+set_80x34:
+       call    set_80x30                       # Set 480 scans
+       call    set14                           # And 14-pt font
+       movw    $0xdb12, %ax                    # VGA vertical display end
+       movw    $0x5022, force_size
+setvde:        call    outidx
+       stc
+       ret
+
+# Set the 80x60 mode (all VGA's). 480 scans, 8-pixel font.
+set_80x60:
+       call    set_80x30                       # Set 480 scans
+       call    set_8pt                         # And 8-pt font
+       movw    $0xdf12, %ax                    # VGA vertical display end
+       movw    $0x503c, force_size
+       jmp     setvde
+
+# Special hack for ThinkPad graphics
+set_gfx:
+#ifdef CONFIG_VIDEO_GFX_HACK
+       movw    $VIDEO_GFX_BIOS_AX, %ax
+       movw    $VIDEO_GFX_BIOS_BX, %bx
+       int     $0x10
+       movw    $VIDEO_GFX_DUMMY_RESOLUTION, force_size
+       stc
+#endif
+       ret
+
+#ifdef CONFIG_VIDEO_RETAIN
+
+# Store screen contents to temporary buffer.
+store_screen:
+       cmpb    $0, do_restore                  # Already stored?
+       jnz     stsr
+
+       testb   $CAN_USE_HEAP, loadflags        # Have we space for storing?
+       jz      stsr
+       
+       pushw   %ax
+       pushw   %bx
+       pushw   force_size                      # Don't force specific size
+       movw    $0, force_size
+       call    mode_params                     # Obtain params of current mode
+       popw    force_size
+       movb    %fs:(PARAM_VIDEO_LINES), %ah
+       movb    %fs:(PARAM_VIDEO_COLS), %al
+       movw    %ax, %bx                        # BX=dimensions
+       mulb    %ah
+       movw    %ax, %cx                        # CX=number of characters
+       addw    %ax, %ax                        # Calculate image size
+       addw    $modelist+1024+4, %ax
+       cmpw    heap_end_ptr, %ax
+       jnc     sts1                            # Unfortunately, out of memory
+
+       movw    %fs:(PARAM_CURSOR_POS), %ax     # Store mode params
+       leaw    modelist+1024, %di
+       stosw
+       movw    %bx, %ax
+       stosw
+       pushw   %ds                             # Store the screen
+       movw    video_segment, %ds
+       xorw    %si, %si
+       rep
+       movsw
+       popw    %ds
+       incb    do_restore                      # Screen will be restored later
+sts1:  popw    %bx
+       popw    %ax
+stsr:  ret
+
+# Restore screen contents from temporary buffer.
+restore_screen:
+       cmpb    $0, do_restore                  # Has the screen been stored?
+       jz      res1
+
+       call    mode_params                     # Get parameters of current mode
+       movb    %fs:(PARAM_VIDEO_LINES), %cl
+       movb    %fs:(PARAM_VIDEO_COLS), %ch
+       leaw    modelist+1024, %si              # Screen buffer
+       lodsw                                   # Set cursor position
+       movw    %ax, %dx
+       cmpb    %cl, %dh
+       jc      res2
+       
+       movb    %cl, %dh
+       decb    %dh
+res2:  cmpb    %ch, %dl
+       jc      res3
+       
+       movb    %ch, %dl
+       decb    %dl
+res3:  movb    $0x02, %ah
+       movb    $0x00, %bh
+       int     $0x10
+       lodsw                                   # Display size
+       movb    %ah, %dl                        # DL=number of lines
+       movb    $0, %ah                         # BX=phys. length of orig. line
+       movw    %ax, %bx
+       cmpb    %cl, %dl                        # Too many?
+       jc      res4
+
+       pushw   %ax
+       movb    %dl, %al
+       subb    %cl, %al
+       mulb    %bl
+       addw    %ax, %si
+       addw    %ax, %si
+       popw    %ax
+       movb    %cl, %dl
+res4:  cmpb    %ch, %al                        # Too wide?
+       jc      res5
+       
+       movb    %ch, %al                        # AX=width of src. line
+res5:  movb    $0, %cl
+       xchgb   %ch, %cl
+       movw    %cx, %bp                        # BP=width of dest. line
+       pushw   %es
+       movw    video_segment, %es
+       xorw    %di, %di                        # Move the data
+       addw    %bx, %bx                        # Convert BX and BP to _bytes_
+       addw    %bp, %bp
+res6:  pushw   %si
+       pushw   %di
+       movw    %ax, %cx
+       rep
+       movsw
+       popw    %di
+       popw    %si
+       addw    %bp, %di
+       addw    %bx, %si
+       decb    %dl
+       jnz     res6
+       
+       popw    %es                             # Done
+res1:  ret
+#endif /* CONFIG_VIDEO_RETAIN */
+
+# Write to indexed VGA register (AL=index, AH=data, DX=index reg. port)
+outidx:        outb    %al, %dx
+       pushw   %ax
+       movb    %ah, %al
+       incw    %dx
+       outb    %al, %dx
+       decw    %dx
+       popw    %ax
+       ret
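outidx (and inidx further down) implement the usual indexed-VGA-register protocol: write the register index to the index port, then access the data at index port + 1. A hedged user-space C sketch using glibc's <sys/io.h> helpers (requires ioperm()/iopl(); editorial only):

        #include <sys/io.h>

        static void vga_write_indexed(unsigned short index_port,
                                      unsigned char index, unsigned char data)
        {
                outb(index, index_port);        /* select the register */
                outb(data, index_port + 1);     /* write its value */
        }

        static unsigned char vga_read_indexed(unsigned short index_port,
                                              unsigned char index)
        {
                outb(index, index_port);
                return inb(index_port + 1);
        }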
+
+# Build the table of video modes (stored after the setup.S code at the
+# `modelist' label). Each video mode record looks like:
+#      .word   MODE-ID         (our special mode ID (see above))
+#      .byte   rows            (number of rows)
+#      .byte   columns         (number of columns)
+# Returns the address of the end of the table in DI; the end is marked
+# with an ASK_VGA ID.
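Before the routine itself, an editorial C sketch of the record layout just described; ASK_VGA is -3, i.e. 0xfffd as a 16-bit end marker (see the alias list near the top of this file).

        #include <stdint.h>
        #include <stdio.h>

        struct mode_record_sketch {
                uint16_t id;    /* extended mode ID */
                uint8_t  rows;
                uint8_t  cols;
        } __attribute__((packed));

        static void list_modes_sketch(const struct mode_record_sketch *m)
        {
                for (; m->id != 0xfffd; m++)    /* ASK_VGA terminates the table */
                        printf("mode %04x: %u columns x %u rows\n",
                               m->id, m->cols, m->rows);
        }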
+mode_table:
+       movw    mt_end, %di                     # Already filled?
+       orw     %di, %di
+       jnz     mtab1x
+       
+       leaw    modelist, %di                   # Store standard modes:
+       movl    $VIDEO_80x25 + 0x50190000, %eax # The 80x25 mode (ALL)
+       stosl
+       movb    adapter, %al                    # CGA/MDA/HGA -- no more modes
+       orb     %al, %al
+       jz      mtabe
+       
+       decb    %al
+       jnz     mtabv
+       
+       movl    $VIDEO_8POINT + 0x502b0000, %eax        # The 80x43 EGA mode
+       stosl
+       jmp     mtabe
+
+mtab1x:        jmp     mtab1
+
+mtabv: leaw    vga_modes, %si                  # All modes for std VGA
+       movw    $vga_modes_end-vga_modes, %cx
+       rep     # I'm unable to use movsw as I don't know how to store a half
+       movsb   # of the expression above to cx without using explicit shr.
+
+       cmpb    $0, scanning                    # Mode scan requested?
+       jz      mscan1
+       
+       call    mode_scan
+mscan1:
+
+#ifdef CONFIG_VIDEO_LOCAL
+       call    local_modes
+#endif /* CONFIG_VIDEO_LOCAL */
+
+#ifdef CONFIG_VIDEO_VESA
+       call    vesa_modes                      # Detect VESA VGA modes
+#endif /* CONFIG_VIDEO_VESA */
+
+#ifdef CONFIG_VIDEO_SVGA
+       cmpb    $0, scanning                    # Bypass when scanning
+       jnz     mscan2
+       
+       call    svga_modes                      # Detect SVGA cards & modes
+mscan2:
+#endif /* CONFIG_VIDEO_SVGA */
+
+mtabe:
+
+#ifdef CONFIG_VIDEO_COMPACT
+       leaw    modelist, %si
+       movw    %di, %dx
+       movw    %si, %di
+cmt1:  cmpw    %dx, %si                        # Scan all modes
+       jz      cmt2
+
+       leaw    modelist, %bx                   # Find in previous entries
+       movw    2(%si), %cx
+cmt3:  cmpw    %bx, %si
+       jz      cmt4
+
+       cmpw    2(%bx), %cx                     # Found => don't copy this entry
+       jz      cmt5
+
+       addw    $4, %bx
+       jmp     cmt3
+
+cmt4:  movsl                                   # Copy entry
+       jmp     cmt1
+
+cmt5:  addw    $4, %si                         # Skip entry
+       jmp     cmt1
+
+cmt2:
+#endif /* CONFIG_VIDEO_COMPACT */
+
+       movw    $ASK_VGA, (%di)                 # End marker
+       movw    %di, mt_end
+mtab1: leaw    modelist, %si                   # SI=mode list, DI=list end
+ret0:  ret
+
+# Modes usable on all standard VGAs
+vga_modes:
+       .word   VIDEO_8POINT
+       .word   0x5032                          # 80x50
+       .word   VIDEO_80x43
+       .word   0x502b                          # 80x43
+       .word   VIDEO_80x28
+       .word   0x501c                          # 80x28
+       .word   VIDEO_80x30
+       .word   0x501e                          # 80x30
+       .word   VIDEO_80x34
+       .word   0x5022                          # 80x34
+       .word   VIDEO_80x60
+       .word   0x503c                          # 80x60
+#ifdef CONFIG_VIDEO_GFX_HACK
+       .word   VIDEO_GFX_HACK
+       .word   VIDEO_GFX_DUMMY_RESOLUTION
+#endif
+
+vga_modes_end:
+# Detect VESA modes.
+
+#ifdef CONFIG_VIDEO_VESA
+vesa_modes:
+       cmpb    $2, adapter                     # VGA only
+       jnz     ret0
+
+       movw    %di, %bp                        # BP=original mode table end
+       addw    $0x200, %di                     # Buffer space
+       movw    $0x4f00, %ax                    # VESA Get card info call
+       int     $0x10
+       movw    %bp, %di
+       cmpw    $0x004f, %ax                    # Successful?
+       jnz     ret0
+       
+       cmpw    $0x4556, 0x200(%di)
+       jnz     ret0
+       
+       cmpw    $0x4153, 0x202(%di)
+       jnz     ret0
+       
+       movw    $vesa_name, card_name           # Set name to "VESA VGA"
+       pushw   %gs
+       lgsw    0x20e(%di), %si                 # GS:SI=mode list
+       movw    $128, %cx                       # Iteration limit
+vesa1:
+# gas version 2.9.1, using BFD version 2.9.1.0.23 buggers the next inst.
+# XXX: lodsw   %gs:(%si), %ax                  # Get next mode in the list
+       gs; lodsw
+       cmpw    $0xffff, %ax                    # End of the table?
+       jz      vesar
+       
+       cmpw    $0x0080, %ax                    # Check validity of mode ID
+       jc      vesa2
+       
+       orb     %ah, %ah                # Valid IDs: 0x0000-0x007f/0x0100-0x07ff
+       jz      vesan                   # Certain BIOSes report 0x80-0xff!
+
+       cmpw    $0x0800, %ax
+       jnc     vesae
+
+vesa2: pushw   %cx
+       movw    %ax, %cx                        # Get mode information structure
+       movw    $0x4f01, %ax
+       int     $0x10
+       movw    %cx, %bx                        # BX=mode number
+       addb    $VIDEO_FIRST_VESA>>8, %bh
+       popw    %cx
+       cmpw    $0x004f, %ax
+       jnz     vesan                   # Don't report errors (buggy BIOSes)
+
+       movb    (%di), %al                      # Check capabilities. We require
+       andb    $0x19, %al                      # a color text mode.
+       cmpb    $0x09, %al
+       jnz     vesan
+       
+       cmpw    $0xb800, 8(%di)         # Standard video memory address required
+       jnz     vesan
+
+       testb   $2, (%di)                       # Mode characteristics supplied?
+       movw    %bx, (%di)                      # Store mode number
+       jz      vesa3
+       
+       xorw    %dx, %dx
+       movw    0x12(%di), %bx                  # Width
+       orb     %bh, %bh
+       jnz     vesan
+       
+       movb    %bl, 0x3(%di)
+       movw    0x14(%di), %ax                  # Height
+       orb     %ah, %ah
+       jnz     vesan
+       
+       movb    %al, 2(%di)
+       mulb    %bl
+       cmpw    $8193, %ax              # Small enough for Linux console driver?
+       jnc     vesan
+
+       jmp     vesaok
+
+vesa3: subw    $0x8108, %bx    # This mode has no detailed info specified,
+       jc      vesan           # so it must be a standard VESA mode.
+
+       cmpw    $5, %bx
+       jnc     vesan
+
+       movw    vesa_text_mode_table(%bx), %ax
+       movw    %ax, 2(%di)
+vesaok:        addw    $4, %di                         # The mode is valid. Store it.
+vesan: loop    vesa1                   # Next mode. Limit exceeded => error
+vesae: leaw    vesaer, %si
+       call    prtstr
+       movw    %bp, %di                        # Discard already found modes.
+vesar: popw    %gs
+       ret
+
+# Dimensions of standard VESA text modes
+vesa_text_mode_table:
+       .byte   60, 80                          # 0108
+       .byte   25, 132                         # 0109
+       .byte   43, 132                         # 010A
+       .byte   50, 132                         # 010B
+       .byte   60, 132                         # 010C
+#endif /* CONFIG_VIDEO_VESA */
+
+# Scan for video modes. A bit dirty, but should work.
+mode_scan:
+       movw    $0x0100, %cx                    # Start with mode 0
+scm1:  movb    $0, %ah                         # Test the mode
+       movb    %cl, %al
+       int     $0x10
+       movb    $0x0f, %ah
+       int     $0x10
+       cmpb    %cl, %al
+       jnz     scm2                            # Mode not set
+
+       movw    $0x3c0, %dx                     # Test if it's a text mode
+       movb    $0x10, %al                      # Mode bits
+       call    inidx
+       andb    $0x03, %al
+       jnz     scm2
+       
+       movb    $0xce, %dl                      # Another set of mode bits
+       movb    $0x06, %al
+       call    inidx
+       shrb    %al
+       jc      scm2
+       
+       movb    $0xd4, %dl                      # Cursor location
+       movb    $0x0f, %al
+       call    inidx
+       orb     %al, %al
+       jnz     scm2
+       
+       movw    %cx, %ax                        # Ok, store the mode
+       stosw
+       movb    %gs:(0x484), %al                # Number of rows
+       incb    %al
+       stosb
+       movw    %gs:(0x44a), %ax                # Number of columns
+       stosb
+scm2:  incb    %cl
+       jns     scm1
+       
+       movw    $0x0003, %ax                    # Return back to mode 3
+       int     $0x10
+       ret
+
+tstidx:        outw    %ax, %dx                        # OUT DX,AX and inidx
+inidx: outb    %al, %dx                        # Read from indexed VGA register
+       incw    %dx                     # AL=index, DX=index reg port -> AL=data
+       inb     %dx, %al
+       decw    %dx
+       ret
+
+# Try to detect type of SVGA card and supply (usually approximate) video
+# mode table for it.
+
+#ifdef CONFIG_VIDEO_SVGA
+svga_modes:
+       leaw    svga_table, %si                 # Test all known SVGA adapters
+dosvga:        lodsw
+       movw    %ax, %bp                        # Default mode table
+       orw     %ax, %ax
+       jz      didsv1
+
+       lodsw                                   # Pointer to test routine
+       pushw   %si
+       pushw   %di
+       pushw   %es
+       movw    $0xc000, %bx
+       movw    %bx, %es
+       call    *%ax                            # Call test routine
+       popw    %es
+       popw    %di
+       popw    %si
+       orw     %bp, %bp
+       jz      dosvga
+       
+       movw    %bp, %si                        # Found, copy the modes
+       movb    svga_prefix, %ah
+cpsvga:        lodsb
+       orb     %al, %al
+       jz      didsv
+       
+       stosw
+       movsw
+       jmp     cpsvga
+
+didsv: movw    %si, card_name                  # Store pointer to card name
+didsv1:        ret
+
+# Table of all known SVGA cards. For each card, we store a pointer to
+# a table of video modes supported by the card and a pointer to a routine
+# used to test for the presence of the card. The video mode table is always
+# followed by the name of the card or the chipset.
+svga_table:
+       .word   ati_md, ati_test
+       .word   oak_md, oak_test
+       .word   paradise_md, paradise_test
+       .word   realtek_md, realtek_test
+       .word   s3_md, s3_test
+       .word   chips_md, chips_test
+       .word   video7_md, video7_test
+       .word   cirrus5_md, cirrus5_test
+       .word   cirrus6_md, cirrus6_test
+       .word   cirrus1_md, cirrus1_test
+       .word   ahead_md, ahead_test
+       .word   everex_md, everex_test
+       .word   genoa_md, genoa_test
+       .word   trident_md, trident_test
+       .word   tseng_md, tseng_test
+       .word   0
+
+# Test routines and mode tables:
+
+# S3 - The test algorithm was taken from the SuperProbe package
+# for XFree86 1.2.1. Report bugs to Christoph.Niemann@linux.org
+s3_test:
+       movw    $0x0f35, %cx    # we store some constants in cl/ch
+       movw    $0x03d4, %dx
+       movb    $0x38, %al
+       call    inidx
+       movb    %al, %bh        # store current CRT-register 0x38
+       movw    $0x0038, %ax
+       call    outidx          # disable writing to special regs
+       movb    %cl, %al        # check whether we can write special reg 0x35
+       call    inidx
+       movb    %al, %bl        # save the current value of CRT reg 0x35
+       andb    $0xf0, %al      # clear bits 0-3
+       movb    %al, %ah
+       movb    %cl, %al        # and write it to CRT reg 0x35
+       call    outidx
+       call    inidx           # now read it back
+       andb    %ch, %al        # clear the upper 4 bits
+       jz      s3_2            # the first test failed. But we have a
+
+       movb    %bl, %ah        # second chance
+       movb    %cl, %al
+       call    outidx
+       jmp     s3_1            # do the other tests
+
+s3_2:  movw    %cx, %ax        # load ah with 0xf and al with 0x35
+       orb     %bl, %ah        # set the upper 4 bits of ah with the orig value
+       call    outidx          # write ...
+       call    inidx           # ... and reread 
+       andb    %cl, %al        # turn off the upper 4 bits
+       pushw   %ax
+       movb    %bl, %ah        # restore old value in register 0x35
+       movb    %cl, %al
+       call    outidx
+       popw    %ax
+       cmpb    %ch, %al        # setting lower 4 bits was successful => bad
+       je      no_s3           # writing is allowed => this is not an S3
+
+s3_1:  movw    $0x4838, %ax    # allow writing to special regs by putting
+       call    outidx          # magic number into CRT-register 0x38
+       movb    %cl, %al        # check whether we can write special reg 0x35
+       call    inidx
+       movb    %al, %bl
+       andb    $0xf0, %al
+       movb    %al, %ah
+       movb    %cl, %al
+       call    outidx
+       call    inidx
+       andb    %ch, %al
+       jnz     no_s3           # no, we can't write => no S3
+
+       movw    %cx, %ax
+       orb     %bl, %ah
+       call    outidx
+       call    inidx
+       andb    %ch, %al
+       pushw   %ax
+       movb    %bl, %ah        # restore old value in register 0x35
+       movb    %cl, %al
+       call    outidx
+       popw    %ax
+       cmpb    %ch, %al
+       jne     no_s31          # writing not possible => no S3
+       movb    $0x30, %al
+       call    inidx           # now get the S3 id ...
+       leaw    idS3, %di
+       movw    $0x10, %cx
+       repne
+       scasb
+       je      no_s31
+
+       movb    %bh, %ah
+       movb    $0x38, %al
+       jmp     s3rest
+
+no_s3: movb    $0x35, %al      # restore CRT register 0x35
+       movb    %bl, %ah
+       call    outidx
+no_s31:        xorw    %bp, %bp        # Detection failed
+s3rest:        movb    %bh, %ah
+       movb    $0x38, %al      # restore old value of CRT register 0x38
+       jmp     outidx
+
+idS3:  .byte   0x81, 0x82, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95
+       .byte   0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa8, 0xb0
+
+s3_md: .byte   0x54, 0x2b, 0x84
+       .byte   0x55, 0x19, 0x84
+       .byte   0
+       .ascii  "S3"
+       .byte   0
+
+# ATI cards.
+ati_test:
+       leaw    idati, %si
+       movw    $0x31, %di
+       movw    $0x09, %cx
+       repe
+       cmpsb
+       je      atiok
+
+       xorw    %bp, %bp
+atiok: ret
+
+idati: .ascii  "761295520"
+
+ati_md:        .byte   0x23, 0x19, 0x84
+       .byte   0x33, 0x2c, 0x84
+       .byte   0x22, 0x1e, 0x64
+       .byte   0x21, 0x19, 0x64
+       .byte   0x58, 0x21, 0x50
+       .byte   0x5b, 0x1e, 0x50
+       .byte   0
+       .ascii  "ATI"
+       .byte   0
+
+# AHEAD
+ahead_test:
+       movw    $0x200f, %ax
+       movw    $0x3ce, %dx
+       outw    %ax, %dx
+       incw    %dx
+       inb     %dx, %al
+       cmpb    $0x20, %al
+       je      isahed
+
+       cmpb    $0x21, %al
+       je      isahed
+       
+       xorw    %bp, %bp
+isahed:        ret
+
+ahead_md:
+       .byte   0x22, 0x2c, 0x84
+       .byte   0x23, 0x19, 0x84
+       .byte   0x24, 0x1c, 0x84
+       .byte   0x2f, 0x32, 0xa0
+       .byte   0x32, 0x22, 0x50
+       .byte   0x34, 0x42, 0x50
+       .byte   0
+       .ascii  "Ahead"
+       .byte   0
+
+# Chips & Tech.
+chips_test:
+       movw    $0x3c3, %dx
+       inb     %dx, %al
+       orb     $0x10, %al
+       outb    %al, %dx
+       movw    $0x104, %dx
+       inb     %dx, %al
+       movb    %al, %bl
+       movw    $0x3c3, %dx
+       inb     %dx, %al
+       andb    $0xef, %al
+       outb    %al, %dx
+       cmpb    $0xa5, %bl
+       je      cantok
+       
+       xorw    %bp, %bp
+cantok:        ret
+
+chips_md:
+       .byte   0x60, 0x19, 0x84
+       .byte   0x61, 0x32, 0x84
+       .byte   0
+       .ascii  "Chips & Technologies"
+       .byte   0
+
+# Cirrus Logic 5X0
+cirrus1_test:
+       movw    $0x3d4, %dx
+       movb    $0x0c, %al
+       outb    %al, %dx
+       incw    %dx
+       inb     %dx, %al
+       movb    %al, %bl
+       xorb    %al, %al
+       outb    %al, %dx
+       decw    %dx
+       movb    $0x1f, %al
+       outb    %al, %dx
+       incw    %dx
+       inb     %dx, %al
+       movb    %al, %bh
+       xorb    %ah, %ah
+       shlb    $4, %al
+       movw    %ax, %cx
+       movb    %bh, %al
+       shrb    $4, %al
+       addw    %ax, %cx
+       shlw    $8, %cx
+       addw    $6, %cx
+       movw    %cx, %ax
+       movw    $0x3c4, %dx
+       outw    %ax, %dx
+       incw    %dx
+       inb     %dx, %al
+       andb    %al, %al
+       jnz     nocirr
+       
+       movb    %bh, %al
+       outb    %al, %dx
+       inb     %dx, %al
+       cmpb    $0x01, %al
+       je      iscirr
+
+nocirr:        xorw    %bp, %bp
+iscirr: movw   $0x3d4, %dx
+       movb    %bl, %al
+       xorb    %ah, %ah
+       shlw    $8, %ax
+       addw    $0x0c, %ax
+       outw    %ax, %dx
+       ret
+
+cirrus1_md:
+       .byte   0x1f, 0x19, 0x84
+       .byte   0x20, 0x2c, 0x84
+       .byte   0x22, 0x1e, 0x84
+       .byte   0x31, 0x25, 0x64
+       .byte   0
+       .ascii  "Cirrus Logic 5X0"
+       .byte   0
+
+# Cirrus Logic 54XX
+cirrus5_test:
+       movw    $0x3c4, %dx
+       movb    $6, %al
+       call    inidx
+       movb    %al, %bl                        # BL=backup
+       movw    $6, %ax
+       call    tstidx
+       cmpb    $0x0f, %al
+       jne     c5fail
+       
+       movw    $0x1206, %ax
+       call    tstidx
+       cmpb    $0x12, %al
+       jne     c5fail
+       
+       movb    $0x1e, %al
+       call    inidx
+       movb    %al, %bh
+       movb    %bh, %ah
+       andb    $0xc0, %ah
+       movb    $0x1e, %al
+       call    tstidx
+       andb    $0x3f, %al
+       jne     c5xx
+       
+       movb    $0x1e, %al
+       movb    %bh, %ah
+       orb     $0x3f, %ah
+       call    tstidx
+       xorb    $0x3f, %al
+       andb    $0x3f, %al
+c5xx:  pushf
+       movb    $0x1e, %al
+       movb    %bh, %ah
+       outw    %ax, %dx
+       popf
+       je      c5done
+
+c5fail:        xorw    %bp, %bp
+c5done:        movb    $6, %al
+       movb    %bl, %ah
+       outw    %ax, %dx
+       ret
+
+cirrus5_md:
+       .byte   0x14, 0x19, 0x84
+       .byte   0x54, 0x2b, 0x84
+       .byte   0
+       .ascii  "Cirrus Logic 54XX"
+       .byte   0
+
+# Cirrus Logic 64XX -- no known extra modes, but must be identified, because
+# it's misidentified by the Ahead test.
+cirrus6_test:
+       movw    $0x3ce, %dx
+       movb    $0x0a, %al
+       call    inidx
+       movb    %al, %bl        # BL=backup
+       movw    $0xce0a, %ax
+       call    tstidx
+       orb     %al, %al
+       jne     c2fail
+       
+       movw    $0xec0a, %ax
+       call    tstidx
+       cmpb    $0x01, %al
+       jne     c2fail
+       
+       movb    $0xaa, %al
+	call	inidx		# 4X, 5X, 7X and 8X are valid 64XX chip IDs.
+       shrb    $4, %al
+       subb    $4, %al
+       jz      c6done
+       
+       decb    %al
+       jz      c6done
+       
+       subb    $2, %al
+       jz      c6done
+       
+       decb    %al
+       jz      c6done
+       
+c2fail:        xorw    %bp, %bp
+c6done:        movb    $0x0a, %al
+       movb    %bl, %ah
+       outw    %ax, %dx
+       ret
+
+cirrus6_md:
+       .byte   0
+       .ascii  "Cirrus Logic 64XX"
+       .byte   0
+
+# Everex / Trident
+everex_test:
+       movw    $0x7000, %ax
+       xorw    %bx, %bx
+       int     $0x10
+       cmpb    $0x70, %al
+       jne     noevrx
+       
+       shrw    $4, %dx
+       cmpw    $0x678, %dx
+       je      evtrid
+       
+       cmpw    $0x236, %dx
+       jne     evrxok
+
+evtrid:        leaw    trident_md, %bp
+evrxok:        ret
+
+noevrx:        xorw    %bp, %bp
+       ret
+
+everex_md:
+       .byte   0x03, 0x22, 0x50
+       .byte   0x04, 0x3c, 0x50
+       .byte   0x07, 0x2b, 0x64
+       .byte   0x08, 0x4b, 0x64
+       .byte   0x0a, 0x19, 0x84
+       .byte   0x0b, 0x2c, 0x84
+       .byte   0x16, 0x1e, 0x50
+       .byte   0x18, 0x1b, 0x64
+       .byte   0x21, 0x40, 0xa0
+       .byte   0x40, 0x1e, 0x84
+       .byte   0
+       .ascii  "Everex/Trident"
+       .byte   0
+
+# Genoa.
+genoa_test:
+       leaw    idgenoa, %si                    # Check Genoa 'clues'
+       xorw    %ax, %ax
+       movb    %es:(0x37), %al
+       movw    %ax, %di
+       movw    $0x04, %cx
+       decw    %si
+       decw    %di
+l1:    incw    %si
+       incw    %di
+       movb    (%si), %al
+       testb   %al, %al
+       jz      l2
+
+       cmpb    %es:(%di), %al
+l2:    loope   l1
+       orw     %cx, %cx
+       je      isgen
+       
+       xorw    %bp, %bp
+isgen: ret
+
+idgenoa: .byte 0x77, 0x00, 0x99, 0x66
+
+genoa_md:
+       .byte   0x58, 0x20, 0x50
+       .byte   0x5a, 0x2a, 0x64
+       .byte   0x60, 0x19, 0x84
+       .byte   0x61, 0x1d, 0x84
+       .byte   0x62, 0x20, 0x84
+       .byte   0x63, 0x2c, 0x84
+       .byte   0x64, 0x3c, 0x84
+       .byte   0x6b, 0x4f, 0x64
+       .byte   0x72, 0x3c, 0x50
+       .byte   0x74, 0x42, 0x50
+       .byte   0x78, 0x4b, 0x64
+       .byte   0
+       .ascii  "Genoa"
+       .byte   0
+
+# OAK
+oak_test:
+       leaw    idoakvga, %si
+       movw    $0x08, %di
+       movw    $0x08, %cx
+       repe
+       cmpsb
+       je      isoak
+       
+       xorw    %bp, %bp
+isoak: ret
+
+idoakvga: .ascii  "OAK VGA "
+
+oak_md: .byte  0x4e, 0x3c, 0x50
+       .byte   0x4f, 0x3c, 0x84
+       .byte   0x50, 0x19, 0x84
+       .byte   0x51, 0x2b, 0x84
+       .byte   0
+       .ascii  "OAK"
+       .byte   0
+
+# WD Paradise.
+paradise_test:
+       leaw    idparadise, %si
+       movw    $0x7d, %di
+       movw    $0x04, %cx
+       repe
+       cmpsb
+       je      ispara
+       
+       xorw    %bp, %bp
+ispara:        ret
+
+idparadise:    .ascii  "VGA="
+
+paradise_md:
+       .byte   0x41, 0x22, 0x50
+       .byte   0x47, 0x1c, 0x84
+       .byte   0x55, 0x19, 0x84
+       .byte   0x54, 0x2c, 0x84
+       .byte   0
+       .ascii  "Paradise"
+       .byte   0
+
+# Trident.
+trident_test:
+       movw    $0x3c4, %dx
+       movb    $0x0e, %al
+       outb    %al, %dx
+       incw    %dx
+       inb     %dx, %al
+       xchgb   %al, %ah
+       xorb    %al, %al
+       outb    %al, %dx
+       inb     %dx, %al
+       xchgb   %ah, %al
+       movb    %al, %bl        # Strange thing ... in the book this wasn't
+       andb    $0x02, %bl      # necessary but it worked on my card which
+       jz      setb2           # is a trident. Without it the screen goes
+                               # blurred ...
+       andb    $0xfd, %al
+       jmp     clrb2           
+
+setb2: orb     $0x02, %al      
+clrb2: outb    %al, %dx
+       andb    $0x0f, %ah
+       cmpb    $0x02, %ah
+       je      istrid
+
+       xorw    %bp, %bp
+istrid:        ret
+
+trident_md:
+       .byte   0x50, 0x1e, 0x50
+       .byte   0x51, 0x2b, 0x50
+       .byte   0x52, 0x3c, 0x50
+       .byte   0x57, 0x19, 0x84
+       .byte   0x58, 0x1e, 0x84
+       .byte   0x59, 0x2b, 0x84
+       .byte   0x5a, 0x3c, 0x84
+       .byte   0
+       .ascii  "Trident"
+       .byte   0
+
+# Tseng.
+tseng_test:
+       movw    $0x3cd, %dx
+       inb     %dx, %al        # Could things be this simple ! :-)
+       movb    %al, %bl
+       movb    $0x55, %al
+       outb    %al, %dx
+       inb     %dx, %al
+       movb    %al, %ah
+       movb    %bl, %al
+       outb    %al, %dx
+       cmpb    $0x55, %ah
+       je      istsen
+
+isnot: xorw    %bp, %bp
+istsen:        ret
+
+tseng_md:
+       .byte   0x26, 0x3c, 0x50
+       .byte   0x2a, 0x28, 0x64
+       .byte   0x23, 0x19, 0x84
+       .byte   0x24, 0x1c, 0x84
+       .byte   0x22, 0x2c, 0x84
+       .byte   0x21, 0x3c, 0x84
+       .byte   0
+       .ascii  "Tseng"
+       .byte   0
+
+# Video7.
+video7_test:
+       movw    $0x3cc, %dx
+       inb     %dx, %al
+       movw    $0x3b4, %dx
+       andb    $0x01, %al
+       jz      even7
+
+       movw    $0x3d4, %dx
+even7: movb    $0x0c, %al
+       outb    %al, %dx
+       incw    %dx
+       inb     %dx, %al
+       movb    %al, %bl
+       movb    $0x55, %al
+       outb    %al, %dx
+       inb     %dx, %al
+       decw    %dx
+       movb    $0x1f, %al
+       outb    %al, %dx
+       incw    %dx
+       inb     %dx, %al
+       movb    %al, %bh
+       decw    %dx
+       movb    $0x0c, %al
+       outb    %al, %dx
+       incw    %dx
+       movb    %bl, %al
+       outb    %al, %dx
+       movb    $0x55, %al
+       xorb    $0xea, %al
+       cmpb    %bh, %al
+       jne     isnot
+       
+       movb    $VIDEO_FIRST_V7>>8, svga_prefix # Use special mode switching
+       ret
+
+video7_md:
+       .byte   0x40, 0x2b, 0x50
+       .byte   0x43, 0x3c, 0x50
+       .byte   0x44, 0x3c, 0x64
+       .byte   0x41, 0x19, 0x84
+       .byte   0x42, 0x2c, 0x84
+       .byte   0x45, 0x1c, 0x84
+       .byte   0
+       .ascii  "Video 7"
+       .byte   0
+
+# Realtek VGA
+realtek_test:
+       leaw    idrtvga, %si
+       movw    $0x45, %di
+       movw    $0x0b, %cx
+       repe
+       cmpsb
+       je      isrt
+       
+       xorw    %bp, %bp
+isrt:  ret
+
+idrtvga:       .ascii  "REALTEK VGA"
+
+realtek_md:
+       .byte   0x1a, 0x3c, 0x50
+       .byte   0x1b, 0x19, 0x84
+       .byte   0x1c, 0x1e, 0x84
+       .byte   0x1d, 0x2b, 0x84
+       .byte   0x1e, 0x3c, 0x84
+       .byte   0
+       .ascii  "REALTEK"
+       .byte   0
+
+#endif /* CONFIG_VIDEO_SVGA */
+
+# User-defined local mode table (VGA only)
+#ifdef CONFIG_VIDEO_LOCAL
+local_modes:
+       leaw    local_mode_table, %si
+locm1: lodsw
+       orw     %ax, %ax
+       jz      locm2
+       
+       stosw
+       movsw
+       jmp     locm1
+
+locm2: ret
+
+# This is the table of local video modes which can be supplied manually
+# by the user. Each entry consists of a mode ID (word) and the dimensions
+# (a byte for the row count followed by a byte for the column count, as in
+# the example below). These modes are placed before all SVGA and VESA modes
+# and override them if table compacting is enabled. The table must end with
+# a zero word followed by the NUL-terminated video adapter name.
+local_mode_table:
+       .word   0x0100                          # Example: 40x25
+       .byte   25,40
+       .word   0
+       .ascii  "Local"
+       .byte   0
+#endif /* CONFIG_VIDEO_LOCAL */
+
+# Read a key and return the ASCII code in al, scan code in ah
+getkey:        xorb    %ah, %ah
+       int     $0x16
+       ret
+
+# Read a key with a timeout of 30 seconds.
+# The hardware clock is used to get the time.
+getkt: call    gettime
+       addb    $30, %al                        # Wait 30 seconds
+       cmpb    $60, %al
+       jl      lminute
+
+       subb    $60, %al
+lminute:
+       movb    %al, %cl
+again: movb    $0x01, %ah
+       int     $0x16
+       jnz     getkey                          # key pressed, so get it
+
+       call    gettime
+       cmpb    %cl, %al
+       jne     again
+
+       movb    $0x20, %al                      # timeout, return `space'
+       ret
+
+# Flush the keyboard buffer
+flush: movb    $0x01, %ah
+       int     $0x16
+       jz      empty
+       
+       xorb    %ah, %ah
+       int     $0x16
+       jmp     flush
+
+empty: ret
+
+# Print hexadecimal number.
+prthw: pushw   %ax
+       movb    %ah, %al
+       call    prthb
+       popw    %ax
+prthb: pushw   %ax
+       shrb    $4, %al
+       call    prthn
+       popw    %ax
+       andb    $0x0f, %al
+prthn: cmpb    $0x0a, %al
+       jc      prth1
+
+       addb    $0x07, %al
+prth1: addb    $0x30, %al
+       jmp     prtchr
+
+# Print decimal number in al
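+# (values of 100 or more recurse so that the leading digits come out first)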
+prtdec:        pushw   %ax
+       pushw   %cx
+       xorb    %ah, %ah
+       movb    $0x0a, %cl
+       idivb   %cl
+       cmpb    $0x09, %al
+       jbe     lt100
+
+       call    prtdec
+       jmp     skip10
+
+lt100: addb    $0x30, %al
+       call    prtchr
+skip10:        movb    %ah, %al
+       addb    $0x30, %al
+       call    prtchr  
+       popw    %cx
+       popw    %ax
+       ret
+
+# VIDEO_SELECT-only variables
+mt_end:                .word   0       # End of video mode table if built
+edit_buf:      .space  6       # Line editor buffer
+card_name:     .word   0       # Pointer to adapter name
+scanning:      .byte   0       # Performing mode scan
+do_restore:    .byte   0       # Screen contents altered during mode change
+svga_prefix:   .byte   VIDEO_FIRST_BIOS>>8     # Default prefix for BIOS modes
+graphic_mode:  .byte   0       # Graphic mode with a linear frame buffer
+
+# Status messages
+keymsg:                .ascii  "Press <RETURN> to see video modes available, "
+               .ascii  "<SPACE> to continue or wait 30 secs"
+               .byte   0x0d, 0x0a, 0
+
+listhdr:       .byte   0x0d, 0x0a
+               .ascii  "Mode:    COLSxROWS:"
+
+crlft:         .byte   0x0d, 0x0a, 0
+
+prompt:                .byte   0x0d, 0x0a
+               .asciz  "Enter mode number or `scan': "
+
+unknt:         .asciz  "Unknown mode ID. Try again."
+
+badmdt:                .ascii  "You passed an undefined mode number."
+               .byte   0x0d, 0x0a, 0
+
+vesaer:                .ascii  "Error: Scanning of VESA modes failed. Please "
+               .ascii  "report to <mj@ucw.cz>."
+               .byte   0x0d, 0x0a, 0
+
+old_name:      .asciz  "CGA/MDA/HGA"
+
+ega_name:      .asciz  "EGA"
+
+svga_name:     .ascii  " "
+
+vga_name:      .asciz  "VGA"
+
+vesa_name:     .asciz  "VESA"
+
+name_bann:     .asciz  "Video adapter: "
+#endif /* CONFIG_VIDEO_SELECT */
+
+# Other variables:
+adapter:       .byte   0       # Video adapter: 0=CGA/MDA/HGA,1=EGA,2=VGA
+video_segment: .word   0xb800  # Video memory segment
+force_size:    .word   0       # Use this size instead of the one in BIOS vars
diff --git a/arch/x86_64/config.in b/arch/x86_64/config.in
new file mode 100644 (file)
index 0000000..c61b071
--- /dev/null
@@ -0,0 +1,240 @@
+#
+# For a description of the syntax of this configuration file,
+# see Documentation/kbuild/config-language.txt.
+#
+mainmenu_name "Linux Kernel Configuration"
+
+define_bool CONFIG_X86_64 y
+
+define_bool CONFIG_X86 y
+define_bool CONFIG_ISA y
+define_bool CONFIG_SBUS n
+
+define_bool CONFIG_UID16 y
+define_bool CONFIG_RWSEM_GENERIC_SPINLOCK n
+define_bool CONFIG_RWSEM_XCHGADD_ALGORITHM y
+
+
+source init/Config.in
+
+mainmenu_option next_comment
+comment 'Processor type and features'
+choice 'Processor family' \
+       "Clawhammer                     CONFIG_MK8" Clawhammer
+       
+#
+# Define implied options from the CPU selection here
+#
+define_int CONFIG_X86_L1_CACHE_BYTES 64
+define_int CONFIG_X86_L1_CACHE_SHIFT 6
+define_bool CONFIG_X86_TSC y
+define_bool CONFIG_X86_GOOD_APIC y
+define_bool CONFIG_X86_CMPXCHG y
+
+tristate '/dev/cpu/*/msr - Model-specific register support' CONFIG_X86_MSR
+tristate '/dev/cpu/*/cpuid - CPU information support' CONFIG_X86_CPUID
+
+define_bool CONFIG_MATH_EMULATION n
+define_bool CONFIG_MCA n
+define_bool CONFIG_EISA n
+
+bool 'MTRR (Memory Type Range Register) support' CONFIG_MTRR
+bool 'Symmetric multi-processing support' CONFIG_SMP
+bool 'Preemptible Kernel' CONFIG_PREEMPT
+# Currently doesn't boot without hacks; probably a simulator bug.
+#if [ "$CONFIG_SMP" != "y" ]; then
+#   bool 'APIC and IO-APIC support on uniprocessors' CONFIG_X86_UP_IOAPIC
+#    if [ "$CONFIG_X86_UP_IOAPIC" = "y" ]; then
+#       define_bool CONFIG_X86_IO_APIC y
+#       define_bool CONFIG_X86_LOCAL_APIC y
+#    fi
+#fi
+if [ "$CONFIG_SMP" = "y" -a "$CONFIG_X86_CMPXCHG" = "y" ]; then
+    define_bool CONFIG_HAVE_DEC_LOCK y
+fi
+endmenu
+
+mainmenu_option next_comment
+comment 'General options'
+
+if [ "$CONFIG_SMP" = "y" ]; then
+   define_bool CONFIG_X86_IO_APIC y
+   define_bool CONFIG_X86_LOCAL_APIC y
+fi
+bool 'PCI support' CONFIG_PCI
+if [ "$CONFIG_PCI" = "y" ]; then
+   define_bool CONFIG_PCI_DIRECT y 
+fi
+
+source drivers/pci/Config.in
+
+bool 'Support for hot-pluggable devices' CONFIG_HOTPLUG
+
+if [ "$CONFIG_HOTPLUG" = "y" ] ; then
+   source drivers/pcmcia/Config.in
+else
+   define_bool CONFIG_PCMCIA n
+fi
+
+if [ "$CONFIG_PROC_FS" = "y" ]; then
+   define_bool CONFIG_KCORE_ELF y
+fi
+# We probably are not going to support a.out, are we? Or should we support a.out in i386 compatibility mode?
+#tristate 'Kernel support for a.out binaries' CONFIG_BINFMT_AOUT
+tristate 'Kernel support for ELF binaries' CONFIG_BINFMT_ELF
+tristate 'Kernel support for MISC binaries' CONFIG_BINFMT_MISC
+
+bool 'Power Management support' CONFIG_PM
+
+bool 'IA32 Emulation' CONFIG_IA32_EMULATION
+
+if [ "$CONFIG_EXPERIMENTAL" = "y" ]; then
+   dep_bool '  ACPI support' CONFIG_ACPI $CONFIG_PM
+   if [ "$CONFIG_ACPI" != "n" ]; then
+      source drivers/acpi/Config.in
+   fi
+fi
+
+endmenu
+
+source drivers/mtd/Config.in
+
+source drivers/parport/Config.in
+
+source drivers/pnp/Config.in
+
+source drivers/block/Config.in
+
+source drivers/md/Config.in
+
+if [ "$CONFIG_NET" = "y" ]; then
+   source net/Config.in
+fi
+
+source drivers/telephony/Config.in
+
+mainmenu_option next_comment
+comment 'ATA/IDE/MFM/RLL support'
+
+tristate 'ATA/IDE/MFM/RLL support' CONFIG_IDE
+
+if [ "$CONFIG_IDE" != "n" ]; then
+  source drivers/ide/Config.in
+else
+  define_bool CONFIG_BLK_DEV_IDE_MODES n
+  define_bool CONFIG_BLK_DEV_HD n
+fi
+endmenu
+
+mainmenu_option next_comment
+comment 'SCSI support'
+
+tristate 'SCSI support' CONFIG_SCSI
+
+if [ "$CONFIG_SCSI" != "n" ]; then
+   source drivers/scsi/Config.in
+fi
+endmenu
+
+source drivers/message/fusion/Config.in
+
+source drivers/ieee1394/Config.in
+
+# Currently not 64-bit safe
+#source drivers/message/i2o/Config.in
+
+if [ "$CONFIG_NET" = "y" ]; then
+   mainmenu_option next_comment
+   comment 'Network device support'
+
+   bool 'Network device support' CONFIG_NETDEVICES
+   if [ "$CONFIG_NETDEVICES" = "y" ]; then
+      source drivers/net/Config.in
+      if [ "$CONFIG_ATM" = "y" ]; then
+         source drivers/atm/Config.in
+      fi
+   fi
+   endmenu
+fi
+
+source net/ax25/Config.in
+
+source net/irda/Config.in
+
+mainmenu_option next_comment
+comment 'ISDN subsystem'
+if [ "$CONFIG_NET" != "n" ]; then
+   tristate 'ISDN support' CONFIG_ISDN
+   if [ "$CONFIG_ISDN" != "n" ]; then
+      source drivers/isdn/Config.in
+   fi
+fi
+endmenu
+
+mainmenu_option next_comment
+comment 'Old CD-ROM drivers (not SCSI, not IDE)'
+
+bool 'Support non-SCSI/IDE/ATAPI CDROM drives' CONFIG_CD_NO_IDESCSI
+if [ "$CONFIG_CD_NO_IDESCSI" != "n" ]; then
+   source drivers/cdrom/Config.in
+fi
+endmenu
+
+#
+# input before char - char/joystick depends on it. As does USB.
+#
+source drivers/input/Config.in
+source drivers/char/Config.in
+
+if [ "$CONFIG_EXPERIMENTAL" = "y" ]; then
+   source net/bluetooth/Config.in
+fi
+
+source drivers/misc/Config.in
+
+source drivers/media/Config.in
+
+source fs/Config.in
+
+if [ "$CONFIG_VT" = "y" ]; then
+   mainmenu_option next_comment
+   comment 'Console drivers'
+   bool 'VGA text console' CONFIG_VGA_CONSOLE
+   bool 'Video mode selection support' CONFIG_VIDEO_SELECT
+   if [ "$CONFIG_EXPERIMENTAL" = "y" ]; then
+      tristate 'MDA text console (dual-headed) (EXPERIMENTAL)' CONFIG_MDA_CONSOLE
+      source drivers/video/Config.in
+   fi
+   endmenu
+fi
+
+mainmenu_option next_comment
+comment 'Sound'
+
+tristate 'Sound card support' CONFIG_SOUND
+if [ "$CONFIG_SOUND" != "n" ]; then
+   source drivers/sound/Config.in
+fi
+endmenu
+
+source drivers/usb/Config.in
+
+mainmenu_option next_comment
+comment 'Kernel hacking'
+
+bool 'Kernel debugging' CONFIG_DEBUG_KERNEL
+if [ "$CONFIG_DEBUG_KERNEL" != "n" ]; then
+   bool '  Debug memory allocations' CONFIG_DEBUG_SLAB
+#   bool '  Memory mapped I/O debugging' CONFIG_DEBUG_IOVIRT
+   bool '  Magic SysRq key' CONFIG_MAGIC_SYSRQ
+   bool '  Spinlock debugging' CONFIG_DEBUG_SPINLOCK
+#   bool '  Early printk' CONFIG_EARLY_PRINTK
+   bool '  Additional run-time checks' CONFIG_CHECKING
+fi
+bool 'Simnow environment (disables time-consuming things)' CONFIG_SIMNOW
+#if [ "$CONFIG_SERIAL_CONSOLE" = "y" ]; then
+#  bool 'Early serial console (ttyS0)' CONFIG_EARLY_SERIAL_CONSOLE
+#fi
+endmenu
+
+source lib/Config.in
diff --git a/arch/x86_64/defconfig b/arch/x86_64/defconfig
new file mode 100644 (file)
index 0000000..61accc3
--- /dev/null
@@ -0,0 +1,568 @@
+#
+# Automatically generated make config: don't edit
+#
+CONFIG_X86_64=y
+CONFIG_X86=y
+CONFIG_ISA=y
+# CONFIG_SBUS is not set
+CONFIG_UID16=y
+# CONFIG_RWSEM_GENERIC_SPINLOCK is not set
+CONFIG_RWSEM_XCHGADD_ALGORITHM=y
+
+#
+# Code maturity level options
+#
+CONFIG_EXPERIMENTAL=y
+
+#
+# General setup
+#
+CONFIG_NET=y
+CONFIG_SYSVIPC=y
+# CONFIG_BSD_PROCESS_ACCT is not set
+CONFIG_SYSCTL=y
+
+#
+# Loadable module support
+#
+CONFIG_MODULES=y
+# CONFIG_MODVERSIONS is not set
+# CONFIG_KMOD is not set
+
+#
+# Processor type and features
+#
+CONFIG_MK8=y
+CONFIG_X86_L1_CACHE_BYTES=64
+CONFIG_X86_L1_CACHE_SHIFT=6
+CONFIG_X86_TSC=y
+CONFIG_X86_GOOD_APIC=y
+CONFIG_X86_MSR=y
+CONFIG_X86_CPUID=y
+# CONFIG_MATH_EMULATION is not set
+# CONFIG_MCA is not set
+# CONFIG_EISA is not set
+CONFIG_MTRR=y
+# CONFIG_SMP is not set
+# CONFIG_PREEMPT is not set
+
+#
+# General options
+#
+CONFIG_PCI=y
+CONFIG_PCI_DIRECT=y
+# CONFIG_PCI_NAMES is not set
+# CONFIG_HOTPLUG is not set
+# CONFIG_PCMCIA is not set
+CONFIG_KCORE_ELF=y
+CONFIG_BINFMT_ELF=y
+# CONFIG_BINFMT_MISC is not set
+CONFIG_PM=y
+CONFIG_IA32_EMULATION=y
+CONFIG_ACPI=y
+CONFIG_ACPI_DEBUG=y
+CONFIG_ACPI_BUSMGR=y
+CONFIG_ACPI_SYS=y
+CONFIG_ACPI_CPU=y
+CONFIG_ACPI_BUTTON=y
+CONFIG_ACPI_AC=y
+CONFIG_ACPI_EC=y
+CONFIG_ACPI_CMBATT=y
+CONFIG_ACPI_THERMAL=y
+
+#
+# Memory Technology Devices (MTD)
+#
+# CONFIG_MTD is not set
+
+#
+# Parallel port support
+#
+# CONFIG_PARPORT is not set
+
+#
+# Plug and Play configuration
+#
+# CONFIG_PNP is not set
+# CONFIG_ISAPNP is not set
+# CONFIG_PNPBIOS is not set
+
+#
+# Block devices
+#
+# CONFIG_BLK_DEV_FD is not set
+# CONFIG_BLK_DEV_XD is not set
+# CONFIG_PARIDE is not set
+# CONFIG_BLK_CPQ_DA is not set
+# CONFIG_BLK_CPQ_CISS_DA is not set
+# CONFIG_CISS_SCSI_TAPE is not set
+# CONFIG_BLK_DEV_DAC960 is not set
+# CONFIG_BLK_DEV_LOOP is not set
+# CONFIG_BLK_DEV_NBD is not set
+CONFIG_BLK_DEV_RAM=y
+CONFIG_BLK_DEV_RAM_SIZE=4096
+CONFIG_BLK_DEV_INITRD=y
+
+#
+# Multi-device support (RAID and LVM)
+#
+# CONFIG_MD is not set
+# CONFIG_BLK_DEV_MD is not set
+# CONFIG_MD_LINEAR is not set
+# CONFIG_MD_RAID0 is not set
+# CONFIG_MD_RAID1 is not set
+# CONFIG_MD_RAID5 is not set
+# CONFIG_MD_MULTIPATH is not set
+# CONFIG_BLK_DEV_LVM is not set
+
+#
+# Networking options
+#
+# CONFIG_PACKET is not set
+# CONFIG_NETLINK_DEV is not set
+# CONFIG_NETFILTER is not set
+# CONFIG_FILTER is not set
+CONFIG_UNIX=y
+CONFIG_INET=y
+# CONFIG_IP_MULTICAST is not set
+# CONFIG_IP_ADVANCED_ROUTER is not set
+# CONFIG_IP_PNP is not set
+# CONFIG_NET_IPIP is not set
+# CONFIG_NET_IPGRE is not set
+# CONFIG_ARPD is not set
+# CONFIG_INET_ECN is not set
+# CONFIG_SYN_COOKIES is not set
+# CONFIG_IPV6 is not set
+# CONFIG_KHTTPD is not set
+# CONFIG_ATM is not set
+# CONFIG_VLAN_8021Q is not set
+
+#
+#  
+#
+# CONFIG_IPX is not set
+# CONFIG_ATALK is not set
+# CONFIG_DECNET is not set
+# CONFIG_BRIDGE is not set
+# CONFIG_X25 is not set
+# CONFIG_LAPB is not set
+# CONFIG_LLC is not set
+# CONFIG_NET_DIVERT is not set
+# CONFIG_ECONET is not set
+# CONFIG_WAN_ROUTER is not set
+# CONFIG_NET_FASTROUTE is not set
+# CONFIG_NET_HW_FLOWCONTROL is not set
+
+#
+# QoS and/or fair queueing
+#
+# CONFIG_NET_SCHED is not set
+
+#
+# Telephony Support
+#
+# CONFIG_PHONE is not set
+# CONFIG_PHONE_IXJ is not set
+# CONFIG_PHONE_IXJ_PCMCIA is not set
+
+#
+# ATA/IDE/MFM/RLL support
+#
+CONFIG_IDE=y
+
+#
+# IDE, ATA and ATAPI Block devices
+#
+CONFIG_BLK_DEV_IDE=y
+
+#
+# Please see Documentation/ide.txt for help/info on IDE drives
+#
+# CONFIG_BLK_DEV_HD_IDE is not set
+# CONFIG_BLK_DEV_HD is not set
+CONFIG_BLK_DEV_IDEDISK=y
+# CONFIG_IDEDISK_MULTI_MODE is not set
+# CONFIG_IDEDISK_STROKE is not set
+# CONFIG_BLK_DEV_IDEDISK_VENDOR is not set
+# CONFIG_BLK_DEV_IDEDISK_FUJITSU is not set
+# CONFIG_BLK_DEV_IDEDISK_IBM is not set
+# CONFIG_BLK_DEV_IDEDISK_MAXTOR is not set
+# CONFIG_BLK_DEV_IDEDISK_QUANTUM is not set
+# CONFIG_BLK_DEV_IDEDISK_SEAGATE is not set
+# CONFIG_BLK_DEV_IDEDISK_WD is not set
+# CONFIG_BLK_DEV_COMMERIAL is not set
+# CONFIG_BLK_DEV_TIVO is not set
+# CONFIG_BLK_DEV_IDECS is not set
+# CONFIG_BLK_DEV_IDECD is not set
+# CONFIG_BLK_DEV_IDETAPE is not set
+# CONFIG_BLK_DEV_IDEFLOPPY is not set
+# CONFIG_BLK_DEV_IDESCSI is not set
+# CONFIG_IDE_TASK_IOCTL is not set
+
+#
+# IDE chipset support/bugfixes
+#
+# CONFIG_BLK_DEV_CMD640 is not set
+# CONFIG_BLK_DEV_CMD640_ENHANCED is not set
+# CONFIG_BLK_DEV_ISAPNP is not set
+# CONFIG_BLK_DEV_RZ1000 is not set
+# CONFIG_BLK_DEV_IDEPCI is not set
+# CONFIG_IDE_CHIPSETS is not set
+# CONFIG_IDEDMA_AUTO is not set
+# CONFIG_DMA_NONPCI is not set
+# CONFIG_BLK_DEV_IDE_MODES is not set
+# CONFIG_BLK_DEV_ATARAID is not set
+# CONFIG_BLK_DEV_ATARAID_PDC is not set
+# CONFIG_BLK_DEV_ATARAID_HPT is not set
+
+#
+# SCSI support
+#
+# CONFIG_SCSI is not set
+
+#
+# Fusion MPT device support
+#
+# CONFIG_FUSION is not set
+# CONFIG_FUSION_BOOT is not set
+# CONFIG_FUSION_ISENSE is not set
+# CONFIG_FUSION_CTL is not set
+# CONFIG_FUSION_LAN is not set
+
+#
+# IEEE 1394 (FireWire) support (EXPERIMENTAL)
+#
+# CONFIG_IEEE1394 is not set
+
+#
+# Network device support
+#
+# CONFIG_NETDEVICES is not set
+
+#
+# Amateur Radio support
+#
+# CONFIG_HAMRADIO is not set
+
+#
+# IrDA (infrared) support
+#
+# CONFIG_IRDA is not set
+
+#
+# ISDN subsystem
+#
+# CONFIG_ISDN is not set
+
+#
+# Old CD-ROM drivers (not SCSI, not IDE)
+#
+# CONFIG_CD_NO_IDESCSI is not set
+
+#
+# Input device support
+#
+# CONFIG_INPUT is not set
+# CONFIG_INPUT_KEYBDEV is not set
+# CONFIG_INPUT_MOUSEDEV is not set
+# CONFIG_INPUT_JOYDEV is not set
+# CONFIG_INPUT_EVDEV is not set
+# CONFIG_GAMEPORT is not set
+CONFIG_SOUND_GAMEPORT=y
+# CONFIG_GAMEPORT_NS558 is not set
+# CONFIG_GAMEPORT_L4 is not set
+# CONFIG_INPUT_EMU10K1 is not set
+# CONFIG_GAMEPORT_PCIGAME is not set
+# CONFIG_GAMEPORT_FM801 is not set
+# CONFIG_GAMEPORT_CS461x is not set
+# CONFIG_SERIO is not set
+# CONFIG_SERIO_SERPORT is not set
+
+#
+# Character devices
+#
+CONFIG_VT=y
+CONFIG_VT_CONSOLE=y
+CONFIG_SERIAL=y
+CONFIG_SERIAL_CONSOLE=y
+# CONFIG_SERIAL_EXTENDED is not set
+# CONFIG_SERIAL_NONSTANDARD is not set
+CONFIG_UNIX98_PTYS=y
+CONFIG_UNIX98_PTY_COUNT=256
+
+#
+# I2C support
+#
+# CONFIG_I2C is not set
+
+#
+# Mice
+#
+# CONFIG_BUSMOUSE is not set
+CONFIG_MOUSE=y
+CONFIG_PSMOUSE=y
+# CONFIG_82C710_MOUSE is not set
+# CONFIG_PC110_PAD is not set
+# CONFIG_QIC02_TAPE is not set
+
+#
+# Watchdog Cards
+#
+# CONFIG_WATCHDOG is not set
+# CONFIG_INTEL_RNG is not set
+# CONFIG_NVRAM is not set
+# CONFIG_RTC is not set
+# CONFIG_DTLK is not set
+# CONFIG_R3964 is not set
+# CONFIG_APPLICOM is not set
+# CONFIG_SONYPI is not set
+
+#
+# Ftape, the floppy tape device driver
+#
+# CONFIG_FTAPE is not set
+# CONFIG_AGP is not set
+# CONFIG_DRM is not set
+# CONFIG_MWAVE is not set
+
+#
+# Bluetooth support
+#
+# CONFIG_BLUEZ is not set
+
+#
+# Misc devices
+#
+
+#
+# Multimedia devices
+#
+# CONFIG_VIDEO_DEV is not set
+
+#
+# File systems
+#
+# CONFIG_QUOTA is not set
+# CONFIG_AUTOFS_FS is not set
+# CONFIG_AUTOFS4_FS is not set
+# CONFIG_REISERFS_FS is not set
+# CONFIG_REISERFS_CHECK is not set
+# CONFIG_REISERFS_PROC_INFO is not set
+# CONFIG_ADFS_FS is not set
+# CONFIG_ADFS_FS_RW is not set
+# CONFIG_AFFS_FS is not set
+# CONFIG_HFS_FS is not set
+# CONFIG_BFS_FS is not set
+# CONFIG_EXT3_FS is not set
+# CONFIG_JBD is not set
+# CONFIG_JBD_DEBUG is not set
+# CONFIG_FAT_FS is not set
+# CONFIG_MSDOS_FS is not set
+# CONFIG_UMSDOS_FS is not set
+# CONFIG_VFAT_FS is not set
+# CONFIG_EFS_FS is not set
+# CONFIG_JFFS_FS is not set
+# CONFIG_JFFS2_FS is not set
+# CONFIG_CRAMFS is not set
+# CONFIG_TMPFS is not set
+CONFIG_RAMFS=y
+# CONFIG_ISO9660_FS is not set
+# CONFIG_JOLIET is not set
+# CONFIG_ZISOFS is not set
+# CONFIG_MINIX_FS is not set
+# CONFIG_VXFS_FS is not set
+# CONFIG_NTFS_FS is not set
+# CONFIG_NTFS_RW is not set
+# CONFIG_HPFS_FS is not set
+CONFIG_PROC_FS=y
+# CONFIG_DEVFS_FS is not set
+# CONFIG_DEVFS_MOUNT is not set
+# CONFIG_DEVFS_DEBUG is not set
+CONFIG_DEVPTS_FS=y
+# CONFIG_QNX4FS_FS is not set
+# CONFIG_QNX4FS_RW is not set
+# CONFIG_ROMFS_FS is not set
+CONFIG_EXT2_FS=y
+# CONFIG_SYSV_FS is not set
+# CONFIG_UDF_FS is not set
+# CONFIG_UDF_RW is not set
+# CONFIG_UFS_FS is not set
+# CONFIG_UFS_FS_WRITE is not set
+CONFIG_SIMICSFS=y
+
+#
+# Network File Systems
+#
+# CONFIG_CODA_FS is not set
+# CONFIG_INTERMEZZO_FS is not set
+# CONFIG_NFS_FS is not set
+# CONFIG_NFS_V3 is not set
+# CONFIG_ROOT_NFS is not set
+# CONFIG_NFSD is not set
+# CONFIG_NFSD_V3 is not set
+# CONFIG_SUNRPC is not set
+# CONFIG_LOCKD is not set
+# CONFIG_SMB_FS is not set
+# CONFIG_NCP_FS is not set
+# CONFIG_NCPFS_PACKET_SIGNING is not set
+# CONFIG_NCPFS_IOCTL_LOCKING is not set
+# CONFIG_NCPFS_STRONG is not set
+# CONFIG_NCPFS_NFS_NS is not set
+# CONFIG_NCPFS_OS2_NS is not set
+# CONFIG_NCPFS_SMALLDOS is not set
+# CONFIG_NCPFS_NLS is not set
+# CONFIG_NCPFS_EXTRAS is not set
+# CONFIG_ZISOFS_FS is not set
+
+#
+# Partition Types
+#
+# CONFIG_PARTITION_ADVANCED is not set
+CONFIG_MSDOS_PARTITION=y
+# CONFIG_SMB_NLS is not set
+# CONFIG_NLS is not set
+
+#
+# Console drivers
+#
+CONFIG_VGA_CONSOLE=y
+# CONFIG_VIDEO_SELECT is not set
+# CONFIG_MDA_CONSOLE is not set
+
+#
+# Frame-buffer support
+#
+# CONFIG_FB is not set
+
+#
+# Sound
+#
+# CONFIG_SOUND is not set
+
+#
+# USB support
+#
+# CONFIG_USB is not set
+
+#
+# USB Host Controller Drivers
+#
+# CONFIG_USB_EHCI_HCD is not set
+# CONFIG_USB_OHCI_HCD is not set
+# CONFIG_USB_UHCI is not set
+# CONFIG_USB_UHCI_ALT is not set
+# CONFIG_USB_OHCI is not set
+
+#
+# USB Device Class drivers
+#
+# CONFIG_USB_AUDIO is not set
+# CONFIG_USB_BLUETOOTH is not set
+
+#
+#   SCSI support is needed for USB Storage
+#
+# CONFIG_USB_STORAGE is not set
+# CONFIG_USB_STORAGE_DEBUG is not set
+# CONFIG_USB_STORAGE_DATAFAB is not set
+# CONFIG_USB_STORAGE_FREECOM is not set
+# CONFIG_USB_STORAGE_ISD200 is not set
+# CONFIG_USB_STORAGE_DPCM is not set
+# CONFIG_USB_STORAGE_HP8200e is not set
+# CONFIG_USB_STORAGE_SDDR09 is not set
+# CONFIG_USB_STORAGE_JUMPSHOT is not set
+# CONFIG_USB_ACM is not set
+# CONFIG_USB_PRINTER is not set
+
+#
+# USB Human Interface Devices (HID)
+#
+
+#
+#   Input core support is needed for USB HID
+#
+
+#
+# USB Imaging devices
+#
+# CONFIG_USB_DC2XX is not set
+# CONFIG_USB_MDC800 is not set
+# CONFIG_USB_SCANNER is not set
+# CONFIG_USB_MICROTEK is not set
+# CONFIG_USB_HPUSBSCSI is not set
+
+#
+# USB Multimedia devices
+#
+
+#
+#   Video4Linux support is needed for USB Multimedia device support
+#
+
+#
+# USB Network adaptors
+#
+# CONFIG_USB_PEGASUS is not set
+# CONFIG_USB_KAWETH is not set
+# CONFIG_USB_CATC is not set
+# CONFIG_USB_CDCETHER is not set
+# CONFIG_USB_USBNET is not set
+
+#
+# USB port drivers
+#
+# CONFIG_USB_USS720 is not set
+
+#
+# USB Serial Converter support
+#
+# CONFIG_USB_SERIAL is not set
+# CONFIG_USB_SERIAL_GENERIC is not set
+# CONFIG_USB_SERIAL_BELKIN is not set
+# CONFIG_USB_SERIAL_WHITEHEAT is not set
+# CONFIG_USB_SERIAL_DIGI_ACCELEPORT is not set
+# CONFIG_USB_SERIAL_EMPEG is not set
+# CONFIG_USB_SERIAL_FTDI_SIO is not set
+# CONFIG_USB_SERIAL_VISOR is not set
+# CONFIG_USB_SERIAL_IPAQ is not set
+# CONFIG_USB_SERIAL_IR is not set
+# CONFIG_USB_SERIAL_EDGEPORT is not set
+# CONFIG_USB_SERIAL_KEYSPAN_PDA is not set
+# CONFIG_USB_SERIAL_KEYSPAN is not set
+# CONFIG_USB_SERIAL_KEYSPAN_USA28 is not set
+# CONFIG_USB_SERIAL_KEYSPAN_USA28X is not set
+# CONFIG_USB_SERIAL_KEYSPAN_USA28XA is not set
+# CONFIG_USB_SERIAL_KEYSPAN_USA28XB is not set
+# CONFIG_USB_SERIAL_KEYSPAN_USA19 is not set
+# CONFIG_USB_SERIAL_KEYSPAN_USA18X is not set
+# CONFIG_USB_SERIAL_KEYSPAN_USA19W is not set
+# CONFIG_USB_SERIAL_KEYSPAN_USA49W is not set
+# CONFIG_USB_SERIAL_MCT_U232 is not set
+# CONFIG_USB_SERIAL_KLSI is not set
+# CONFIG_USB_SERIAL_PL2303 is not set
+# CONFIG_USB_SERIAL_CYBERJACK is not set
+# CONFIG_USB_SERIAL_XIRCOM is not set
+# CONFIG_USB_SERIAL_OMNINET is not set
+
+#
+# USB Miscellaneous drivers
+#
+# CONFIG_USB_RIO500 is not set
+# CONFIG_USB_AUERSWALD is not set
+
+#
+# Kernel hacking
+#
+CONFIG_DEBUG_KERNEL=y
+# CONFIG_DEBUG_SLAB is not set
+# CONFIG_MAGIC_SYSRQ is not set
+# CONFIG_DEBUG_SPINLOCK is not set
+# CONFIG_CHECKING is not set
+CONFIG_SIMNOW=y
+
+#
+# Library routines
+#
+# CONFIG_CRC32 is not set
+# CONFIG_ZLIB_INFLATE is not set
+# CONFIG_ZLIB_DEFLATE is not set
diff --git a/arch/x86_64/ia32/Makefile b/arch/x86_64/ia32/Makefile
new file mode 100644 (file)
index 0000000..660bb29
--- /dev/null
@@ -0,0 +1,18 @@
+#
+# Makefile for the ia32 kernel emulation subsystem.
+#
+
+.S.s:
+       $(CPP) $(AFLAGS) -o $*.s $<
+.S.o:
+       $(CC) $(AFLAGS) -c -o $*.o $<
+
+all: ia32.o
+
+O_TARGET := ia32.o
+obj-$(CONFIG_IA32_EMULATION)    := ia32entry.o sys_ia32.o ia32_ioctl.o ia32_signal.o ia32_binfmt.o \
+       socket32.o ptrace32.o
+
+clean::
+
+include $(TOPDIR)/Rules.make
diff --git a/arch/x86_64/ia32/ia32_binfmt.c b/arch/x86_64/ia32/ia32_binfmt.c
new file mode 100644 (file)
index 0000000..f9baffd
--- /dev/null
@@ -0,0 +1,165 @@
+/* 
+ * Written 2000 by Andi Kleen. 
+ * 
+ * Loosely based on the sparc64 and IA64 32-bit emulation loaders.
+ */ 
+#include <linux/types.h>
+#include <linux/config.h> 
+#include <linux/stddef.h>
+#include <linux/module.h>
+#include <linux/rwsem.h>
+#include <asm/segment.h> 
+#include <asm/ptrace.h>
+#include <asm/processor.h>
+
+#define IA32_EMULATOR 1
+
+#define IA32_PAGE_OFFSET 0xE0000000
+#define IA32_STACK_TOP IA32_PAGE_OFFSET
+#define ELF_ET_DYN_BASE                (IA32_PAGE_OFFSET/3 + 0x1000000)
+
+#undef ELF_ARCH
+#define ELF_ARCH EM_386
+
+#undef ELF_CLASS
+#define ELF_CLASS ELFCLASS32
+
+#define ELF_DATA       ELFDATA2LSB
+//#define USE_ELF_CORE_DUMP
+
+#define __ASM_X86_64_ELF_H 1
+#include <asm/ia32.h>
+#include <linux/elf.h>
+
+typedef __u32  elf_greg_t;
+
+typedef elf_greg_t elf_gregset_t[8];
+
+/* FIXME -- wrong */
+typedef struct user_i387_ia32_struct elf_fpregset_t;
+typedef struct user_i387_struct elf_fpxregset_t;
+
+#undef elf_check_arch
+#define elf_check_arch(x) \
+       ((x)->e_machine == EM_386)
+
+#define ELF_EXEC_PAGESIZE PAGE_SIZE
+#define ELF_HWCAP (boot_cpu_data.x86_capability[0])
+#define ELF_PLATFORM  ("i686")
+#define SET_PERSONALITY(ex, ibcs2)                     \
+do {                                                   \
+       set_personality((ibcs2)?PER_SVR4:current->personality); \
+} while (0)
+
+/* Override some function names */
+#define elf_format                     elf32_format
+
+#define init_elf_binfmt                        init_elf32_binfmt
+#define exit_elf_binfmt                        exit_elf32_binfmt
+
+#define load_elf_binary load_elf32_binary
+
+#undef CONFIG_BINFMT_ELF
+#ifdef CONFIG_BINFMT_ELF32
+# define CONFIG_BINFMT_ELF             CONFIG_BINFMT_ELF32
+#endif
+
+#undef CONFIG_BINFMT_ELF_MODULE
+#ifdef CONFIG_BINFMT_ELF32_MODULE
+# define CONFIG_BINFMT_ELF_MODULE      CONFIG_BINFMT_ELF32_MODULE
+#endif
+
+#define ELF_PLAT_INIT(r)               elf32_init(r)
+#define setup_arg_pages(bprm)          ia32_setup_arg_pages(bprm)
+
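+/*
+ * A 32-bit task is started with the compatibility code and data selectors,
+ * a cleared %fs and kernel GS base, and the TIF_IA32 thread flag set.
+ */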
+#undef start_thread
+#define start_thread(regs,new_rip,new_rsp) do { \
+       __asm__("movl %0,%%fs": :"r" (0)); \
+       __asm__("movl %0,%%es; movl %0,%%ds": :"r" (__USER32_DS)); \
+       wrmsrl(MSR_KERNEL_GS_BASE, 0); \
+       set_thread_flag(TIF_IA32); \
+       (regs)->rip = (new_rip); \
+       (regs)->rsp = (new_rsp); \
+       (regs)->eflags = 0x200; \
+       (regs)->cs = __USER32_CS; \
+       (regs)->ss = __USER32_DS; \
+       set_fs(USER_DS); \
+} while(0) 
+
+
+MODULE_DESCRIPTION("Binary format loader for compatibility with IA32 ELF binaries."); 
+MODULE_AUTHOR("Eric Youngdale, Andi Kleen");
+
+#undef MODULE_DESCRIPTION
+#undef MODULE_AUTHOR
+
+#define elf_addr_t __u32
+#define elf_caddr_t __u32
+
+static void elf32_init(struct pt_regs *);
+
+#include "../../../fs/binfmt_elf.c" 
+
+static void elf32_init(struct pt_regs *regs)
+{
+       regs->rdi = 0;
+       regs->rsi = 0;
+       regs->rdx = 0;
+       regs->rcx = 0;
+       regs->rax = 0;
+       regs->rbx = 0; 
+       regs->rbp = 0; 
+        current->thread.fs = 0; current->thread.gs = 0;
+       current->thread.fsindex = 0; current->thread.gsindex = 0;
+        current->thread.ds = __USER_DS; current->thread.es = __USER_DS;
+}
+
+extern void put_dirty_page(struct task_struct * tsk, struct page *page, unsigned long address);
+
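+/*
+ * Like the generic setup_arg_pages(), but the stack VMA and the collected
+ * argument pages are placed just below IA32_STACK_TOP so that they are
+ * reachable from 32-bit user space.
+ */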
+int ia32_setup_arg_pages(struct linux_binprm *bprm)
+{
+       unsigned long stack_base;
+       struct vm_area_struct *mpnt;
+       int i;
+
+       stack_base = IA32_STACK_TOP - MAX_ARG_PAGES*PAGE_SIZE;
+
+       bprm->p += stack_base;
+       if (bprm->loader)
+               bprm->loader += stack_base;
+       bprm->exec += stack_base;
+
+       mpnt = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
+       if (!mpnt) 
+               return -ENOMEM; 
+       
+       down_write(&current->mm->mmap_sem);
+       {
+               mpnt->vm_mm = current->mm;
+               mpnt->vm_start = PAGE_MASK & (unsigned long) bprm->p;
+               mpnt->vm_end = IA32_STACK_TOP;
+               mpnt->vm_page_prot = PAGE_COPY;
+               mpnt->vm_flags = VM_STACK_FLAGS;
+               mpnt->vm_ops = NULL;
+               mpnt->vm_pgoff = 0;
+               mpnt->vm_file = NULL;
+               mpnt->vm_private_data = (void *) 0;
+               insert_vm_struct(current->mm, mpnt);
+               current->mm->total_vm = (mpnt->vm_end - mpnt->vm_start) >> PAGE_SHIFT;
+       } 
+
+       for (i = 0 ; i < MAX_ARG_PAGES ; i++) {
+               struct page *page = bprm->page[i];
+               if (page) {
+                       bprm->page[i] = NULL;
+                       current->mm->rss++;
+                       put_dirty_page(current,page,stack_base);
+               }
+               stack_base += PAGE_SIZE;
+       }
+       up_write(&current->mm->mmap_sem);
+       
+       return 0;
+}
+
diff --git a/arch/x86_64/ia32/ia32_ioctl.c b/arch/x86_64/ia32/ia32_ioctl.c
new file mode 100644 (file)
index 0000000..5755a54
--- /dev/null
@@ -0,0 +1,3843 @@
+/* $Id: ia32_ioctl.c,v 1.2 2001/07/05 06:28:42 ak Exp $
+ * ioctl32.c: Conversion between 32bit and 64bit native ioctls.
+ *
+ * Copyright (C) 1997-2000  Jakub Jelinek  (jakub@redhat.com)
+ * Copyright (C) 1998  Eddie C. Dost  (ecd@skynet.be)
+ * Copyright (C) 2001  Andi Kleen, SuSE Labs 
+ *
+ * These routines handle argument size conversion between 32-bit and 64-bit
+ * ioctls.
+ */
+
+#include <linux/config.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/smp.h>
+#include <linux/smp_lock.h>
+#include <linux/ioctl.h>
+#include <linux/if.h>
+#include <linux/slab.h>
+#include <linux/hdreg.h>
+#include <linux/raid/md.h>
+#include <linux/kd.h>
+#include <linux/route.h>
+#include <linux/in6.h>
+#include <linux/ipv6_route.h>
+#include <linux/skbuff.h>
+#include <linux/netlink.h>
+#include <linux/vt.h>
+#include <linux/fs.h>
+#include <linux/file.h>
+#include <linux/fd.h>
+#include <linux/ppp_defs.h>
+#include <linux/if_ppp.h>
+#include <linux/if_pppox.h>
+#include <linux/mtio.h>
+#include <linux/cdrom.h>
+#include <linux/loop.h>
+#include <linux/auto_fs.h>
+#include <linux/devfs_fs.h>
+#include <linux/tty.h>
+#include <linux/vt_kern.h>
+#include <linux/fb.h>
+#include <linux/ext2_fs.h>
+#include <linux/videodev.h>
+#include <linux/netdevice.h>
+#include <linux/raw.h>
+#include <linux/smb_fs.h>
+#include <linux/blkpg.h>
+#include <linux/blk.h>
+#include <linux/elevator.h>
+#include <linux/rtc.h>
+#include <linux/pci.h>
+#if defined(CONFIG_BLK_DEV_LVM) || defined(CONFIG_BLK_DEV_LVM_MODULE)
+/* Ugh. This header really is not clean */
+#define min min
+#define max max
+#include <linux/lvm.h>
+#endif /* LVM */
+
+#include <scsi/scsi.h>
+/* Ugly hack. */
+#undef __KERNEL__
+#include <scsi/scsi_ioctl.h>
+#define __KERNEL__
+#include <scsi/sg.h>
+
+#include <asm/types.h>
+#include <asm/ia32.h>
+#include <asm/uaccess.h>
+#include <linux/ethtool.h>
+#include <asm/module.h>
+#include <linux/soundcard.h>
+
+#include <linux/atm.h>
+#include <linux/atmarp.h>
+#include <linux/atmclip.h>
+#include <linux/atmdev.h>
+#include <linux/atmioc.h>
+#include <linux/atmlec.h>
+#include <linux/atmmpc.h>
+#include <linux/atmsvc.h>
+#include <linux/atm_tcp.h>
+#include <linux/sonet.h>
+#include <linux/atm_suni.h>
+
+#define A(__x) ((void *)(unsigned long)(__x))
+#define AA(__x)        A(__x)
+
+/* Aiee. Someone does not see the difference between int and long */
+#define EXT2_IOC32_GETFLAGS               _IOR('f', 1, int)
+#define EXT2_IOC32_SETFLAGS               _IOW('f', 2, int)
+#define EXT2_IOC32_GETVERSION             _IOR('v', 1, int)
+#define EXT2_IOC32_SETVERSION             _IOW('v', 2, int)
+
+extern asmlinkage int sys_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg);
+
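+/*
+ * Helpers for ioctls whose argument is a pointer to a long in the native
+ * ABI but to a 32-bit value for the compat caller: bounce the value through
+ * a kernel buffer under KERNEL_DS and copy the result back as a u32.
+ */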
+static int w_long(unsigned int fd, unsigned int cmd, unsigned long arg)
+{
+       mm_segment_t old_fs = get_fs();
+       int err;
+       unsigned long val;
+       
+       set_fs (KERNEL_DS);
+       err = sys_ioctl(fd, cmd, (unsigned long)&val);
+       set_fs (old_fs);
+       if (!err && put_user(val, (u32 *)arg))
+               return -EFAULT;
+       return err;
+}
+static int rw_long(unsigned int fd, unsigned int cmd, unsigned long arg)
+{
+       mm_segment_t old_fs = get_fs();
+       int err;
+       unsigned long val;
+       
+       if(get_user(val, (u32 *)arg))
+               return -EFAULT;
+       set_fs (KERNEL_DS);
+       err = sys_ioctl(fd, cmd, (unsigned long)&val);
+       set_fs (old_fs);
+       if (!err && put_user(val, (u32 *)arg))
+               return -EFAULT;
+       return err;
+}
+
+static int do_ext2_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg)
+{
+	/* These are just misnamed; they actually get/put an int from/to user space */
+       switch (cmd) {
+       case EXT2_IOC32_GETFLAGS: cmd = EXT2_IOC_GETFLAGS; break;
+       case EXT2_IOC32_SETFLAGS: cmd = EXT2_IOC_SETFLAGS; break;
+       case EXT2_IOC32_GETVERSION: cmd = EXT2_IOC_GETVERSION; break;
+       case EXT2_IOC32_SETVERSION: cmd = EXT2_IOC_SETVERSION; break;
+       }
+       return sys_ioctl(fd, cmd, arg);
+}
+struct video_tuner32 {
+       s32 tuner;
+       u8 name[32];
+       u32 rangelow, rangehigh;
+       u32 flags;
+       u16 mode, signal;
+};
+
+static int get_video_tuner32(struct video_tuner *kp, struct video_tuner32 *up)
+{
+       int i;
+
+       if(get_user(kp->tuner, &up->tuner))
+               return -EFAULT;
+       for(i = 0; i < 32; i++)
+               __get_user(kp->name[i], &up->name[i]);
+       __get_user(kp->rangelow, &up->rangelow);
+       __get_user(kp->rangehigh, &up->rangehigh);
+       __get_user(kp->flags, &up->flags);
+       __get_user(kp->mode, &up->mode);
+       __get_user(kp->signal, &up->signal);
+       return 0;
+}
+
+static int put_video_tuner32(struct video_tuner *kp, struct video_tuner32 *up)
+{
+       int i;
+
+       if(put_user(kp->tuner, &up->tuner))
+               return -EFAULT;
+       for(i = 0; i < 32; i++)
+               __put_user(kp->name[i], &up->name[i]);
+       __put_user(kp->rangelow, &up->rangelow);
+       __put_user(kp->rangehigh, &up->rangehigh);
+       __put_user(kp->flags, &up->flags);
+       __put_user(kp->mode, &up->mode);
+       __put_user(kp->signal, &up->signal);
+       return 0;
+}
+
+struct video_buffer32 {
+       /* void * */ u32 base;
+       s32 height, width, depth, bytesperline;
+};
+
+static int get_video_buffer32(struct video_buffer *kp, struct video_buffer32 *up)
+{
+       u32 tmp;
+
+       if(get_user(tmp, &up->base))
+               return -EFAULT;
+       kp->base = (void *) ((unsigned long)tmp);
+       __get_user(kp->height, &up->height);
+       __get_user(kp->width, &up->width);
+       __get_user(kp->depth, &up->depth);
+       __get_user(kp->bytesperline, &up->bytesperline);
+       return 0;
+}
+
+static int put_video_buffer32(struct video_buffer *kp, struct video_buffer32 *up)
+{
+       u32 tmp = (u32)((unsigned long)kp->base);
+
+       if(put_user(tmp, &up->base))
+               return -EFAULT;
+       __put_user(kp->height, &up->height);
+       __put_user(kp->width, &up->width);
+       __put_user(kp->depth, &up->depth);
+       __put_user(kp->bytesperline, &up->bytesperline);
+       return 0;
+}
+
+struct video_clip32 {
+       s32 x, y, width, height;
+       /* struct video_clip32 * */ u32 next;
+};
+
+struct video_window32 {
+       u32 x, y, width, height, chromakey, flags;
+       /* struct video_clip32 * */ u32 clips;
+       s32 clipcount;
+};
+
+static void free_kvideo_clips(struct video_window *kp)
+{
+       struct video_clip *cp;
+
+       cp = kp->clips;
+       if(cp != NULL)
+               kfree(cp);
+}
+
+static int get_video_window32(struct video_window *kp, struct video_window32 *up)
+{
+       struct video_clip32 *ucp;
+       struct video_clip *kcp;
+       int nclips, err, i;
+       u32 tmp;
+
+       if(get_user(kp->x, &up->x))
+               return -EFAULT;
+       __get_user(kp->y, &up->y);
+       __get_user(kp->width, &up->width);
+       __get_user(kp->height, &up->height);
+       __get_user(kp->chromakey, &up->chromakey);
+       __get_user(kp->flags, &up->flags);
+       __get_user(kp->clipcount, &up->clipcount);
+       __get_user(tmp, &up->clips);
+       ucp = (struct video_clip32 *)A(tmp);
+       kp->clips = NULL;
+
+       nclips = kp->clipcount;
+       if(nclips == 0)
+               return 0;
+
+       if(ucp == 0)
+               return -EINVAL;
+
+       /* Peculiar interface... */
+       if(nclips < 0)
+               nclips = VIDEO_CLIPMAP_SIZE;
+
+       kcp = kmalloc(nclips * sizeof(struct video_clip), GFP_KERNEL);
+       err = -ENOMEM;
+       if(kcp == NULL)
+               goto cleanup_and_err;
+
+       kp->clips = kcp;
+       for(i = 0; i < nclips; i++) {
+               __get_user(kcp[i].x, &ucp[i].x);
+               __get_user(kcp[i].y, &ucp[i].y);
+               __get_user(kcp[i].width, &ucp[i].width);
+               __get_user(kcp[i].height, &ucp[i].height);
+		kcp[i].next = NULL;
+       }
+
+       return 0;
+
+cleanup_and_err:
+       free_kvideo_clips(kp);
+       return err;
+}
+
+/* You get back everything except the clips... */
+static int put_video_window32(struct video_window *kp, struct video_window32 *up)
+{
+       if(put_user(kp->x, &up->x))
+               return -EFAULT;
+       __put_user(kp->y, &up->y);
+       __put_user(kp->width, &up->width);
+       __put_user(kp->height, &up->height);
+       __put_user(kp->chromakey, &up->chromakey);
+       __put_user(kp->flags, &up->flags);
+       __put_user(kp->clipcount, &up->clipcount);
+       return 0;
+}
+
+#define VIDIOCGTUNER32         _IOWR('v',4, struct video_tuner32)
+#define VIDIOCSTUNER32         _IOW('v',5, struct video_tuner32)
+#define VIDIOCGWIN32           _IOR('v',9, struct video_window32)
+#define VIDIOCSWIN32           _IOW('v',10, struct video_window32)
+#define VIDIOCGFBUF32          _IOR('v',11, struct video_buffer32)
+#define VIDIOCSFBUF32          _IOW('v',12, struct video_buffer32)
+#define VIDIOCGFREQ32          _IOR('v',14, u32)
+#define VIDIOCSFREQ32          _IOW('v',15, u32)
+
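+/*
+ * Video4Linux: map each 32-bit ioctl number to the native one, convert the
+ * argument into a kernel-space copy, issue the ioctl under KERNEL_DS, and
+ * convert the result back for the 32-bit caller.
+ */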
+static int do_video_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg)
+{
+       union {
+               struct video_tuner vt;
+               struct video_buffer vb;
+               struct video_window vw;
+               unsigned long vx;
+       } karg;
+       mm_segment_t old_fs = get_fs();
+       void *up = (void *)arg;
+       int err = 0;
+
+       /* First, convert the command. */
+       switch(cmd) {
+       case VIDIOCGTUNER32: cmd = VIDIOCGTUNER; break;
+       case VIDIOCSTUNER32: cmd = VIDIOCSTUNER; break;
+       case VIDIOCGWIN32: cmd = VIDIOCGWIN; break;
+       case VIDIOCSWIN32: cmd = VIDIOCSWIN; break;
+       case VIDIOCGFBUF32: cmd = VIDIOCGFBUF; break;
+       case VIDIOCSFBUF32: cmd = VIDIOCSFBUF; break;
+       case VIDIOCGFREQ32: cmd = VIDIOCGFREQ; break;
+       case VIDIOCSFREQ32: cmd = VIDIOCSFREQ; break;
+       };
+
+       switch(cmd) {
+       case VIDIOCSTUNER:
+       case VIDIOCGTUNER:
+               err = get_video_tuner32(&karg.vt, up);
+               break;
+
+       case VIDIOCSWIN:
+               err = get_video_window32(&karg.vw, up);
+               break;
+
+       case VIDIOCSFBUF:
+               err = get_video_buffer32(&karg.vb, up);
+               break;
+
+       case VIDIOCSFREQ:
+               err = get_user(karg.vx, (u32 *)up);
+               break;
+       };
+       if(err)
+               goto out;
+
+       set_fs(KERNEL_DS);
+       err = sys_ioctl(fd, cmd, (unsigned long)&karg);
+       set_fs(old_fs);
+
+       if(cmd == VIDIOCSWIN)
+               free_kvideo_clips(&karg.vw);
+
+       if(err == 0) {
+               switch(cmd) {
+               case VIDIOCGTUNER:
+                       err = put_video_tuner32(&karg.vt, up);
+                       break;
+
+               case VIDIOCGWIN:
+                       err = put_video_window32(&karg.vw, up);
+                       break;
+
+               case VIDIOCGFBUF:
+                       err = put_video_buffer32(&karg.vb, up);
+                       break;
+
+               case VIDIOCGFREQ:
+                       err = put_user(((u32)karg.vx), (u32 *)up);
+                       break;
+               };
+       }
+out:
+       return err;
+}
+
+struct timeval32 {
+       int tv_sec;
+       int tv_usec;
+};
+
+static int do_siocgstamp(unsigned int fd, unsigned int cmd, unsigned long arg)
+{
+       struct timeval32 *up = (struct timeval32 *)arg;
+       struct timeval ktv;
+       mm_segment_t old_fs = get_fs();
+       int err;
+
+       set_fs(KERNEL_DS);
+       err = sys_ioctl(fd, cmd, (unsigned long)&ktv);
+       set_fs(old_fs);
+       if(!err) {
+               err = put_user(ktv.tv_sec, &up->tv_sec);
+               err |= __put_user(ktv.tv_usec, &up->tv_usec);
+       }
+       return err;
+}
+
+struct ifmap32 {
+       u32 mem_start;
+       u32 mem_end;
+       unsigned short base_addr;
+       unsigned char irq;
+       unsigned char dma;
+       unsigned char port;
+};
+
+struct ifreq32 {
+#define IFHWADDRLEN     6
+#define IFNAMSIZ        16
+        union {
+                char    ifrn_name[IFNAMSIZ];            /* if name, e.g. "en0" */
+        } ifr_ifrn;
+        union {
+                struct  sockaddr ifru_addr;
+                struct  sockaddr ifru_dstaddr;
+                struct  sockaddr ifru_broadaddr;
+                struct  sockaddr ifru_netmask;
+                struct  sockaddr ifru_hwaddr;
+                short   ifru_flags;
+                int     ifru_ivalue;
+                int     ifru_mtu;
+                struct  ifmap32 ifru_map;
+                char    ifru_slave[IFNAMSIZ];   /* Just fits the size */
+               char    ifru_newname[IFNAMSIZ];
+                __kernel_caddr_t32 ifru_data;
+        } ifr_ifru;
+};
+
+struct ifconf32 {
+        int     ifc_len;                        /* size of buffer       */
+        __kernel_caddr_t32  ifcbuf;
+};
+
+static int dev_ifname32(unsigned int fd, unsigned int cmd, unsigned long arg)
+{
+       struct net_device *dev;
+       struct ifreq32 ifr32;
+       int err;
+
+       if (copy_from_user(&ifr32, (struct ifreq32 *)arg, sizeof(struct ifreq32)))
+               return -EFAULT;
+
+       dev = dev_get_by_index(ifr32.ifr_ifindex);
+       if (!dev)
+               return -ENODEV;
+
+       strncpy(ifr32.ifr_name, dev->name, sizeof(ifr32.ifr_name)-1);
+       ifr32.ifr_name[sizeof(ifr32.ifr_name)-1] = 0;
+       dev_put(dev);
+
+       err = copy_to_user((struct ifreq32 *)arg, &ifr32, sizeof(struct ifreq32));
+       return (err ? -EFAULT : 0);
+}
+
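+/*
+ * SIOCGIFCONF: expand the user's array of 32-bit ifreqs into a kernel array
+ * of native ifreqs, run the ioctl under KERNEL_DS, then copy each entry back
+ * and fix up ifc_len for the 32-bit layout.
+ */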
+static int dev_ifconf(unsigned int fd, unsigned int cmd, unsigned long arg)
+{
+       struct ifconf32 ifc32;
+       struct ifconf ifc;
+       struct ifreq32 *ifr32;
+       struct ifreq *ifr;
+       mm_segment_t old_fs;
+       unsigned int i, j;
+       int err;
+
+       if (copy_from_user(&ifc32, (struct ifconf32 *)arg, sizeof(struct ifconf32)))
+               return -EFAULT;
+
+       if(ifc32.ifcbuf == 0) {
+               ifc32.ifc_len = 0;
+               ifc.ifc_len = 0;
+               ifc.ifc_buf = NULL;
+       } else {
+               ifc.ifc_len = ((ifc32.ifc_len / sizeof (struct ifreq32)) + 1) *
+                       sizeof (struct ifreq);
+               ifc.ifc_buf = kmalloc (ifc.ifc_len, GFP_KERNEL);
+               if (!ifc.ifc_buf)
+                       return -ENOMEM;
+       }
+       ifr = ifc.ifc_req;
+       ifr32 = (struct ifreq32 *)A(ifc32.ifcbuf);
+       for (i = 0; i < ifc32.ifc_len; i += sizeof (struct ifreq32)) {
+               if (copy_from_user(ifr, ifr32, sizeof (struct ifreq32))) {
+                       kfree (ifc.ifc_buf);
+                       return -EFAULT;
+               }
+               ifr++;
+               ifr32++; 
+       }
+       old_fs = get_fs(); set_fs (KERNEL_DS);
+       err = sys_ioctl (fd, SIOCGIFCONF, (unsigned long)&ifc); 
+       set_fs (old_fs);
+       if (!err) {
+               ifr = ifc.ifc_req;
+               ifr32 = (struct ifreq32 *)A(ifc32.ifcbuf);
+               for (i = 0, j = 0; i < ifc32.ifc_len && j < ifc.ifc_len;
+                    i += sizeof (struct ifreq32), j += sizeof (struct ifreq)) {
+                       int k = copy_to_user(ifr32, ifr, sizeof (struct ifreq32));
+                       ifr32++;
+                       ifr++;
+                       if (k) {
+                               err = -EFAULT;
+                               break;
+                       }
+               }
+               if (!err) {
+                       if (ifc32.ifcbuf == 0) {
+                               /* Translate from 64-bit structure multiple to
+                                * a 32-bit one.
+                                */
+                               i = ifc.ifc_len;
+                               i = ((i / sizeof(struct ifreq)) * sizeof(struct ifreq32));
+                               ifc32.ifc_len = i;
+                       } else {
+                               if (i <= ifc32.ifc_len)
+                                       ifc32.ifc_len = i;
+                               else
+                                       ifc32.ifc_len = i - sizeof (struct ifreq32);
+                       }
+                       if (copy_to_user((struct ifconf32 *)arg, &ifc32, sizeof(struct ifconf32)))
+                               err = -EFAULT;
+               }
+       }
+       if(ifc.ifc_buf != NULL)
+               kfree (ifc.ifc_buf);
+       return err;
+}
+
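+/*
+ * SIOCETHTOOL: the real argument hangs off ifr_data, so bounce it through a
+ * kernel page, sizing the copy from the embedded ethtool command code.
+ */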
+static int ethtool_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg)
+{
+       struct ifreq ifr;
+       mm_segment_t old_fs;
+       int err, len;
+       u32 data, ethcmd;
+       
+       if (copy_from_user(&ifr, (struct ifreq32 *)arg, sizeof(struct ifreq32)))
+               return -EFAULT;
+       ifr.ifr_data = (__kernel_caddr_t)get_free_page(GFP_KERNEL);
+       if (!ifr.ifr_data)
+               return -EAGAIN;
+
+       __get_user(data, &(((struct ifreq32 *)arg)->ifr_ifru.ifru_data));
+
+       if (get_user(ethcmd, (u32 *)A(data))) {
+               err = -EFAULT;
+               goto out;
+       }
+       switch (ethcmd) {
+       case ETHTOOL_GDRVINFO:  len = sizeof(struct ethtool_drvinfo); break;
+       case ETHTOOL_GSET:
+       case ETHTOOL_SSET:
+       default:                len = sizeof(struct ethtool_cmd); break;
+       }
+
+       if (copy_from_user(ifr.ifr_data, (char *)A(data), len)) {
+               err = -EFAULT;
+               goto out;
+       }
+
+       old_fs = get_fs();
+       set_fs (KERNEL_DS);
+       err = sys_ioctl (fd, cmd, (unsigned long)&ifr);
+       set_fs (old_fs);
+       if (!err) {
+               u32 data;
+
+               __get_user(data, &(((struct ifreq32 *)arg)->ifr_ifru.ifru_data));
+               len = copy_to_user((char *)A(data), ifr.ifr_data, len);
+               if (len)
+                       err = -EFAULT;
+       }
+
+out:
+       free_page((unsigned long)ifr.ifr_data);
+       return err;
+}
+
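+/*
+ * Generic interface ioctls: convert struct ifreq32 to a native struct ifreq
+ * (the ifmap and ifr_data cases need extra work), issue the ioctl, and
+ * convert the result back for the "get" commands.
+ */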
+static int dev_ifsioc(unsigned int fd, unsigned int cmd, unsigned long arg)
+{
+       struct ifreq ifr;
+       mm_segment_t old_fs;
+       int err;
+       
+       switch (cmd) {
+       case SIOCSIFMAP:
+               err = copy_from_user(&ifr, (struct ifreq32 *)arg, sizeof(ifr.ifr_name));
+               err |= __get_user(ifr.ifr_map.mem_start, &(((struct ifreq32 *)arg)->ifr_ifru.ifru_map.mem_start));
+               err |= __get_user(ifr.ifr_map.mem_end, &(((struct ifreq32 *)arg)->ifr_ifru.ifru_map.mem_end));
+               err |= __get_user(ifr.ifr_map.base_addr, &(((struct ifreq32 *)arg)->ifr_ifru.ifru_map.base_addr));
+               err |= __get_user(ifr.ifr_map.irq, &(((struct ifreq32 *)arg)->ifr_ifru.ifru_map.irq));
+               err |= __get_user(ifr.ifr_map.dma, &(((struct ifreq32 *)arg)->ifr_ifru.ifru_map.dma));
+               err |= __get_user(ifr.ifr_map.port, &(((struct ifreq32 *)arg)->ifr_ifru.ifru_map.port));
+               if (err)
+                       return -EFAULT;
+               break;
+       case SIOCGPPPSTATS:
+       case SIOCGPPPCSTATS:
+       case SIOCGPPPVER:
+               if (copy_from_user(&ifr, (struct ifreq32 *)arg, sizeof(struct ifreq32)))
+                       return -EFAULT;
+               ifr.ifr_data = (__kernel_caddr_t)get_free_page(GFP_KERNEL);
+               if (!ifr.ifr_data)
+                       return -EAGAIN;
+               break;
+       default:
+               if (copy_from_user(&ifr, (struct ifreq32 *)arg, sizeof(struct ifreq32)))
+                       return -EFAULT;
+               break;
+       }
+       old_fs = get_fs();
+       set_fs (KERNEL_DS);
+       err = sys_ioctl (fd, cmd, (unsigned long)&ifr);
+       set_fs (old_fs);
+       if (!err) {
+               switch (cmd) {
+               case SIOCGIFFLAGS:
+               case SIOCGIFMETRIC:
+               case SIOCGIFMTU:
+               case SIOCGIFMEM:
+               case SIOCGIFHWADDR:
+               case SIOCGIFINDEX:
+               case SIOCGIFADDR:
+               case SIOCGIFBRDADDR:
+               case SIOCGIFDSTADDR:
+               case SIOCGIFNETMASK:
+               case SIOCGIFTXQLEN:
+                       if (copy_to_user((struct ifreq32 *)arg, &ifr, sizeof(struct ifreq32)))
+                               return -EFAULT;
+                       break;
+               case SIOCGPPPSTATS:
+               case SIOCGPPPCSTATS:
+               case SIOCGPPPVER:
+               {
+                       u32 data;
+                       int len;
+
+                       __get_user(data, &(((struct ifreq32 *)arg)->ifr_ifru.ifru_data));
+                       if(cmd == SIOCGPPPVER)
+                               len = strlen((char *)ifr.ifr_data) + 1;
+                       else if(cmd == SIOCGPPPCSTATS)
+                               len = sizeof(struct ppp_comp_stats);
+                       else
+                               len = sizeof(struct ppp_stats);
+
+                       len = copy_to_user((char *)A(data), ifr.ifr_data, len);
+                       free_page((unsigned long)ifr.ifr_data);
+                       if(len)
+                               return -EFAULT;
+                       break;
+               }
+               case SIOCGIFMAP:
+                       err = copy_to_user((struct ifreq32 *)arg, &ifr, sizeof(ifr.ifr_name));
+                       err |= __put_user(ifr.ifr_map.mem_start, &(((struct ifreq32 *)arg)->ifr_ifru.ifru_map.mem_start));
+                       err |= __put_user(ifr.ifr_map.mem_end, &(((struct ifreq32 *)arg)->ifr_ifru.ifru_map.mem_end));
+                       err |= __put_user(ifr.ifr_map.base_addr, &(((struct ifreq32 *)arg)->ifr_ifru.ifru_map.base_addr));
+                       err |= __put_user(ifr.ifr_map.irq, &(((struct ifreq32 *)arg)->ifr_ifru.ifru_map.irq));
+                       err |= __put_user(ifr.ifr_map.dma, &(((struct ifreq32 *)arg)->ifr_ifru.ifru_map.dma));
+                       err |= __put_user(ifr.ifr_map.port, &(((struct ifreq32 *)arg)->ifr_ifru.ifru_map.port));
+                       if (err)
+                               err = -EFAULT;
+                       break;
+               }
+       } else {
+               switch (cmd) {
+               case SIOCGPPPSTATS:
+               case SIOCGPPPCSTATS:
+               case SIOCGPPPVER:
+                       free_page((unsigned long)ifr.ifr_data);
+                       break;
+               }
+       }
+       return err;
+}
+
+struct rtentry32 {
+        u32            rt_pad1;
+        struct sockaddr rt_dst;         /* target address               */
+        struct sockaddr rt_gateway;     /* gateway addr (RTF_GATEWAY)   */
+        struct sockaddr rt_genmask;     /* target network mask (IP)     */
+        unsigned short  rt_flags;
+        short           rt_pad2;
+        u32            rt_pad3;
+        unsigned char   rt_tos;
+        unsigned char   rt_class;
+        short           rt_pad4;
+        short           rt_metric;      /* +1 for binary compatibility! */
+        /* char * */ u32 rt_dev;        /* forcing the device at add    */
+        u32            rt_mtu;         /* per route MTU/Window         */
+        u32            rt_window;      /* Window clamping              */
+        unsigned short  rt_irtt;        /* Initial RTT                  */
+
+};
+
+struct in6_rtmsg32 {
+       struct in6_addr         rtmsg_dst;
+       struct in6_addr         rtmsg_src;
+       struct in6_addr         rtmsg_gateway;
+       u32                     rtmsg_type;
+       u16                     rtmsg_dst_len;
+       u16                     rtmsg_src_len;
+       u32                     rtmsg_metric;
+       u32                     rtmsg_info;
+       u32                     rtmsg_flags;
+       s32                     rtmsg_ifindex;
+};
+
+extern struct socket *sockfd_lookup(int fd, int *err);
+
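+/*
+ * Routing ioctls: build a native in6_rtmsg or rtentry from the 32-bit
+ * layout, depending on whether the socket is an AF_INET6 one.
+ */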
+static int routing_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg)
+{
+       int ret;
+       void *r = NULL;
+       struct in6_rtmsg r6;
+       struct rtentry r4;
+       char devname[16];
+       u32 rtdev;
+       mm_segment_t old_fs = get_fs();
+       
+       struct socket *mysock = sockfd_lookup(fd, &ret);
+
+       if (mysock && mysock->sk && mysock->sk->family == AF_INET6) { /* ipv6 */
+               ret = copy_from_user (&r6.rtmsg_dst, &(((struct in6_rtmsg32 *)arg)->rtmsg_dst),
+                       3 * sizeof(struct in6_addr));
+               ret |= __get_user (r6.rtmsg_type, &(((struct in6_rtmsg32 *)arg)->rtmsg_type));
+               ret |= __get_user (r6.rtmsg_dst_len, &(((struct in6_rtmsg32 *)arg)->rtmsg_dst_len));
+               ret |= __get_user (r6.rtmsg_src_len, &(((struct in6_rtmsg32 *)arg)->rtmsg_src_len));
+               ret |= __get_user (r6.rtmsg_metric, &(((struct in6_rtmsg32 *)arg)->rtmsg_metric));
+               ret |= __get_user (r6.rtmsg_info, &(((struct in6_rtmsg32 *)arg)->rtmsg_info));
+               ret |= __get_user (r6.rtmsg_flags, &(((struct in6_rtmsg32 *)arg)->rtmsg_flags));
+               ret |= __get_user (r6.rtmsg_ifindex, &(((struct in6_rtmsg32 *)arg)->rtmsg_ifindex));
+               
+               r = (void *) &r6;
+       } else { /* ipv4 */
+               ret = copy_from_user (&r4.rt_dst, &(((struct rtentry32 *)arg)->rt_dst), 3 * sizeof(struct sockaddr));
+               ret |= __get_user (r4.rt_flags, &(((struct rtentry32 *)arg)->rt_flags));
+               ret |= __get_user (r4.rt_metric, &(((struct rtentry32 *)arg)->rt_metric));
+               ret |= __get_user (r4.rt_mtu, &(((struct rtentry32 *)arg)->rt_mtu));
+               ret |= __get_user (r4.rt_window, &(((struct rtentry32 *)arg)->rt_window));
+               ret |= __get_user (r4.rt_irtt, &(((struct rtentry32 *)arg)->rt_irtt));
+               ret |= __get_user (rtdev, &(((struct rtentry32 *)arg)->rt_dev));
+               if (rtdev) {
+                       ret |= copy_from_user (devname, (char *)A(rtdev), 15);
+                       devname[15] = 0;
+                       r4.rt_dev = devname;
+               } else
+                       r4.rt_dev = 0;
+
+               r = (void *) &r4;
+       }
+
+       if (ret)
+               return -EFAULT;
+
+       set_fs (KERNEL_DS);
+       ret = sys_ioctl (fd, cmd, (long) r);
+       set_fs (old_fs);
+
+       return ret;
+}
+
+struct hd_geometry32 {
+       unsigned char heads;
+       unsigned char sectors;
+       unsigned short cylinders;
+       u32 start;
+};
+                        
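+/*
+ * HDIO_GETGEO: heads/sectors/cylinders occupy the same first four bytes in
+ * both layouts; only the 'start' field needs an explicit 32-bit store.
+ */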
+static int hdio_getgeo(unsigned int fd, unsigned int cmd, unsigned long arg)
+{
+       mm_segment_t old_fs = get_fs();
+       struct hd_geometry geo;
+       int err;
+       
+       set_fs (KERNEL_DS);
+       err = sys_ioctl(fd, HDIO_GETGEO, (unsigned long)&geo);
+       set_fs (old_fs);
+       if (!err) {
+               err = copy_to_user ((struct hd_geometry32 *)arg, &geo, 4);
+               err |= __put_user (geo.start, &(((struct hd_geometry32 *)arg)->start));
+       }
+       return err ? -EFAULT : 0;
+}
+
+struct  fbcmap32 {
+       int             index;          /* first element (0 origin) */
+       int             count;
+       u32             red;
+       u32             green;
+       u32             blue;
+};
+
+struct fb_fix_screeninfo32 {
+       char                    id[16];
+        __kernel_caddr_t32     smem_start;
+       __u32                   smem_len;
+       __u32                   type;
+       __u32                   type_aux;
+       __u32                   visual;
+       __u16                   xpanstep;
+       __u16                   ypanstep;
+       __u16                   ywrapstep;
+       __u32                   line_length;
+        __kernel_caddr_t32     mmio_start;
+       __u32                   mmio_len;
+       __u32                   accel;
+       __u16                   reserved[3];
+};
+
+struct fb_cmap32 {
+       __u32                   start;
+       __u32                   len;
+       __kernel_caddr_t32      red;
+       __kernel_caddr_t32      green;
+       __kernel_caddr_t32      blue;
+       __kernel_caddr_t32      transp;
+};
+
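+/*
+ * Frame buffer ioctls: FBIOGET_FSCREENINFO is converted field by field,
+ * while FBIOGETCMAP/FBIOPUTCMAP need kernel copies of the colour arrays
+ * that the 32-bit cmap refers to by 32-bit pointers.
+ */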
+static int fb_ioctl_trans(unsigned int fd, unsigned int cmd, unsigned long arg)
+{
+       mm_segment_t old_fs = get_fs();
+       u32 red = 0, green = 0, blue = 0, transp = 0;
+       struct fb_fix_screeninfo fix;
+       struct fb_cmap cmap;
+       void *karg;
+       int err = 0;
+
+       memset(&cmap, 0, sizeof(cmap));
+       switch (cmd) {
+       case FBIOGET_FSCREENINFO:
+               karg = &fix;
+               break;
+       case FBIOGETCMAP:
+       case FBIOPUTCMAP:
+               karg = &cmap;
+               err = __get_user(cmap.start, &((struct fb_cmap32 *)arg)->start);
+               err |= __get_user(cmap.len, &((struct fb_cmap32 *)arg)->len);
+               err |= __get_user(red, &((struct fb_cmap32 *)arg)->red);
+               err |= __get_user(green, &((struct fb_cmap32 *)arg)->green);
+               err |= __get_user(blue, &((struct fb_cmap32 *)arg)->blue);
+               err |= __get_user(transp, &((struct fb_cmap32 *)arg)->transp);
+               if (err) {
+                       err = -EFAULT;
+                       goto out;
+               }
+               err = -ENOMEM;
+               cmap.red = kmalloc(cmap.len * sizeof(__u16), GFP_KERNEL);
+               if (!cmap.red)
+                       goto out;
+               cmap.green = kmalloc(cmap.len * sizeof(__u16), GFP_KERNEL);
+               if (!cmap.green)
+                       goto out;
+               cmap.blue = kmalloc(cmap.len * sizeof(__u16), GFP_KERNEL);
+               if (!cmap.blue)
+                       goto out;
+               if (transp) {
+                       cmap.transp = kmalloc(cmap.len * sizeof(__u16), GFP_KERNEL);
+                       if (!cmap.transp)
+                               goto out;
+               }
+                       
+               if (cmd == FBIOGETCMAP)
+                       break;
+
+               err = __copy_from_user(cmap.red, (char *)A(red), cmap.len * sizeof(__u16));
+               err |= __copy_from_user(cmap.green, (char *)A(green), cmap.len * sizeof(__u16));
+               err |= __copy_from_user(cmap.blue, (char *)A(blue), cmap.len * sizeof(__u16));
+               if (cmap.transp) err |= __copy_from_user(cmap.transp, (char *)A(transp), cmap.len * sizeof(__u16));
+               if (err) {
+                       err = -EFAULT;
+                       goto out;
+               }
+               break;
+       default:
+               do {
+                       static int count = 0;
+                       if (++count <= 20)
+                               printk("%s: Unknown fb ioctl cmd fd(%d) "
+                                      "cmd(%08x) arg(%08lx)\n",
+                                      __FUNCTION__, fd, cmd, arg);
+               } while(0);
+               return -ENOSYS;
+       }
+       set_fs(KERNEL_DS);
+       err = sys_ioctl(fd, cmd, (unsigned long)karg);
+       set_fs(old_fs);
+       if (err)
+               goto out;
+       switch (cmd) {
+       case FBIOGET_FSCREENINFO:
+               err = __copy_to_user((char *)((struct fb_fix_screeninfo32 *)arg)->id, (char *)fix.id, sizeof(fix.id));
+               err |= __put_user((__u32)(unsigned long)fix.smem_start, &((struct fb_fix_screeninfo32 *)arg)->smem_start);
+               err |= __put_user(fix.smem_len, &((struct fb_fix_screeninfo32 *)arg)->smem_len);
+               err |= __put_user(fix.type, &((struct fb_fix_screeninfo32 *)arg)->type);
+               err |= __put_user(fix.type_aux, &((struct fb_fix_screeninfo32 *)arg)->type_aux);
+               err |= __put_user(fix.visual, &((struct fb_fix_screeninfo32 *)arg)->visual);
+               err |= __put_user(fix.xpanstep, &((struct fb_fix_screeninfo32 *)arg)->xpanstep);
+               err |= __put_user(fix.ypanstep, &((struct fb_fix_screeninfo32 *)arg)->ypanstep);
+               err |= __put_user(fix.ywrapstep, &((struct fb_fix_screeninfo32 *)arg)->ywrapstep);
+               err |= __put_user(fix.line_length, &((struct fb_fix_screeninfo32 *)arg)->line_length);
+               err |= __put_user((__u32)(unsigned long)fix.mmio_start, &((struct fb_fix_screeninfo32 *)arg)->mmio_start);
+               err |= __put_user(fix.mmio_len, &((struct fb_fix_screeninfo32 *)arg)->mmio_len);
+               err |= __put_user(fix.accel, &((struct fb_fix_screeninfo32 *)arg)->accel);
+               err |= __copy_to_user((char *)((struct fb_fix_screeninfo32 *)arg)->reserved, (char *)fix.reserved, sizeof(fix.reserved));
+               break;
+       case FBIOGETCMAP:
+               err = __copy_to_user((char *)A(red), cmap.red, cmap.len * sizeof(__u16));
+               err |= __copy_to_user((char *)A(green), cmap.green, cmap.len * sizeof(__u16));
+               err |= __copy_to_user((char *)A(blue), cmap.blue, cmap.len * sizeof(__u16));
+               if (cmap.transp)
+                       err |= __copy_to_user((char *)A(transp), cmap.transp, cmap.len * sizeof(__u16));
+               break;
+       case FBIOPUTCMAP:
+               break;
+       }
+       if (err)
+               err = -EFAULT;
+
+out:   if (cmap.red) kfree(cmap.red);
+       if (cmap.green) kfree(cmap.green);
+       if (cmap.blue) kfree(cmap.blue);
+       if (cmap.transp) kfree(cmap.transp);
+       return err;
+}
+
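+/*
+ * HDIO ioctls whose native argument is a long: read into a kernel long and
+ * store the value back through the 32-bit int pointer the caller passed.
+ */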
+static int hdio_ioctl_trans(unsigned int fd, unsigned int cmd, unsigned long arg)
+{
+       mm_segment_t old_fs = get_fs();
+       unsigned long kval;
+       unsigned int *uvp;
+       int error;
+
+       set_fs(KERNEL_DS);
+       error = sys_ioctl(fd, cmd, (long)&kval);
+       set_fs(old_fs);
+
+       if(error == 0) {
+               uvp = (unsigned int *)arg;
+               if(put_user(kval, uvp))
+                       error = -EFAULT;
+       }
+       return error;
+}
+
+struct floppy_struct32 {
+       unsigned int    size;
+       unsigned int    sect;
+       unsigned int    head;
+       unsigned int    track;
+       unsigned int    stretch;
+       unsigned char   gap;
+       unsigned char   rate;
+       unsigned char   spec1;
+       unsigned char   fmt_gap;
+       const __kernel_caddr_t32 name;
+};
+
+struct floppy_drive_params32 {
+       char            cmos;
+       u32             max_dtr;
+       u32             hlt;
+       u32             hut;
+       u32             srt;
+       u32             spinup;
+       u32             spindown;
+       unsigned char   spindown_offset;
+       unsigned char   select_delay;
+       unsigned char   rps;
+       unsigned char   tracks;
+       u32             timeout;
+       unsigned char   interleave_sect;
+       struct floppy_max_errors max_errors;
+       char            flags;
+       char            read_track;
+       short           autodetect[8];
+       int             checkfreq;
+       int             native_format;
+};
+
+struct floppy_drive_struct32 {
+       signed char     flags;
+       u32             spinup_date;
+       u32             select_date;
+       u32             first_read_date;
+       short           probed_format;
+       short           track;
+       short           maxblock;
+       short           maxtrack;
+       int             generation;
+       int             keep_data;
+       int             fd_ref;
+       int             fd_device;
+       int             last_checked;
+       __kernel_caddr_t32 dmabuf;
+       int             bufblocks;
+};
+
+struct floppy_fdc_state32 {
+       int             spec1;
+       int             spec2;
+       int             dtr;
+       unsigned char   version;
+       unsigned char   dor;
+       u32             address;
+       unsigned int    rawcmd:2;
+       unsigned int    reset:1;
+       unsigned int    need_configure:1;
+       unsigned int    perp_mode:2;
+       unsigned int    has_fifo:1;
+       unsigned int    driver_version;
+       unsigned char   track[4];
+};
+
+struct floppy_write_errors32 {
+       unsigned int    write_errors;
+       u32             first_error_sector;
+       int             first_error_generation;
+       u32             last_error_sector;
+       int             last_error_generation;
+       unsigned int    badness;
+};
+
+#define FDSETPRM32 _IOW(2, 0x42, struct floppy_struct32)
+#define FDDEFPRM32 _IOW(2, 0x43, struct floppy_struct32)
+#define FDGETPRM32 _IOR(2, 0x04, struct floppy_struct32)
+#define FDSETDRVPRM32 _IOW(2, 0x90, struct floppy_drive_params32)
+#define FDGETDRVPRM32 _IOR(2, 0x11, struct floppy_drive_params32)
+#define FDGETDRVSTAT32 _IOR(2, 0x12, struct floppy_drive_struct32)
+#define FDPOLLDRVSTAT32 _IOR(2, 0x13, struct floppy_drive_struct32)
+#define FDGETFDCSTAT32 _IOR(2, 0x15, struct floppy_fdc_state32)
+#define FDWERRORGET32  _IOR(2, 0x17, struct floppy_write_errors32)
+
+static struct {
+       unsigned int    cmd32;
+       unsigned int    cmd;
+} fd_ioctl_trans_table[] = {
+       { FDSETPRM32, FDSETPRM },
+       { FDDEFPRM32, FDDEFPRM },
+       { FDGETPRM32, FDGETPRM },
+       { FDSETDRVPRM32, FDSETDRVPRM },
+       { FDGETDRVPRM32, FDGETDRVPRM },
+       { FDGETDRVSTAT32, FDGETDRVSTAT },
+       { FDPOLLDRVSTAT32, FDPOLLDRVSTAT },
+       { FDGETFDCSTAT32, FDGETFDCSTAT },
+       { FDWERRORGET32, FDWERRORGET }
+};
+
+#define NR_FD_IOCTL_TRANS (sizeof(fd_ioctl_trans_table)/sizeof(fd_ioctl_trans_table[0]))
+
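+/*
+ * Floppy ioctls: map the 32-bit command numbers onto the native ones, build
+ * a kernel copy of the argument structure where needed, and translate the
+ * results back field by field.
+ */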
+static int fd_ioctl_trans(unsigned int fd, unsigned int cmd, unsigned long arg)
+{
+       mm_segment_t old_fs = get_fs();
+       void *karg = NULL;
+       unsigned int kcmd = 0;
+       int i, err;
+
+       for (i = 0; i < NR_FD_IOCTL_TRANS; i++)
+               if (cmd == fd_ioctl_trans_table[i].cmd32) {
+                       kcmd = fd_ioctl_trans_table[i].cmd;
+                       break;
+               }
+       if (!kcmd)
+               return -EINVAL;
+
+       switch (cmd) {
+               case FDSETPRM32:
+               case FDDEFPRM32:
+               case FDGETPRM32:
+               {
+                       struct floppy_struct *f;
+
+                       f = karg = kmalloc(sizeof(struct floppy_struct), GFP_KERNEL);
+                       if (!karg)
+                               return -ENOMEM;
+                       if (cmd == FDGETPRM32)
+                               break;
+                       err = __get_user(f->size, &((struct floppy_struct32 *)arg)->size);
+                       err |= __get_user(f->sect, &((struct floppy_struct32 *)arg)->sect);
+                       err |= __get_user(f->head, &((struct floppy_struct32 *)arg)->head);
+                       err |= __get_user(f->track, &((struct floppy_struct32 *)arg)->track);
+                       err |= __get_user(f->stretch, &((struct floppy_struct32 *)arg)->stretch);
+                       err |= __get_user(f->gap, &((struct floppy_struct32 *)arg)->gap);
+                       err |= __get_user(f->rate, &((struct floppy_struct32 *)arg)->rate);
+                       err |= __get_user(f->spec1, &((struct floppy_struct32 *)arg)->spec1);
+                       err |= __get_user(f->fmt_gap, &((struct floppy_struct32 *)arg)->fmt_gap);
+                       err |= __get_user((u64)f->name, &((struct floppy_struct32 *)arg)->name);
+                       if (err) {
+                               err = -EFAULT;
+                               goto out;
+                       }
+                       break;
+               }
+               case FDSETDRVPRM32:
+               case FDGETDRVPRM32:
+               {
+                       struct floppy_drive_params *f;
+
+                       f = karg = kmalloc(sizeof(struct floppy_drive_params), GFP_KERNEL);
+                       if (!karg)
+                               return -ENOMEM;
+                       if (cmd == FDGETDRVPRM32)
+                               break;
+                       err = __get_user(f->cmos, &((struct floppy_drive_params32 *)arg)->cmos);
+                       err |= __get_user(f->max_dtr, &((struct floppy_drive_params32 *)arg)->max_dtr);
+                       err |= __get_user(f->hlt, &((struct floppy_drive_params32 *)arg)->hlt);
+                       err |= __get_user(f->hut, &((struct floppy_drive_params32 *)arg)->hut);
+                       err |= __get_user(f->srt, &((struct floppy_drive_params32 *)arg)->srt);
+                       err |= __get_user(f->spinup, &((struct floppy_drive_params32 *)arg)->spinup);
+                       err |= __get_user(f->spindown, &((struct floppy_drive_params32 *)arg)->spindown);
+                       err |= __get_user(f->spindown_offset, &((struct floppy_drive_params32 *)arg)->spindown_offset);
+                       err |= __get_user(f->select_delay, &((struct floppy_drive_params32 *)arg)->select_delay);
+                       err |= __get_user(f->rps, &((struct floppy_drive_params32 *)arg)->rps);
+                       err |= __get_user(f->tracks, &((struct floppy_drive_params32 *)arg)->tracks);
+                       err |= __get_user(f->timeout, &((struct floppy_drive_params32 *)arg)->timeout);
+                       err |= __get_user(f->interleave_sect, &((struct floppy_drive_params32 *)arg)->interleave_sect);
+                       err |= __copy_from_user(&f->max_errors, &((struct floppy_drive_params32 *)arg)->max_errors, sizeof(f->max_errors));
+                       err |= __get_user(f->flags, &((struct floppy_drive_params32 *)arg)->flags);
+                       err |= __get_user(f->read_track, &((struct floppy_drive_params32 *)arg)->read_track);
+                       err |= __copy_from_user(f->autodetect, ((struct floppy_drive_params32 *)arg)->autodetect, sizeof(f->autodetect));
+                       err |= __get_user(f->checkfreq, &((struct floppy_drive_params32 *)arg)->checkfreq);
+                       err |= __get_user(f->native_format, &((struct floppy_drive_params32 *)arg)->native_format);
+                       if (err) {
+                               err = -EFAULT;
+                               goto out;
+                       }
+                       break;
+               }
+               case FDGETDRVSTAT32:
+               case FDPOLLDRVSTAT32:
+                       karg = kmalloc(sizeof(struct floppy_drive_struct), GFP_KERNEL);
+                       if (!karg)
+                               return -ENOMEM;
+                       break;
+               case FDGETFDCSTAT32:
+                       karg = kmalloc(sizeof(struct floppy_fdc_state), GFP_KERNEL);
+                       if (!karg)
+                               return -ENOMEM;
+                       break;
+               case FDWERRORGET32:
+                       karg = kmalloc(sizeof(struct floppy_write_errors), GFP_KERNEL);
+                       if (!karg)
+                               return -ENOMEM;
+                       break;
+               default:
+                       return -EINVAL;
+       }
+       set_fs (KERNEL_DS);
+       err = sys_ioctl (fd, kcmd, (unsigned long)karg);
+       set_fs (old_fs);
+       if (err)
+               goto out;
+       switch (cmd) {
+               case FDGETPRM32:
+               {
+                       struct floppy_struct *f = karg;
+
+                       err = __put_user(f->size, &((struct floppy_struct32 *)arg)->size);
+                       err |= __put_user(f->sect, &((struct floppy_struct32 *)arg)->sect);
+                       err |= __put_user(f->head, &((struct floppy_struct32 *)arg)->head);
+                       err |= __put_user(f->track, &((struct floppy_struct32 *)arg)->track);
+                       err |= __put_user(f->stretch, &((struct floppy_struct32 *)arg)->stretch);
+                       err |= __put_user(f->gap, &((struct floppy_struct32 *)arg)->gap);
+                       err |= __put_user(f->rate, &((struct floppy_struct32 *)arg)->rate);
+                       err |= __put_user(f->spec1, &((struct floppy_struct32 *)arg)->spec1);
+                       err |= __put_user(f->fmt_gap, &((struct floppy_struct32 *)arg)->fmt_gap);
+                       err |= __put_user((u64)f->name, &((struct floppy_struct32 *)arg)->name);
+                       break;
+               }
+               case FDGETDRVPRM32:
+               {
+                       struct floppy_drive_params *f = karg;
+
+                       err = __put_user(f->cmos, &((struct floppy_drive_params32 *)arg)->cmos);
+                       err |= __put_user(f->max_dtr, &((struct floppy_drive_params32 *)arg)->max_dtr);
+                       err |= __put_user(f->hlt, &((struct floppy_drive_params32 *)arg)->hlt);
+                       err |= __put_user(f->hut, &((struct floppy_drive_params32 *)arg)->hut);
+                       err |= __put_user(f->srt, &((struct floppy_drive_params32 *)arg)->srt);
+                       err |= __put_user(f->spinup, &((struct floppy_drive_params32 *)arg)->spinup);
+                       err |= __put_user(f->spindown, &((struct floppy_drive_params32 *)arg)->spindown);
+                       err |= __put_user(f->spindown_offset, &((struct floppy_drive_params32 *)arg)->spindown_offset);
+                       err |= __put_user(f->select_delay, &((struct floppy_drive_params32 *)arg)->select_delay);
+                       err |= __put_user(f->rps, &((struct floppy_drive_params32 *)arg)->rps);
+                       err |= __put_user(f->tracks, &((struct floppy_drive_params32 *)arg)->tracks);
+                       err |= __put_user(f->timeout, &((struct floppy_drive_params32 *)arg)->timeout);
+                       err |= __put_user(f->interleave_sect, &((struct floppy_drive_params32 *)arg)->interleave_sect);
+                       err |= __copy_to_user(&((struct floppy_drive_params32 *)arg)->max_errors, &f->max_errors, sizeof(f->max_errors));
+                       err |= __put_user(f->flags, &((struct floppy_drive_params32 *)arg)->flags);
+                       err |= __put_user(f->read_track, &((struct floppy_drive_params32 *)arg)->read_track);
+                       err |= __copy_to_user(((struct floppy_drive_params32 *)arg)->autodetect, f->autodetect, sizeof(f->autodetect));
+                       err |= __put_user(f->checkfreq, &((struct floppy_drive_params32 *)arg)->checkfreq);
+                       err |= __put_user(f->native_format, &((struct floppy_drive_params32 *)arg)->native_format);
+                       break;
+               }
+               case FDGETDRVSTAT32:
+               case FDPOLLDRVSTAT32:
+               {
+                       struct floppy_drive_struct *f = karg;
+
+                       err = __put_user(f->flags, &((struct floppy_drive_struct32 *)arg)->flags);
+                       err |= __put_user(f->spinup_date, &((struct floppy_drive_struct32 *)arg)->spinup_date);
+                       err |= __put_user(f->select_date, &((struct floppy_drive_struct32 *)arg)->select_date);
+                       err |= __put_user(f->first_read_date, &((struct floppy_drive_struct32 *)arg)->first_read_date);
+                       err |= __put_user(f->probed_format, &((struct floppy_drive_struct32 *)arg)->probed_format);
+                       err |= __put_user(f->track, &((struct floppy_drive_struct32 *)arg)->track);
+                       err |= __put_user(f->maxblock, &((struct floppy_drive_struct32 *)arg)->maxblock);
+                       err |= __put_user(f->maxtrack, &((struct floppy_drive_struct32 *)arg)->maxtrack);
+                       err |= __put_user(f->generation, &((struct floppy_drive_struct32 *)arg)->generation);
+                       err |= __put_user(f->keep_data, &((struct floppy_drive_struct32 *)arg)->keep_data);
+                       err |= __put_user(f->fd_ref, &((struct floppy_drive_struct32 *)arg)->fd_ref);
+                       err |= __put_user(f->fd_device, &((struct floppy_drive_struct32 *)arg)->fd_device);
+                       err |= __put_user(f->last_checked, &((struct floppy_drive_struct32 *)arg)->last_checked);
+                       err |= __put_user((u64)f->dmabuf, &((struct floppy_drive_struct32 *)arg)->dmabuf);
+                       err |= __put_user((u64)f->bufblocks, &((struct floppy_drive_struct32 *)arg)->bufblocks);
+                       break;
+               }
+               case FDGETFDCSTAT32:
+               {
+                       struct floppy_fdc_state *f = karg;
+
+                       err = __put_user(f->spec1, &((struct floppy_fdc_state32 *)arg)->spec1);
+                       err |= __put_user(f->spec2, &((struct floppy_fdc_state32 *)arg)->spec2);
+                       err |= __put_user(f->dtr, &((struct floppy_fdc_state32 *)arg)->dtr);
+                       err |= __put_user(f->version, &((struct floppy_fdc_state32 *)arg)->version);
+                       err |= __put_user(f->dor, &((struct floppy_fdc_state32 *)arg)->dor);
+                       err |= __put_user(f->address, &((struct floppy_fdc_state32 *)arg)->address);
+                       err |= __copy_to_user((char *)&((struct floppy_fdc_state32 *)arg)->address
+                                          + sizeof(((struct floppy_fdc_state32 *)arg)->address),
+                                          (char *)&f->address + sizeof(f->address), sizeof(int));
+                       err |= __put_user(f->driver_version, &((struct floppy_fdc_state32 *)arg)->driver_version);
+                       err |= __copy_to_user(((struct floppy_fdc_state32 *)arg)->track, f->track, sizeof(f->track));
+                       break;
+               }
+               case FDWERRORGET32:
+               {
+                       struct floppy_write_errors *f = karg;
+
+                       err = __put_user(f->write_errors, &((struct floppy_write_errors32 *)arg)->write_errors);
+                       err |= __put_user(f->first_error_sector, &((struct floppy_write_errors32 *)arg)->first_error_sector);
+                       err |= __put_user(f->first_error_generation, &((struct floppy_write_errors32 *)arg)->first_error_generation);
+                       err |= __put_user(f->last_error_sector, &((struct floppy_write_errors32 *)arg)->last_error_sector);
+                       err |= __put_user(f->last_error_generation, &((struct floppy_write_errors32 *)arg)->last_error_generation);
+                       err |= __put_user(f->badness, &((struct floppy_write_errors32 *)arg)->badness);
+                       break;
+               }
+               default:
+                       break;
+       }
+       if (err)
+               err = -EFAULT;
+
+out:   if (karg) kfree(karg);
+       return err;
+}
+
+struct ppp_option_data32 {
+       __kernel_caddr_t32      ptr;
+       __u32                   length;
+       int                     transmit;
+};
+#define PPPIOCSCOMPRESS32      _IOW('t', 77, struct ppp_option_data32)
+
+struct ppp_idle32 {
+       __kernel_time_t32 xmit_idle;
+       __kernel_time_t32 recv_idle;
+};
+#define PPPIOCGIDLE32          _IOR('t', 63, struct ppp_idle32)
+
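+/*
+ * PPP ioctls: PPPIOCSCOMPRESS carries a pointer to option data that must be
+ * copied into the kernel; PPPIOCGIDLE returns two idle times that shrink to
+ * 32-bit time_t on the way back.
+ */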
+static int ppp_ioctl_trans(unsigned int fd, unsigned int cmd, unsigned long arg)
+{
+       mm_segment_t old_fs = get_fs();
+       struct ppp_option_data32 data32;
+       struct ppp_option_data data;
+       struct ppp_idle32 idle32;
+       struct ppp_idle idle;
+       unsigned int kcmd;
+       void *karg;
+       int err = 0;
+
+       switch (cmd) {
+       case PPPIOCGIDLE32:
+               kcmd = PPPIOCGIDLE;
+               karg = &idle;
+               break;
+       case PPPIOCSCOMPRESS32:
+               if (copy_from_user(&data32, (struct ppp_option_data32 *)arg, sizeof(struct ppp_option_data32)))
+                       return -EFAULT;
+               data.ptr = kmalloc (data32.length, GFP_KERNEL);
+               if (!data.ptr)
+                       return -ENOMEM;
+               if (copy_from_user(data.ptr, (__u8 *)A(data32.ptr), data32.length)) {
+                       kfree(data.ptr);
+                       return -EFAULT;
+               }
+               data.length = data32.length;
+               data.transmit = data32.transmit;
+               kcmd = PPPIOCSCOMPRESS;
+               karg = &data;
+               break;
+       default:
+               do {
+                       static int count = 0;
+                       if (++count <= 20)
+                               printk("ppp_ioctl: Unknown cmd fd(%d) "
+                                      "cmd(%08x) arg(%08x)\n",
+                                      (int)fd, (unsigned int)cmd, (unsigned int)arg);
+               } while(0);
+               return -EINVAL;
+       }
+       set_fs (KERNEL_DS);
+       err = sys_ioctl (fd, kcmd, (unsigned long)karg);
+       set_fs (old_fs);
+       switch (cmd) {
+       case PPPIOCGIDLE32:
+               if (err)
+                       return err;
+               idle32.xmit_idle = idle.xmit_idle;
+               idle32.recv_idle = idle.recv_idle;
+               if (copy_to_user((struct ppp_idle32 *)arg, &idle32, sizeof(struct ppp_idle32)))
+                       return -EFAULT;
+               break;
+       case PPPIOCSCOMPRESS32:
+               kfree(data.ptr);
+               break;
+       default:
+               break;
+       }
+       return err;
+}
+
+
+struct mtget32 {
+       __u32   mt_type;
+       __u32   mt_resid;
+       __u32   mt_dsreg;
+       __u32   mt_gstat;
+       __u32   mt_erreg;
+       __kernel_daddr_t32      mt_fileno;
+       __kernel_daddr_t32      mt_blkno;
+};
+#define MTIOCGET32     _IOR('m', 2, struct mtget32)
+
+struct mtpos32 {
+       __u32   mt_blkno;
+};
+#define MTIOCPOS32     _IOR('m', 3, struct mtpos32)
+
+struct mtconfiginfo32 {
+       __u32   mt_type;
+       __u32   ifc_type;
+       __u16   irqnr;
+       __u16   dmanr;
+       __u16   port;
+       __u32   debug;
+       __u32   have_dens:1;
+       __u32   have_bsf:1;
+       __u32   have_fsr:1;
+       __u32   have_bsr:1;
+       __u32   have_eod:1;
+       __u32   have_seek:1;
+       __u32   have_tell:1;
+       __u32   have_ras1:1;
+       __u32   have_ras2:1;
+       __u32   have_ras3:1;
+       __u32   have_qfa:1;
+       __u32   pad1:5;
+       char    reserved[10];
+};
+#define        MTIOCGETCONFIG32        _IOR('m', 4, struct mtconfiginfo32)
+#define        MTIOCSETCONFIG32        _IOW('m', 5, struct mtconfiginfo32)
+
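+/*
+ * Magnetic tape ioctls: translate the mtget, mtpos and mtconfiginfo
+ * structures between the 32-bit and native layouts.
+ */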
+static int mt_ioctl_trans(unsigned int fd, unsigned int cmd, unsigned long arg)
+{
+       mm_segment_t old_fs = get_fs();
+       struct mtconfiginfo info;
+       struct mtget get;
+       struct mtpos pos;
+       unsigned long kcmd;
+       void *karg;
+       int err = 0;
+
+       switch(cmd) {
+       case MTIOCPOS32:
+               kcmd = MTIOCPOS;
+               karg = &pos;
+               break;
+       case MTIOCGET32:
+               kcmd = MTIOCGET;
+               karg = &get;
+               break;
+       case MTIOCGETCONFIG32:
+               kcmd = MTIOCGETCONFIG;
+               karg = &info;
+               break;
+       case MTIOCSETCONFIG32:
+               kcmd = MTIOCSETCONFIG;
+               karg = &info;
+               err = __get_user(info.mt_type, &((struct mtconfiginfo32 *)arg)->mt_type);
+               err |= __get_user(info.ifc_type, &((struct mtconfiginfo32 *)arg)->ifc_type);
+               err |= __get_user(info.irqnr, &((struct mtconfiginfo32 *)arg)->irqnr);
+               err |= __get_user(info.dmanr, &((struct mtconfiginfo32 *)arg)->dmanr);
+               err |= __get_user(info.port, &((struct mtconfiginfo32 *)arg)->port);
+               err |= __get_user(info.debug, &((struct mtconfiginfo32 *)arg)->debug);
+               err |= __copy_from_user((char *)&info.debug + sizeof(info.debug),
+                                    (char *)&((struct mtconfiginfo32 *)arg)->debug
+                                    + sizeof(((struct mtconfiginfo32 *)arg)->debug), sizeof(__u32));
+               if (err)
+                       return -EFAULT;
+               break;
+       default:
+               do {
+                       static int count = 0;
+                       if (++count <= 20)
+                               printk("mt_ioctl: Unknown cmd fd(%d) "
+                                      "cmd(%08x) arg(%08x)\n",
+                                      (int)fd, (unsigned int)cmd, (unsigned int)arg);
+               } while(0);
+               return -EINVAL;
+       }
+       set_fs (KERNEL_DS);
+       err = sys_ioctl (fd, kcmd, (unsigned long)karg);
+       set_fs (old_fs);
+       if (err)
+               return err;
+       switch (cmd) {
+       case MTIOCPOS32:
+               err = __put_user(pos.mt_blkno, &((struct mtpos32 *)arg)->mt_blkno);
+               break;
+       case MTIOCGET32:
+               err = __put_user(get.mt_type, &((struct mtget32 *)arg)->mt_type);
+               err |= __put_user(get.mt_resid, &((struct mtget32 *)arg)->mt_resid);
+               err |= __put_user(get.mt_dsreg, &((struct mtget32 *)arg)->mt_dsreg);
+               err |= __put_user(get.mt_gstat, &((struct mtget32 *)arg)->mt_gstat);
+               err |= __put_user(get.mt_erreg, &((struct mtget32 *)arg)->mt_erreg);
+               err |= __put_user(get.mt_fileno, &((struct mtget32 *)arg)->mt_fileno);
+               err |= __put_user(get.mt_blkno, &((struct mtget32 *)arg)->mt_blkno);
+               break;
+       case MTIOCGETCONFIG32:
+               err = __put_user(info.mt_type, &((struct mtconfiginfo32 *)arg)->mt_type);
+               err |= __put_user(info.ifc_type, &((struct mtconfiginfo32 *)arg)->ifc_type);
+               err |= __put_user(info.irqnr, &((struct mtconfiginfo32 *)arg)->irqnr);
+               err |= __put_user(info.dmanr, &((struct mtconfiginfo32 *)arg)->dmanr);
+               err |= __put_user(info.port, &((struct mtconfiginfo32 *)arg)->port);
+               err |= __put_user(info.debug, &((struct mtconfiginfo32 *)arg)->debug);
+               err |= __copy_to_user((char *)&((struct mtconfiginfo32 *)arg)->debug
+                                          + sizeof(((struct mtconfiginfo32 *)arg)->debug),
+                                          (char *)&info.debug + sizeof(info.debug), sizeof(__u32));
+               break;
+       case MTIOCSETCONFIG32:
+               break;
+       }
+       return err ? -EFAULT : 0;
+}
+
+struct cdrom_read32 {
+       int                     cdread_lba;
+       __kernel_caddr_t32      cdread_bufaddr;
+       int                     cdread_buflen;
+};
+
+struct cdrom_read_audio32 {
+       union cdrom_addr        addr;
+       u_char                  addr_format;
+       int                     nframes;
+       __kernel_caddr_t32      buf;
+};
+
+struct cdrom_generic_command32 {
+       unsigned char           cmd[CDROM_PACKET_SIZE];
+       __kernel_caddr_t32      buffer;
+       unsigned int            buflen;
+       int                     stat;
+       __kernel_caddr_t32      sense;
+       __kernel_caddr_t32      reserved[3];
+};
+
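+/*
+ * CD-ROM ioctls that carry a data pointer: allocate a kernel bounce buffer,
+ * run the native ioctl, then copy the result back to the 32-bit address.
+ */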
+static int cdrom_ioctl_trans(unsigned int fd, unsigned int cmd, unsigned long arg)
+{
+       mm_segment_t old_fs = get_fs();
+       struct cdrom_read cdread;
+       struct cdrom_read_audio cdreadaudio;
+       struct cdrom_generic_command cgc;
+       __kernel_caddr_t32 addr;
+       char *data = 0;
+       void *karg;
+       int err = 0;
+
+       switch(cmd) {
+       case CDROMREADMODE2:
+       case CDROMREADMODE1:
+       case CDROMREADRAW:
+       case CDROMREADCOOKED:
+               karg = &cdread;
+               err = __get_user(cdread.cdread_lba, &((struct cdrom_read32 *)arg)->cdread_lba);
+               err |= __get_user(addr, &((struct cdrom_read32 *)arg)->cdread_bufaddr);
+               err |= __get_user(cdread.cdread_buflen, &((struct cdrom_read32 *)arg)->cdread_buflen);
+               if (err)
+                       return -EFAULT;
+               data = kmalloc(cdread.cdread_buflen, GFP_KERNEL);
+               if (!data)
+                       return -ENOMEM;
+               cdread.cdread_bufaddr = data;
+               break;
+       case CDROMREADAUDIO:
+               karg = &cdreadaudio;
+               err = copy_from_user(&cdreadaudio.addr, &((struct cdrom_read_audio32 *)arg)->addr, sizeof(cdreadaudio.addr));
+               err |= __get_user(cdreadaudio.addr_format, &((struct cdrom_read_audio32 *)arg)->addr_format);
+               err |= __get_user(cdreadaudio.nframes, &((struct cdrom_read_audio32 *)arg)->nframes); 
+               err |= __get_user(addr, &((struct cdrom_read_audio32 *)arg)->buf);
+               if (err)
+                       return -EFAULT;
+               data = kmalloc(cdreadaudio.nframes * 2352, GFP_KERNEL);
+               if (!data)
+                       return -ENOMEM;
+               cdreadaudio.buf = data;
+               break;
+       case CDROM_SEND_PACKET:
+               karg = &cgc;
+               err = copy_from_user(cgc.cmd, &((struct cdrom_generic_command32 *)arg)->cmd, sizeof(cgc.cmd));
+               err |= __get_user(addr, &((struct cdrom_generic_command32 *)arg)->buffer);
+               err |= __get_user(cgc.buflen, &((struct cdrom_generic_command32 *)arg)->buflen);
+               if (err)
+                       return -EFAULT;
+               if ((data = kmalloc(cgc.buflen, GFP_KERNEL)) == NULL)
+                       return -ENOMEM;
+               cgc.buffer = data;
+               break;
+       default:
+               do {
+                       static int count = 0;
+                       if (++count <= 20)
+                               printk("cdrom_ioctl: Unknown cmd fd(%d) "
+                                      "cmd(%08x) arg(%08x)\n",
+                                      (int)fd, (unsigned int)cmd, (unsigned int)arg);
+               } while(0);
+               return -EINVAL;
+       }
+       set_fs (KERNEL_DS);
+       err = sys_ioctl (fd, cmd, (unsigned long)karg);
+       set_fs (old_fs);
+       if (err)
+               goto out;
+       switch (cmd) {
+       case CDROMREADMODE2:
+       case CDROMREADMODE1:
+       case CDROMREADRAW:
+       case CDROMREADCOOKED:
+               err = copy_to_user((char *)A(addr), data, cdread.cdread_buflen);
+               break;
+       case CDROMREADAUDIO:
+               err = copy_to_user((char *)A(addr), data, cdreadaudio.nframes * 2352);
+               break;
+       case CDROM_SEND_PACKET:
+               err = copy_to_user((char *)A(addr), data, cgc.buflen);
+               break;
+       default:
+               break;
+       }
+out:   if (data)
+               kfree(data);
+       return err ? -EFAULT : 0;
+}
+
+struct loop_info32 {
+       int                     lo_number;      /* ioctl r/o */
+       __kernel_dev_t32        lo_device;      /* ioctl r/o */
+       unsigned int            lo_inode;       /* ioctl r/o */
+       __kernel_dev_t32        lo_rdevice;     /* ioctl r/o */
+       int                     lo_offset;
+       int                     lo_encrypt_type;
+       int                     lo_encrypt_key_size;    /* ioctl w/o */
+       int                     lo_flags;       /* ioctl r/o */
+       char                    lo_name[LO_NAME_SIZE];
+       unsigned char           lo_encrypt_key[LO_KEY_SIZE]; /* ioctl w/o */
+       unsigned int            lo_init[2];
+       char                    reserved[4];
+};
+
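+/*
+ * Loop device status: the fields from lo_offset up to lo_init have the same
+ * layout in both ABIs and are copied as a block; the leading number/device
+ * fields are converted individually.
+ */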
+static int loop_status(unsigned int fd, unsigned int cmd, unsigned long arg)
+{
+       mm_segment_t old_fs = get_fs();
+       struct loop_info l;
+       int err = -EINVAL;
+
+       switch(cmd) {
+       case LOOP_SET_STATUS:
+               err = get_user(l.lo_number, &((struct loop_info32 *)arg)->lo_number);
+               err |= __get_user(l.lo_device, &((struct loop_info32 *)arg)->lo_device);
+               err |= __get_user(l.lo_inode, &((struct loop_info32 *)arg)->lo_inode);
+               err |= __get_user(l.lo_rdevice, &((struct loop_info32 *)arg)->lo_rdevice);
+               err |= __copy_from_user((char *)&l.lo_offset, (char *)&((struct loop_info32 *)arg)->lo_offset,
+                                          8 + (unsigned long)l.lo_init - (unsigned long)&l.lo_offset);
+               if (err) {
+                       err = -EFAULT;
+               } else {
+                       set_fs (KERNEL_DS);
+                       err = sys_ioctl (fd, cmd, (unsigned long)&l);
+                       set_fs (old_fs);
+               }
+               break;
+       case LOOP_GET_STATUS:
+               set_fs (KERNEL_DS);
+               err = sys_ioctl (fd, cmd, (unsigned long)&l);
+               set_fs (old_fs);
+               if (!err) {
+                       err = put_user(l.lo_number, &((struct loop_info32 *)arg)->lo_number);
+                       err |= __put_user(l.lo_device, &((struct loop_info32 *)arg)->lo_device);
+                       err |= __put_user(l.lo_inode, &((struct loop_info32 *)arg)->lo_inode);
+                       err |= __put_user(l.lo_rdevice, &((struct loop_info32 *)arg)->lo_rdevice);
+                       err |= __copy_to_user((char *)&((struct loop_info32 *)arg)->lo_offset,
+                                          (char *)&l.lo_offset, (unsigned long)l.lo_init - (unsigned long)&l.lo_offset);
+                       if (err)
+                               err = -EFAULT;
+               }
+               break;
+       default: {
+               static int count = 0;
+               if (++count <= 20)
+                       printk("%s: Unknown loop ioctl cmd, fd(%d) "
+                              "cmd(%08x) arg(%08lx)\n",
+                              __FUNCTION__, fd, cmd, arg);
+       }
+       }
+       return err;
+}
+
+extern int tty_ioctl(struct inode * inode, struct file * file, unsigned int cmd, unsigned long arg);
+
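+/*
+ * Check that this really is a VT and whether the caller is allowed to issue
+ * privileged console ioctls.
+ */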
+static int vt_check(struct file *file)
+{
+       struct tty_struct *tty;
+       struct inode *inode = file->f_dentry->d_inode;
+       
+       if (file->f_op->ioctl != tty_ioctl)
+               return -EINVAL;
+                       
+       tty = (struct tty_struct *)file->private_data;
+       if (tty_paranoia_check(tty, inode->i_rdev, "tty_ioctl"))
+               return -EINVAL;
+                                                       
+       if (tty->driver.ioctl != vt_ioctl)
+               return -EINVAL;
+       
+       /*
+        * To have permission to do most of the vt ioctls, the caller must
+        * either own the tty or be the super-user.
+        */
+       if (current->tty == tty || suser())
+               return 1;
+       return 0;                                                    
+}
+
+struct consolefontdesc32 {
+       unsigned short charcount;       /* characters in font (256 or 512) */
+       unsigned short charheight;      /* scan lines per character (1-32) */
+       u32 chardata;                   /* font data in expanded form */
+};
+
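+/* PIO_FONTX/GIO_FONTX: translate consolefontdesc32 into a console_font_op. */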
+static int do_fontx_ioctl(unsigned int fd, int cmd, struct consolefontdesc32 *user_cfd, struct file *file)
+{
+       struct consolefontdesc cfdarg;
+       struct console_font_op op;
+       int i, perm;
+
+       perm = vt_check(file);
+       if (perm < 0) return perm;
+       
+       if (copy_from_user(&cfdarg, user_cfd, sizeof(struct consolefontdesc32)))
+               return -EFAULT;
+       
+       cfdarg.chardata = (unsigned char *)A(((struct consolefontdesc32 *)&cfdarg)->chardata);
+       
+       switch (cmd) {
+       case PIO_FONTX:
+               if (!perm)
+                       return -EPERM;
+               op.op = KD_FONT_OP_SET;
+               op.flags = 0;
+               op.width = 8;
+               op.height = cfdarg.charheight;
+               op.charcount = cfdarg.charcount;
+               op.data = cfdarg.chardata;
+               return con_font_op(fg_console, &op);
+       case GIO_FONTX:
+               if (!cfdarg.chardata)
+                       return 0;
+               op.op = KD_FONT_OP_GET;
+               op.flags = 0;
+               op.width = 8;
+               op.height = cfdarg.charheight;
+               op.charcount = cfdarg.charcount;
+               op.data = cfdarg.chardata;
+               i = con_font_op(fg_console, &op);
+               if (i)
+                       return i;
+               cfdarg.charheight = op.height;
+               cfdarg.charcount = op.charcount;
+               ((struct consolefontdesc32 *)&cfdarg)->chardata = (unsigned long)cfdarg.chardata;
+               if (copy_to_user(user_cfd, &cfdarg, sizeof(struct consolefontdesc32)))
+                       return -EFAULT;
+               return 0;
+       }
+       return -EINVAL;
+}
+
+struct console_font_op32 {
+       unsigned int op;        /* operation code KD_FONT_OP_* */
+       unsigned int flags;     /* KD_FONT_FLAG_* */
+       unsigned int width, height;     /* font size */
+       unsigned int charcount;
+       u32 data;    /* font data with height fixed to 32 */
+};
+                                        
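+/* KDFONTOP: only the font data pointer differs between the two layouts. */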
+static int do_kdfontop_ioctl(unsigned int fd, unsigned int cmd, struct console_font_op32 *fontop, struct file *file)
+{
+       struct console_font_op op;
+       int perm = vt_check(file), i;
+       struct vt_struct *vt;
+       
+       if (perm < 0) return perm;
+       
+       if (copy_from_user(&op, (void *) fontop, sizeof(struct console_font_op32)))
+               return -EFAULT;
+       if (!perm && op.op != KD_FONT_OP_GET)
+               return -EPERM;
+       op.data = (unsigned char *)A(((struct console_font_op32 *)&op)->data);
+       op.flags |= KD_FONT_FLAG_OLD;
+       vt = (struct vt_struct *)((struct tty_struct *)file->private_data)->driver_data;
+       i = con_font_op(vt->vc_num, &op);
+       if (i) return i;
+       ((struct console_font_op32 *)&op)->data = (unsigned long)op.data;
+       if (copy_to_user((void *) fontop, &op, sizeof(struct console_font_op32)))
+               return -EFAULT;
+       return 0;
+}
+
+struct unimapdesc32 {
+       unsigned short entry_ct;
+       u32 entries;
+};
+
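+/* PIO_UNIMAP/GIO_UNIMAP: the unipair array sits behind a 32-bit pointer. */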
+static int do_unimap_ioctl(unsigned int fd, unsigned int cmd, struct unimapdesc32 *user_ud, struct file *file)
+{
+       struct unimapdesc32 tmp;
+       int perm = vt_check(file);
+       
+       if (perm < 0) return perm;
+       if (copy_from_user(&tmp, user_ud, sizeof tmp))
+               return -EFAULT;
+       switch (cmd) {
+       case PIO_UNIMAP:
+               if (!perm) return -EPERM;
+               return con_set_unimap(fg_console, tmp.entry_ct, (struct unipair *)A(tmp.entries));
+       case GIO_UNIMAP:
+               return con_get_unimap(fg_console, tmp.entry_ct, &(user_ud->entry_ct), (struct unipair *)A(tmp.entries));
+       }
+       return 0;
+}
+
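+/*
+ * SMB_IOC_GETMOUNTUID: run the ioctl against a kernel-resident uid under
+ * KERNEL_DS, then hand the result back to user space as a 32-bit uid.
+ */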
+static int do_smb_getmountuid(unsigned int fd, unsigned int cmd, unsigned long arg)
+{
+       mm_segment_t old_fs = get_fs();
+       __kernel_uid_t kuid;
+       int err;
+
+       cmd = SMB_IOC_GETMOUNTUID;
+
+       set_fs(KERNEL_DS);
+       err = sys_ioctl(fd, cmd, (unsigned long)&kuid);
+       set_fs(old_fs);
+
+       if (err >= 0)
+               err = put_user(kuid, (__kernel_uid_t32 *)arg);
+
+       return err;
+}
+
+struct atmif_sioc32 {
+        int                number;
+        int                length;
+        __kernel_caddr_t32 arg;
+};
+
+struct atm_iobuf32 {
+       int                length;
+       __kernel_caddr_t32 buffer;
+};
+
+#define ATM_GETLINKRATE32 _IOW('a', ATMIOC_ITF+1, struct atmif_sioc32)
+#define ATM_GETNAMES32    _IOW('a', ATMIOC_ITF+3, struct atm_iobuf32)
+#define ATM_GETTYPE32     _IOW('a', ATMIOC_ITF+4, struct atmif_sioc32)
+#define ATM_GETESI32     _IOW('a', ATMIOC_ITF+5, struct atmif_sioc32)
+#define ATM_GETADDR32    _IOW('a', ATMIOC_ITF+6, struct atmif_sioc32)
+#define ATM_RSTADDR32    _IOW('a', ATMIOC_ITF+7, struct atmif_sioc32)
+#define ATM_ADDADDR32    _IOW('a', ATMIOC_ITF+8, struct atmif_sioc32)
+#define ATM_DELADDR32    _IOW('a', ATMIOC_ITF+9, struct atmif_sioc32)
+#define ATM_GETCIRANGE32  _IOW('a', ATMIOC_ITF+10, struct atmif_sioc32)
+#define ATM_SETCIRANGE32  _IOW('a', ATMIOC_ITF+11, struct atmif_sioc32)
+#define ATM_SETESI32      _IOW('a', ATMIOC_ITF+12, struct atmif_sioc32)
+#define ATM_SETESIF32     _IOW('a', ATMIOC_ITF+13, struct atmif_sioc32)
+#define ATM_GETSTAT32     _IOW('a', ATMIOC_SARCOM+0, struct atmif_sioc32)
+#define ATM_GETSTATZ32    _IOW('a', ATMIOC_SARCOM+1, struct atmif_sioc32)
+#define ATM_GETLOOP32    _IOW('a', ATMIOC_SARCOM+2, struct atmif_sioc32)
+#define ATM_SETLOOP32    _IOW('a', ATMIOC_SARCOM+3, struct atmif_sioc32)
+#define ATM_QUERYLOOP32          _IOW('a', ATMIOC_SARCOM+4, struct atmif_sioc32)
+
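+/*
+ * Each 32-bit ATM ioctl number differs from its native counterpart only in
+ * the argument size encoded by _IOW(), so a simple table maps one to the
+ * other.
+ */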
+static struct {
+        unsigned int cmd32;
+        unsigned int cmd;
+} atm_ioctl_map[] = {
+        { ATM_GETLINKRATE32, ATM_GETLINKRATE },
+       { ATM_GETNAMES32,    ATM_GETNAMES },
+        { ATM_GETTYPE32,     ATM_GETTYPE },
+        { ATM_GETESI32,      ATM_GETESI },
+        { ATM_GETADDR32,     ATM_GETADDR },
+        { ATM_RSTADDR32,     ATM_RSTADDR },
+        { ATM_ADDADDR32,     ATM_ADDADDR },
+        { ATM_DELADDR32,     ATM_DELADDR },
+        { ATM_GETCIRANGE32,  ATM_GETCIRANGE },
+       { ATM_SETCIRANGE32,  ATM_SETCIRANGE },
+       { ATM_SETESI32,      ATM_SETESI },
+       { ATM_SETESIF32,     ATM_SETESIF },
+       { ATM_GETSTAT32,     ATM_GETSTAT },
+       { ATM_GETSTATZ32,    ATM_GETSTATZ },
+       { ATM_GETLOOP32,     ATM_GETLOOP },
+       { ATM_SETLOOP32,     ATM_SETLOOP },
+       { ATM_QUERYLOOP32,   ATM_QUERYLOOP }
+};
+
+#define NR_ATM_IOCTL (sizeof(atm_ioctl_map)/sizeof(atm_ioctl_map[0]))
+
+
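+/*
+ * do_atm_iobuf()/do_atmif_sioc() bounce the user buffer described by the
+ * 32-bit structure through a kmalloc'd kernel copy, issue the native ioctl
+ * under KERNEL_DS, and copy the result and updated length back.
+ */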
+static int do_atm_iobuf(unsigned int fd, unsigned int cmd, unsigned long arg)
+{
+       struct atm_iobuf32 iobuf32;
+       struct atm_iobuf   iobuf = { 0, NULL };
+       mm_segment_t old_fs;
+       int err;
+
+       err = copy_from_user(&iobuf32, (struct atm_iobuf32*)arg,
+           sizeof(struct atm_iobuf32));
+       if (err)
+               return -EFAULT;
+
+       iobuf.length = iobuf32.length;
+
+       if (iobuf32.buffer == (__kernel_caddr_t32) NULL || iobuf32.length == 0) {
+               iobuf.buffer = (void*)(unsigned long)iobuf32.buffer;
+       } else {
+               iobuf.buffer = kmalloc(iobuf.length, GFP_KERNEL);
+               if (iobuf.buffer == NULL) {
+                       err = -ENOMEM;
+                       goto out;
+               }
+
+               err = copy_from_user(iobuf.buffer, A(iobuf32.buffer), iobuf.length);
+               if (err) {
+                       err = -EFAULT;
+                       goto out;
+               }
+       }
+
+       old_fs = get_fs(); set_fs (KERNEL_DS);
+       err = sys_ioctl (fd, cmd, (unsigned long)&iobuf);      
+       set_fs (old_fs);
+        if(err)
+               goto out;
+
+        if(iobuf.buffer && iobuf.length > 0) {
+               err = copy_to_user(A(iobuf32.buffer), iobuf.buffer, iobuf.length);
+               if (err) {
+                       err = -EFAULT;
+                       goto out;
+               }
+       }
+       err = __put_user(iobuf.length, &(((struct atm_iobuf32*)arg)->length));
+
+ out:
+        if(iobuf32.buffer && iobuf32.length > 0)
+               kfree(iobuf.buffer);
+
+       return err;
+}
+
+
+static int do_atmif_sioc(unsigned int fd, unsigned int cmd, unsigned long arg)
+{
+        struct atmif_sioc32 sioc32;
+        struct atmif_sioc   sioc = { 0, 0, NULL };
+        mm_segment_t old_fs;
+        int err;
+        
+        err = copy_from_user(&sioc32, (struct atmif_sioc32*)arg,
+                            sizeof(struct atmif_sioc32));
+        if (err)
+                return -EFAULT;
+
+        sioc.number = sioc32.number;
+        sioc.length = sioc32.length;
+        
+       if (sioc32.arg == (__kernel_caddr_t32) NULL || sioc32.length == 0) {
+               sioc.arg = (void*)(unsigned long)sioc32.arg;
+        } else {
+                sioc.arg = kmalloc(sioc.length, GFP_KERNEL);
+                if (sioc.arg == NULL) {
+                        err = -ENOMEM;
+                       goto out;
+               }
+                
+                err = copy_from_user(sioc.arg, A(sioc32.arg), sioc32.length);
+                if (err) {
+                        err = -EFAULT;
+                        goto out;
+                }
+        }
+        
+        old_fs = get_fs(); set_fs (KERNEL_DS);
+        err = sys_ioctl (fd, cmd, (unsigned long)&sioc);       
+        set_fs (old_fs);
+        if(err) {
+                goto out;
+       }
+        
+        if(sioc.arg && sioc.length > 0) {
+                err = copy_to_user(A(sioc32.arg), sioc.arg, sioc.length);
+                if (err) {
+                        err = -EFAULT;
+                        goto out;
+                }
+        }
+        err = __put_user(sioc.length, &(((struct atmif_sioc32*)arg)->length));
+        
+ out:
+        if(sioc32.arg && sioc32.length > 0)
+               kfree(sioc.arg);
+        
+       return err;
+}
+
+
+static int do_atm_ioctl(unsigned int fd, unsigned int cmd32, unsigned long arg)
+{
+        int i;
+        unsigned int cmd = 0;
+        
+       switch (cmd32) {
+       case SONET_GETSTAT:
+       case SONET_GETSTATZ:
+       case SONET_GETDIAG:
+       case SONET_SETDIAG:
+       case SONET_CLRDIAG:
+       case SONET_SETFRAMING:
+       case SONET_GETFRAMING:
+       case SONET_GETFRSENSE:
+               return do_atmif_sioc(fd, cmd32, arg);
+       }
+
+       for (i = 0; i < NR_ATM_IOCTL; i++) {
+               if (cmd32 == atm_ioctl_map[i].cmd32) {
+                       cmd = atm_ioctl_map[i].cmd;
+                       break;
+               }
+       }
+       if (i == NR_ATM_IOCTL)
+               return -EINVAL;
+        
+        switch (cmd) {
+       case ATM_GETNAMES:
+               return do_atm_iobuf(fd, cmd, arg);
+           
+       case ATM_GETLINKRATE:
+        case ATM_GETTYPE:
+        case ATM_GETESI:
+        case ATM_GETADDR:
+        case ATM_RSTADDR:
+        case ATM_ADDADDR:
+        case ATM_DELADDR:
+        case ATM_GETCIRANGE:
+       case ATM_SETCIRANGE:
+       case ATM_SETESI:
+       case ATM_SETESIF:
+       case ATM_GETSTAT:
+       case ATM_GETSTATZ:
+       case ATM_GETLOOP:
+       case ATM_SETLOOP:
+       case ATM_QUERYLOOP:
+                return do_atmif_sioc(fd, cmd, arg);
+        }
+
+        return -EINVAL;
+}
+
+#if defined(CONFIG_BLK_DEV_LVM) || defined(CONFIG_BLK_DEV_LVM_MODULE)
+/* Ugh, LVM. Pity it was not cleaned up before being accepted :((. */
+typedef struct {
+       uint8_t vg_name[NAME_LEN];
+       uint32_t vg_number;
+       uint32_t vg_access;
+       uint32_t vg_status;
+       uint32_t lv_max;
+       uint32_t lv_cur;
+       uint32_t lv_open;
+       uint32_t pv_max;
+       uint32_t pv_cur;
+       uint32_t pv_act;
+       uint32_t dummy;
+       uint32_t vgda;
+       uint32_t pe_size;
+       uint32_t pe_total;
+       uint32_t pe_allocated;
+       uint32_t pvg_total;
+       u32 proc;
+       u32 pv[ABS_MAX_PV + 1];
+       u32 lv[ABS_MAX_LV + 1];
+       uint8_t vg_uuid[UUID_LEN+1];    /* volume group UUID */
+} vg32_t;
+
+typedef struct {
+       uint8_t id[2];
+       uint16_t version;
+       lvm_disk_data_t pv_on_disk;
+       lvm_disk_data_t vg_on_disk;
+       lvm_disk_data_t pv_namelist_on_disk;
+       lvm_disk_data_t lv_on_disk;
+       lvm_disk_data_t pe_on_disk;
+       uint8_t pv_name[NAME_LEN];
+       uint8_t vg_name[NAME_LEN];
+       uint8_t system_id[NAME_LEN];
+       kdev_t pv_dev;
+       uint32_t pv_number;
+       uint32_t pv_status;
+       uint32_t pv_allocatable;
+       uint32_t pv_size;
+       uint32_t lv_cur;
+       uint32_t pe_size;
+       uint32_t pe_total;
+       uint32_t pe_allocated;
+       uint32_t pe_stale;
+       u32 pe;
+       u32 inode;
+       uint8_t pv_uuid[UUID_LEN+1];
+} pv32_t;
+
+typedef struct {
+       char lv_name[NAME_LEN];
+       u32 lv;
+} lv_req32_t;
+
+typedef struct {
+       u32 lv_index;
+       u32 lv;
+       /* Transfer size because user space and kernel space differ */
+       uint16_t size;
+} lv_status_byindex_req32_t;
+
+typedef struct {
+       dev_t dev;
+       u32   lv;
+} lv_status_bydev_req32_t;
+
+typedef struct {
+       uint8_t lv_name[NAME_LEN];
+       kdev_t old_dev;
+       kdev_t new_dev;
+       u32 old_pe;
+       u32 new_pe;
+} le_remap_req32_t;
+
+typedef struct {
+       char pv_name[NAME_LEN];
+       u32 pv;
+} pv_status_req32_t;
+
+typedef struct {
+       uint8_t lv_name[NAME_LEN];
+       uint8_t vg_name[NAME_LEN];
+       uint32_t lv_access;
+       uint32_t lv_status;
+       uint32_t lv_open;
+       kdev_t lv_dev;
+       uint32_t lv_number;
+       uint32_t lv_mirror_copies;
+       uint32_t lv_recovery;
+       uint32_t lv_schedule;
+       uint32_t lv_size;
+       u32 lv_current_pe;
+       uint32_t lv_current_le;
+       uint32_t lv_allocated_le;
+       uint32_t lv_stripes;
+       uint32_t lv_stripesize;
+       uint32_t lv_badblock;
+       uint32_t lv_allocation;
+       uint32_t lv_io_timeout;
+       uint32_t lv_read_ahead;
+       /* delta to version 1 starts here */
+       u32 lv_snapshot_org;
+       u32 lv_snapshot_prev;
+       u32 lv_snapshot_next;
+       u32 lv_block_exception;
+       uint32_t lv_remap_ptr;
+       uint32_t lv_remap_end;
+       uint32_t lv_chunk_size;
+       uint32_t lv_snapshot_minor;
+       char dummy[200];
+} lv32_t;
+
+typedef struct {
+       u32 hash[2];
+       u32 rsector_org;
+       kdev_t rdev_org;
+       u32 rsector_new;
+       kdev_t rdev_new;
+} lv_block_exception32_t;
+
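+/*
+ * get_lv_t() rebuilds a native lv_t from the 32-bit layout: the fixed-size
+ * fields are copied in three chunks around the embedded pointers, then the
+ * lv_current_pe and lv_block_exception arrays are pulled in separately.
+ * put_lv_t() releases everything again.
+ */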
+static void put_lv_t(lv_t *l)
+{
+       if (l->lv_current_pe) vfree(l->lv_current_pe);
+       if (l->lv_block_exception) vfree(l->lv_block_exception);
+       kfree(l);
+}
+
+static lv_t *get_lv_t(u32 p, int *errp)
+{
+       int err, i;
+       u32 ptr1, ptr2;
+       size_t size;
+       lv_block_exception32_t *lbe32;
+       lv_block_exception_t *lbe;
+       lv32_t *ul = (lv32_t *)A(p);
+       lv_t *l = (lv_t *)kmalloc(sizeof(lv_t), GFP_KERNEL);
+       if (!l) {
+               *errp = -ENOMEM;
+               return NULL;
+       }
+       memset(l, 0, sizeof(lv_t));
+       err = copy_from_user(l, ul, (long)&((lv32_t *)0)->lv_current_pe);
+       err |= __copy_from_user(&l->lv_current_le, &ul->lv_current_le,
+                               ((long)&ul->lv_snapshot_org) - ((long)&ul->lv_current_le));
+       err |= __copy_from_user(&l->lv_remap_ptr, &ul->lv_remap_ptr,
+                               ((long)&ul->dummy[0]) - ((long)&ul->lv_remap_ptr));
+       err |= __get_user(ptr1, &ul->lv_current_pe);
+       err |= __get_user(ptr2, &ul->lv_block_exception);
+       if (err) {
+               kfree(l);
+               *errp = -EFAULT;
+               return NULL;
+       }
+       if (ptr1) {
+               size = l->lv_allocated_le * sizeof(pe_t);
+               l->lv_current_pe = vmalloc(size);
+               if (l->lv_current_pe)
+                       err = copy_from_user(l->lv_current_pe, (void *)A(ptr1), size);
+       }
+       if (!err && ptr2) {
+               size = l->lv_remap_end * sizeof(lv_block_exception_t);
+               l->lv_block_exception = lbe = vmalloc(size);
+               if (l->lv_block_exception) {
+                       lbe32 = (lv_block_exception32_t *)A(ptr2);
+                       memset(lbe, 0, size);
+                       for (i = 0; i < l->lv_remap_end; i++, lbe++, lbe32++) {
+                               err |= get_user(lbe->rsector_org, &lbe32->rsector_org);
+                               err |= __get_user(lbe->rdev_org, &lbe32->rdev_org);
+                               err |= __get_user(lbe->rsector_new, &lbe32->rsector_new);
+                               err |= __get_user(lbe->rdev_new, &lbe32->rdev_new);
+
+                       }
+               }
+       }
+       if (err || (ptr1 && !l->lv_current_pe) || (ptr2 && !l->lv_block_exception)) {
+               if (!err)
+                       *errp = -ENOMEM;
+               else
+                       *errp = -EFAULT;
+               put_lv_t(l);
+               return NULL;
+       }
+       return l;
+}
+
+static int copy_lv_t(u32 ptr, lv_t *l)
+{
+       int err;
+       lv32_t *ul = (lv32_t *)A(ptr);
+       u32 ptr1;
+       size_t size;
+
+       err = get_user(ptr1, &ul->lv_current_pe);
+       if (err)
+               return -EFAULT;
+       err = copy_to_user(ul, l, (long)&((lv32_t *)0)->lv_current_pe);
+       err |= __copy_to_user(&ul->lv_current_le, &l->lv_current_le,
+                               ((long)&ul->lv_snapshot_org) - ((long)&ul->lv_current_le));
+       err |= __copy_to_user(&ul->lv_remap_ptr, &l->lv_remap_ptr,
+                               ((long)&ul->dummy[0]) - ((long)&ul->lv_remap_ptr));
+       size = l->lv_allocated_le * sizeof(pe_t);
+       if (ptr1)
+               err |= __copy_to_user((void *)A(ptr1), l->lv_current_pe, size);
+       return err ? -EFAULT : 0;
+}
+
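+/*
+ * do_lvm_ioctl(): build a native argument block for each LVM command,
+ * issue the ioctl under KERNEL_DS, then copy results back into the
+ * caller's 32-bit structures and free the temporary copies.
+ */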
+static int do_lvm_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg)
+{
+       vg_t *v;
+       union {
+               lv_req_t lv_req;
+               le_remap_req_t le_remap;
+               lv_status_byindex_req_t lv_byindex;
+               lv_status_bydev_req_t lv_bydev;
+               pv_status_req_t pv_status;
+       } u;
+       pv_t p;
+       int err;
+       u32 ptr = 0;
+       int i;
+       mm_segment_t old_fs;
+       void *karg = &u;
+
+       switch (cmd) {
+       case VG_STATUS:
+               v = kmalloc(sizeof(vg_t), GFP_KERNEL);
+               if (!v) return -ENOMEM;
+               karg = v;
+               break;
+       case VG_CREATE:
+               v = kmalloc(sizeof(vg_t), GFP_KERNEL);
+               if (!v) return -ENOMEM;
+               if (copy_from_user(v, (void *)arg, (long)&((vg32_t *)0)->proc) ||
+                   __get_user(v->proc, &((vg32_t *)arg)->proc)) {
+                       kfree(v);
+                       return -EFAULT;
+               }
+               if (copy_from_user(v->vg_uuid, ((vg32_t *)arg)->vg_uuid, UUID_LEN+1)) {
+                       kfree(v);
+                       return -EFAULT;
+               }
+                   
+               karg = v;
+               memset(v->pv, 0, sizeof(v->pv) + sizeof(v->lv));
+               if (v->pv_max > ABS_MAX_PV || v->lv_max > ABS_MAX_LV) {
+                       kfree(v);
+                       return -EPERM;
+               }
+               for (i = 0; i < v->pv_max; i++) {
+                       err = __get_user(ptr, &((vg32_t *)arg)->pv[i]);
+                       if (err) break;
+                       if (ptr) {
+                               v->pv[i] = kmalloc(sizeof(pv_t), GFP_KERNEL);
+                               if (!v->pv[i]) {
+                                       err = -ENOMEM;
+                                       break;
+                               }
+                               err = copy_from_user(v->pv[i], (void *)A(ptr), sizeof(pv32_t) - 8 - UUID_LEN+1);
+                               if (err) {
+                                       err = -EFAULT;
+                                       break;
+                               }
+                               err = copy_from_user(v->pv[i]->pv_uuid, ((pv32_t *)A(ptr))->pv_uuid, UUID_LEN+1);
+                               if (err) {
+                                       err = -EFAULT;
+                                       break;
+                               }
+
+                               
+                               v->pv[i]->pe = NULL; v->pv[i]->inode = NULL;
+                       }
+               }
+               if (!err) {
+                       for (i = 0; i < v->lv_max; i++) {
+                               err = __get_user(ptr, &((vg32_t *)arg)->lv[i]);
+                               if (err) break;
+                               if (ptr) {
+                                       v->lv[i] = get_lv_t(ptr, &err);
+                                       if (err) break;
+                               }
+                       }
+               }
+               break;
+       case LV_CREATE:
+       case LV_EXTEND:
+       case LV_REDUCE:
+       case LV_REMOVE:
+       case LV_RENAME:
+       case LV_STATUS_BYNAME:
+               err = copy_from_user(&u.pv_status, (void *)arg, sizeof(u.pv_status.pv_name));
+               if (err) return -EFAULT;
+               if (cmd != LV_REMOVE) {
+                       err = __get_user(ptr, &((lv_req32_t *)arg)->lv);
+                       if (err) return err;
+                       u.lv_req.lv = get_lv_t(ptr, &err);
+               } else
+                       u.lv_req.lv = NULL;
+               break;
+
+
+       case LV_STATUS_BYINDEX:
+               err = get_user(u.lv_byindex.lv_index, &((lv_status_byindex_req32_t *)arg)->lv_index);
+               err |= __get_user(ptr, &((lv_status_byindex_req32_t *)arg)->lv);
+               if (err) return err;
+               u.lv_byindex.lv = get_lv_t(ptr, &err);
+               break;
+       case LV_STATUS_BYDEV:
+               err = get_user(u.lv_bydev.dev, &((lv_status_bydev_req32_t *)arg)->dev);
+               u.lv_bydev.lv = get_lv_t(ptr, &err);
+               if (err) return err;
+               u.lv_bydev.lv = &p;
+               p.pe = NULL; p.inode = NULL;            
+               break;          
+       case VG_EXTEND:
+               err = copy_from_user(&p, (void *)arg, sizeof(pv32_t) - 8 - UUID_LEN+1);
+               if (err) return -EFAULT;
+               err = copy_from_user(p.pv_uuid, ((pv32_t *)arg)->pv_uuid, UUID_LEN+1);
+               if (err) return -EFAULT;
+               p.pe = NULL; p.inode = NULL;
+               karg = &p;
+               break;
+       case PV_CHANGE:
+       case PV_STATUS:
+               err = copy_from_user(&u.pv_status, (void *)arg, sizeof(u.lv_req.lv_name));
+               if (err) return -EFAULT;
+               err = __get_user(ptr, &((pv_status_req32_t *)arg)->pv);
+               if (err) return err;
+               u.pv_status.pv = &p;
+               if (cmd == PV_CHANGE) {
+                       err = copy_from_user(&p, (void *)A(ptr), sizeof(pv32_t) - 8 - UUID_LEN+1);
+                       if (err) return -EFAULT;
+                       p.pe = NULL; p.inode = NULL;
+               }
+               break;
+       }
+        old_fs = get_fs(); set_fs (KERNEL_DS);
+        err = sys_ioctl (fd, cmd, (unsigned long)karg);
+        set_fs (old_fs);
+       switch (cmd) {
+       case VG_STATUS:
+               if (!err) {
+                       if (copy_to_user((void *)arg, v, (long)&((vg32_t *)0)->proc) ||
+                           clear_user(&((vg32_t *)arg)->proc, sizeof(vg32_t) - (long)&((vg32_t *)0)->proc))
+                               err = -EFAULT;
+               }
+               if (copy_to_user(((vg32_t *)arg)->vg_uuid, v->vg_uuid, UUID_LEN+1)) {
+                       err = -EFAULT;
+               }
+               kfree(v);
+               break;
+       case VG_CREATE:
+               for (i = 0; i < v->pv_max; i++)
+                       if (v->pv[i]) kfree(v->pv[i]);
+               for (i = 0; i < v->lv_max; i++)
+                       if (v->lv[i]) put_lv_t(v->lv[i]);
+               kfree(v);
+               break;
+       case LV_STATUS_BYNAME:
+               if (!err && u.lv_req.lv) err = copy_lv_t(ptr, u.lv_req.lv);
+               /* Fall through */
+        case LV_CREATE:
+       case LV_EXTEND:
+       case LV_REDUCE:
+               if (u.lv_req.lv) put_lv_t(u.lv_req.lv);
+               break;
+       case LV_STATUS_BYINDEX:
+               if (u.lv_byindex.lv) {
+                       if (!err) err = copy_lv_t(ptr, u.lv_byindex.lv);
+                       put_lv_t(u.lv_byindex.lv);
+               }
+               break;
+       case PV_STATUS:
+               if (!err) {
+                       err = copy_to_user((void *)A(ptr), &p, sizeof(pv32_t) - 8 - UUID_LEN+1);
+                       if (err) return -EFAULT;
+                       err = copy_to_user(((pv_t *)A(ptr))->pv_uuid, p.pv_uuid, UUID_LEN + 1);
+                       if (err) return -EFAULT;
+               }
+               break;
+       case LV_STATUS_BYDEV:
+               if (!err) {
+                       err = copy_lv_t(ptr, u.lv_bydev.lv);
+                       put_lv_t(u.lv_byindex.lv);
+               }
+               break;
+       }
+       return err;
+}
+#endif
+
+#if defined(CONFIG_DRM) || defined(CONFIG_DRM_MODULE)
+/* This really belongs in include/linux/drm.h -DaveM */
+#include "../../../drivers/char/drm/drm.h"
+
+typedef struct drm32_version {
+       int    version_major;     /* Major version                          */
+       int    version_minor;     /* Minor version                          */
+       int    version_patchlevel;/* Patch level                            */
+       int    name_len;          /* Length of name buffer                  */
+       u32    name;              /* Name of driver                         */
+       int    date_len;          /* Length of date buffer                  */
+       u32    date;              /* User-space buffer to hold date         */
+       int    desc_len;          /* Length of desc buffer                  */
+       u32    desc;              /* User-space buffer to hold desc         */
+} drm32_version_t;
+#define DRM32_IOCTL_VERSION    DRM_IOWR(0x00, drm32_version_t)
+
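+/*
+ * DRM_IOCTL_VERSION: the name/date/desc buffers are user pointers stored as
+ * u32, so they are bounced through kmalloc'd kernel buffers around the
+ * native ioctl. The remaining drm32_* handlers follow the same pattern.
+ */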
+static int drm32_version(unsigned int fd, unsigned int cmd, unsigned long arg)
+{
+       drm32_version_t *uversion = (drm32_version_t *)arg;
+       char *name_ptr, *date_ptr, *desc_ptr;
+       u32 tmp1, tmp2, tmp3;
+       drm_version_t kversion;
+       mm_segment_t old_fs;
+       int ret;
+
+       memset(&kversion, 0, sizeof(kversion));
+       if (get_user(kversion.name_len, &uversion->name_len) ||
+           get_user(kversion.date_len, &uversion->date_len) ||
+           get_user(kversion.desc_len, &uversion->desc_len) ||
+           get_user(tmp1, &uversion->name) ||
+           get_user(tmp2, &uversion->date) ||
+           get_user(tmp3, &uversion->desc))
+               return -EFAULT;
+
+       name_ptr = (char *) A(tmp1);
+       date_ptr = (char *) A(tmp2);
+       desc_ptr = (char *) A(tmp3);
+
+       ret = -ENOMEM;
+       if (kversion.name_len && name_ptr) {
+               kversion.name = kmalloc(kversion.name_len, GFP_KERNEL);
+               if (!kversion.name)
+                       goto out;
+       }
+       if (kversion.date_len && date_ptr) {
+               kversion.date = kmalloc(kversion.date_len, GFP_KERNEL);
+               if (!kversion.date)
+                       goto out;
+       }
+       if (kversion.desc_len && desc_ptr) {
+               kversion.desc = kmalloc(kversion.desc_len, GFP_KERNEL);
+               if (!kversion.desc)
+                       goto out;
+       }
+
+        old_fs = get_fs();
+       set_fs(KERNEL_DS);
+        ret = sys_ioctl (fd, DRM_IOCTL_VERSION, (unsigned long)&kversion);
+        set_fs(old_fs);
+
+       if (!ret) {
+               if ((kversion.name &&
+                    copy_to_user(name_ptr, kversion.name, kversion.name_len)) ||
+                   (kversion.date &&
+                    copy_to_user(date_ptr, kversion.date, kversion.date_len)) ||
+                   (kversion.desc &&
+                    copy_to_user(desc_ptr, kversion.desc, kversion.desc_len)))
+                       ret = -EFAULT;
+               if (put_user(kversion.version_major, &uversion->version_major) ||
+                   put_user(kversion.version_minor, &uversion->version_minor) ||
+                   put_user(kversion.version_patchlevel, &uversion->version_patchlevel) ||
+                   put_user(kversion.name_len, &uversion->name_len) ||
+                   put_user(kversion.date_len, &uversion->date_len) ||
+                   put_user(kversion.desc_len, &uversion->desc_len))
+                       ret = -EFAULT;
+       }
+
+out:
+       if (kversion.name)
+               kfree(kversion.name);
+       if (kversion.date)
+               kfree(kversion.date);
+       if (kversion.desc)
+               kfree(kversion.desc);
+       return ret;
+}
+
+typedef struct drm32_unique {
+       int     unique_len;       /* Length of unique                       */
+       u32     unique;           /* Unique name for driver instantiation   */
+} drm32_unique_t;
+#define DRM32_IOCTL_GET_UNIQUE DRM_IOWR(0x01, drm32_unique_t)
+#define DRM32_IOCTL_SET_UNIQUE DRM_IOW( 0x10, drm32_unique_t)
+
+static int drm32_getsetunique(unsigned int fd, unsigned int cmd, unsigned long arg)
+{
+       drm32_unique_t *uarg = (drm32_unique_t *)arg;
+       drm_unique_t karg;
+       mm_segment_t old_fs;
+       char *uptr;
+       u32 tmp;
+       int ret;
+
+       if (get_user(karg.unique_len, &uarg->unique_len))
+               return -EFAULT;
+       karg.unique = NULL;
+
+       if (get_user(tmp, &uarg->unique))
+               return -EFAULT;
+
+       uptr = (char *) A(tmp);
+
+       if (uptr) {
+               karg.unique = kmalloc(karg.unique_len, GFP_KERNEL);
+               if (!karg.unique)
+                       return -ENOMEM;
+               if (cmd == DRM32_IOCTL_SET_UNIQUE &&
+                   copy_from_user(karg.unique, uptr, karg.unique_len)) {
+                       kfree(karg.unique);
+                       return -EFAULT;
+               }
+       }
+
+       old_fs = get_fs();
+       set_fs(KERNEL_DS);
+       if (cmd == DRM32_IOCTL_GET_UNIQUE)
+               ret = sys_ioctl (fd, DRM_IOCTL_GET_UNIQUE, (unsigned long)&karg);
+       else
+               ret = sys_ioctl (fd, DRM_IOCTL_SET_UNIQUE, (unsigned long)&karg);
+        set_fs(old_fs);
+
+       if (!ret) {
+               if (cmd == DRM32_IOCTL_GET_UNIQUE &&
+                   uptr != NULL &&
+                   copy_to_user(uptr, karg.unique, karg.unique_len))
+                       ret = -EFAULT;
+               if (put_user(karg.unique_len, &uarg->unique_len))
+                       ret = -EFAULT;
+       }
+
+       if (karg.unique != NULL)
+               kfree(karg.unique);
+
+       return ret;
+}
+
+typedef struct drm32_map {
+       u32             offset;  /* Requested physical address (0 for SAREA)*/
+       u32             size;    /* Requested physical size (bytes)         */
+       drm_map_type_t  type;    /* Type of memory to map                   */
+       drm_map_flags_t flags;   /* Flags                                   */
+       u32             handle;  /* User-space: "Handle" to pass to mmap    */
+                                /* Kernel-space: kernel-virtual address    */
+       int             mtrr;    /* MTRR slot used                          */
+                                /* Private data                            */
+} drm32_map_t;
+#define DRM32_IOCTL_ADD_MAP    DRM_IOWR(0x15, drm32_map_t)
+
+static int drm32_addmap(unsigned int fd, unsigned int cmd, unsigned long arg)
+{
+       drm32_map_t *uarg = (drm32_map_t *) arg;
+       drm_map_t karg;
+       mm_segment_t old_fs;
+       u32 tmp;
+       int ret;
+
+       ret  = get_user(karg.offset, &uarg->offset);
+       ret |= get_user(karg.size, &uarg->size);
+       ret |= get_user(karg.type, &uarg->type);
+       ret |= get_user(karg.flags, &uarg->flags);
+       ret |= get_user(tmp, &uarg->handle);
+       ret |= get_user(karg.mtrr, &uarg->mtrr);
+       if (ret)
+               return -EFAULT;
+
+       karg.handle = (void *) A(tmp);
+
+       old_fs = get_fs();
+       set_fs(KERNEL_DS);
+       ret = sys_ioctl(fd, DRM_IOCTL_ADD_MAP, (unsigned long) &karg);
+       set_fs(old_fs);
+
+       if (!ret) {
+               ret  = put_user(karg.offset, &uarg->offset);
+               ret |= put_user(karg.size, &uarg->size);
+               ret |= put_user(karg.type, &uarg->type);
+               ret |= put_user(karg.flags, &uarg->flags);
+               tmp = (u32) (long)karg.handle;
+               ret |= put_user(tmp, &uarg->handle);
+               ret |= put_user(karg.mtrr, &uarg->mtrr);
+               if (ret)
+                       ret = -EFAULT;
+       }
+
+       return ret;
+}
+
+typedef struct drm32_buf_info {
+       int            count;   /* Entries in list                           */
+       u32            list;    /* (drm_buf_desc_t *) */ 
+} drm32_buf_info_t;
+#define DRM32_IOCTL_INFO_BUFS  DRM_IOWR(0x18, drm32_buf_info_t)
+
+static int drm32_info_bufs(unsigned int fd, unsigned int cmd, unsigned long arg)
+{
+       drm32_buf_info_t *uarg = (drm32_buf_info_t *)arg;
+       drm_buf_desc_t *ulist;
+       drm_buf_info_t karg;
+       mm_segment_t old_fs;
+       int orig_count, ret;
+       u32 tmp;
+
+       if (get_user(karg.count, &uarg->count) ||
+           get_user(tmp, &uarg->list))
+               return -EFAULT;
+
+       ulist = (drm_buf_desc_t *) A(tmp);
+
+       orig_count = karg.count;
+
+       karg.list = kmalloc(karg.count * sizeof(drm_buf_desc_t), GFP_KERNEL);
+       if (!karg.list)
+               return -ENOMEM;
+
+       old_fs = get_fs();
+       set_fs(KERNEL_DS);
+       ret = sys_ioctl(fd, DRM_IOCTL_INFO_BUFS, (unsigned long) &karg);
+       set_fs(old_fs);
+
+       if (!ret) {
+               if (karg.count <= orig_count &&
+                   (copy_to_user(ulist, karg.list,
+                                 karg.count * sizeof(drm_buf_desc_t))))
+                       ret = -EFAULT;
+               if (put_user(karg.count, &uarg->count))
+                       ret = -EFAULT;
+       }
+
+       kfree(karg.list);
+
+       return ret;
+}
+
+typedef struct drm32_buf_free {
+       int            count;
+       u32            list;    /* (int *) */
+} drm32_buf_free_t;
+#define DRM32_IOCTL_FREE_BUFS  DRM_IOW( 0x1a, drm32_buf_free_t)
+
+static int drm32_free_bufs(unsigned int fd, unsigned int cmd, unsigned long arg)
+{
+       drm32_buf_free_t *uarg = (drm32_buf_free_t *)arg;
+       drm_buf_free_t karg;
+       mm_segment_t old_fs;
+       int *ulist;
+       int ret;
+       u32 tmp;
+
+       if (get_user(karg.count, &uarg->count) ||
+           get_user(tmp, &uarg->list))
+               return -EFAULT;
+
+       ulist = (int *) A(tmp);
+
+       karg.list = kmalloc(karg.count * sizeof(int), GFP_KERNEL);
+       if (!karg.list)
+               return -ENOMEM;
+
+       ret = -EFAULT;
+       if (copy_from_user(karg.list, ulist, (karg.count * sizeof(int))))
+               goto out;
+
+       old_fs = get_fs();
+       set_fs(KERNEL_DS);
+       ret = sys_ioctl(fd, DRM_IOCTL_FREE_BUFS, (unsigned long) &karg);
+       set_fs(old_fs);
+
+out:
+       kfree(karg.list);
+
+       return ret;
+}
+
+typedef struct drm32_buf_pub {
+       int               idx;         /* Index into master buflist          */
+       int               total;       /* Buffer size                        */
+       int               used;        /* Amount of buffer in use (for DMA)  */
+       u32               address;     /* Address of buffer (void *)         */
+} drm32_buf_pub_t;
+
+typedef struct drm32_buf_map {
+       int           count;    /* Length of buflist                        */
+       u32           virtual;  /* Mmaped area in user-virtual (void *)     */
+       u32           list;     /* Buffer information (drm_buf_pub_t *)     */
+} drm32_buf_map_t;
+#define DRM32_IOCTL_MAP_BUFS   DRM_IOWR(0x19, drm32_buf_map_t)
+
+static int drm32_map_bufs(unsigned int fd, unsigned int cmd, unsigned long arg)
+{
+       drm32_buf_map_t *uarg = (drm32_buf_map_t *)arg;
+       drm32_buf_pub_t *ulist;
+       drm_buf_map_t karg;
+       mm_segment_t old_fs;
+       int orig_count, ret, i;
+       u32 tmp1, tmp2;
+
+       if (get_user(karg.count, &uarg->count) ||
+           get_user(tmp1, &uarg->virtual) ||
+           get_user(tmp2, &uarg->list))
+               return -EFAULT;
+
+       karg.virtual = (void *) A(tmp1);
+       ulist = (drm32_buf_pub_t *) A(tmp2);
+
+       orig_count = karg.count;
+
+       karg.list = kmalloc(karg.count * sizeof(drm_buf_pub_t), GFP_KERNEL);
+       if (!karg.list)
+               return -ENOMEM;
+
+       ret = -EFAULT;
+       for (i = 0; i < karg.count; i++) {
+               if (get_user(karg.list[i].idx, &ulist[i].idx) ||
+                   get_user(karg.list[i].total, &ulist[i].total) ||
+                   get_user(karg.list[i].used, &ulist[i].used) ||
+                   get_user(tmp1, &ulist[i].address))
+                       goto out;
+
+               karg.list[i].address = (void *) A(tmp1);
+       }
+
+       old_fs = get_fs();
+       set_fs(KERNEL_DS);
+       ret = sys_ioctl(fd, DRM_IOCTL_MAP_BUFS, (unsigned long) &karg);
+       set_fs(old_fs);
+
+       if (!ret) {
+               for (i = 0; i < orig_count; i++) {
+                       tmp1 = (u32) (long) karg.list[i].address;
+                       if (put_user(karg.list[i].idx, &ulist[i].idx) ||
+                           put_user(karg.list[i].total, &ulist[i].total) ||
+                           put_user(karg.list[i].used, &ulist[i].used) ||
+                           put_user(tmp1, &ulist[i].address)) {
+                               ret = -EFAULT;
+                               goto out;
+                       }
+               }
+               if (put_user(karg.count, &uarg->count))
+                       ret = -EFAULT;
+       }
+
+out:
+       kfree(karg.list);
+       return ret;
+}
+
+typedef struct drm32_dma {
+                               /* Indices here refer to the offset into
+                                  buflist in drm_buf_get_t.  */
+       int             context;          /* Context handle                 */
+       int             send_count;       /* Number of buffers to send      */
+       u32             send_indices;     /* List of handles to buffers (int *) */
+       u32             send_sizes;       /* Lengths of data to send (int *) */
+       drm_dma_flags_t flags;            /* Flags                          */
+       int             request_count;    /* Number of buffers requested    */
+       int             request_size;     /* Desired size for buffers       */
+       u32             request_indices;  /* Buffer information (int *)     */
+       u32             request_sizes;    /* (int *) */
+       int             granted_count;    /* Number of buffers granted      */
+} drm32_dma_t;
+#define DRM32_IOCTL_DMA             DRM_IOWR(0x29, drm32_dma_t)
+
+/* RED PEN     The DRM layer blindly dereferences the send/request
+ *             index/size arrays even though they are userland
+ *             pointers.  -DaveM
+ */
+static int drm32_dma(unsigned int fd, unsigned int cmd, unsigned long arg)
+{
+       drm32_dma_t *uarg = (drm32_dma_t *) arg;
+       int *u_si, *u_ss, *u_ri, *u_rs;
+       drm_dma_t karg;
+       mm_segment_t old_fs;
+       int ret;
+       u32 tmp1, tmp2, tmp3, tmp4;
+
+       karg.send_indices = karg.send_sizes = NULL;
+       karg.request_indices = karg.request_sizes = NULL;
+
+       if (get_user(karg.context, &uarg->context) ||
+           get_user(karg.send_count, &uarg->send_count) ||
+           get_user(tmp1, &uarg->send_indices) ||
+           get_user(tmp2, &uarg->send_sizes) ||
+           get_user(karg.flags, &uarg->flags) ||
+           get_user(karg.request_count, &uarg->request_count) ||
+           get_user(karg.request_size, &uarg->request_size) ||
+           get_user(tmp3, &uarg->request_indices) ||
+           get_user(tmp4, &uarg->request_sizes) ||
+           get_user(karg.granted_count, &uarg->granted_count))
+               return -EFAULT;
+
+       u_si = (int *) A(tmp1);
+       u_ss = (int *) A(tmp2);
+       u_ri = (int *) A(tmp3);
+       u_rs = (int *) A(tmp4);
+
+       if (karg.send_count) {
+               karg.send_indices = kmalloc(karg.send_count * sizeof(int), GFP_KERNEL);
+               karg.send_sizes = kmalloc(karg.send_count * sizeof(int), GFP_KERNEL);
+
+               ret = -ENOMEM;
+               if (!karg.send_indices || !karg.send_sizes)
+                       goto out;
+
+               ret = -EFAULT;
+               if (copy_from_user(karg.send_indices, u_si,
+                                  (karg.send_count * sizeof(int))) ||
+                   copy_from_user(karg.send_sizes, u_ss,
+                                  (karg.send_count * sizeof(int))))
+                       goto out;
+       }
+
+       if (karg.request_count) {
+               karg.request_indices = kmalloc(karg.request_count * sizeof(int), GFP_KERNEL);
+               karg.request_sizes = kmalloc(karg.request_count * sizeof(int), GFP_KERNEL);
+
+               ret = -ENOMEM;
+               if (!karg.request_indices || !karg.request_sizes)
+                       goto out;
+
+               ret = -EFAULT;
+               if (copy_from_user(karg.request_indices, u_ri,
+                                  (karg.request_count * sizeof(int))) ||
+                   copy_from_user(karg.request_sizes, u_rs,
+                                  (karg.request_count * sizeof(int))))
+                       goto out;
+       }
+
+       old_fs = get_fs();
+       set_fs(KERNEL_DS);
+       ret = sys_ioctl(fd, DRM_IOCTL_DMA, (unsigned long) &karg);
+       set_fs(old_fs);
+
+       if (!ret) {
+               if (put_user(karg.context, &uarg->context) ||
+                   put_user(karg.send_count, &uarg->send_count) ||
+                   put_user(karg.flags, &uarg->flags) ||
+                   put_user(karg.request_count, &uarg->request_count) ||
+                   put_user(karg.request_size, &uarg->request_size) ||
+                   put_user(karg.granted_count, &uarg->granted_count))
+                       ret = -EFAULT;
+
+               if (karg.send_count) {
+                       if (copy_to_user(u_si, karg.send_indices,
+                                        (karg.send_count * sizeof(int))) ||
+                           copy_to_user(u_ss, karg.send_sizes,
+                                        (karg.send_count * sizeof(int))))
+                               ret = -EFAULT;
+               }
+               if (karg.request_count) {
+                       if (copy_to_user(u_ri, karg.request_indices,
+                                        (karg.request_count * sizeof(int))) ||
+                           copy_to_user(u_rs, karg.request_sizes,
+                                        (karg.request_count * sizeof(int))))
+                               ret = -EFAULT;
+               }
+       }
+
+out:
+       if (karg.send_indices)
+               kfree(karg.send_indices);
+       if (karg.send_sizes)
+               kfree(karg.send_sizes);
+       if (karg.request_indices)
+               kfree(karg.request_indices);
+       if (karg.request_sizes)
+               kfree(karg.request_sizes);
+
+       return ret;
+}
+
+typedef struct drm32_ctx_res {
+       int             count;
+       u32             contexts; /* (drm_ctx_t *) */
+} drm32_ctx_res_t;
+#define DRM32_IOCTL_RES_CTX    DRM_IOWR(0x26, drm32_ctx_res_t)
+
+static int drm32_res_ctx(unsigned int fd, unsigned int cmd, unsigned long arg)
+{
+       drm32_ctx_res_t *uarg = (drm32_ctx_res_t *) arg;
+       drm_ctx_t *ulist;
+       drm_ctx_res_t karg;
+       mm_segment_t old_fs;
+       int orig_count, ret;
+       u32 tmp;
+
+       karg.contexts = NULL;
+       if (get_user(karg.count, &uarg->count) ||
+           get_user(tmp, &uarg->contexts))
+               return -EFAULT;
+
+       ulist = (drm_ctx_t *) A(tmp);
+
+       orig_count = karg.count;
+       if (karg.count && ulist) {
+               karg.contexts = kmalloc((karg.count * sizeof(drm_ctx_t)), GFP_KERNEL);
+               if (!karg.contexts)
+                       return -ENOMEM;
+               if (copy_from_user(karg.contexts, ulist,
+                                  (karg.count * sizeof(drm_ctx_t)))) {
+                       kfree(karg.contexts);
+                       return -EFAULT;
+               }
+       }
+
+       old_fs = get_fs();
+       set_fs(KERNEL_DS);
+       ret = sys_ioctl(fd, DRM_IOCTL_RES_CTX, (unsigned long) &karg);
+       set_fs(old_fs);
+
+       if (!ret) {
+               if (orig_count) {
+                       if (copy_to_user(ulist, karg.contexts,
+                                        (orig_count * sizeof(drm_ctx_t))))
+                               ret = -EFAULT;
+               }
+               if (put_user(karg.count, &uarg->count))
+                       ret = -EFAULT;
+       }
+
+       if (karg.contexts)
+               kfree(karg.contexts);
+
+       return ret;
+}
+
+#endif
+
+static int ret_einval(unsigned int fd, unsigned int cmd, unsigned long arg)
+{
+       return -EINVAL;
+}
+
+static int broken_blkgetsize(unsigned int fd, unsigned int cmd, unsigned long arg)
+{
+       /* The mkswap binary hard-codes this to the Intel value :-((( */
+       return w_long(fd, BLKGETSIZE, arg);
+}
+
+struct blkpg_ioctl_arg32 {
+       int op;
+       int flags;
+       int datalen;
+       u32 data;
+};
+                                
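+/*
+ * BLKPG: copy the struct blkpg_partition payload into a kernel buffer and
+ * repoint a.data at it before forwarding the ioctl under KERNEL_DS.
+ */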
+static int blkpg_ioctl_trans(unsigned int fd, unsigned int cmd, struct blkpg_ioctl_arg32 *arg)
+{
+       struct blkpg_ioctl_arg a;
+       struct blkpg_partition p;
+       int err;
+       mm_segment_t old_fs = get_fs();
+       
+       err = get_user(a.op, &arg->op);
+       err |= __get_user(a.flags, &arg->flags);
+       err |= __get_user(a.datalen, &arg->datalen);
+       err |= __get_user((long)a.data, &arg->data);
+       if (err) return err;
+       switch (a.op) {
+       case BLKPG_ADD_PARTITION:
+       case BLKPG_DEL_PARTITION:
+               if (a.datalen < sizeof(struct blkpg_partition))
+                       return -EINVAL;
+                if (copy_from_user(&p, a.data, sizeof(struct blkpg_partition)))
+                       return -EFAULT;
+               a.data = &p;
+               set_fs (KERNEL_DS);
+               err = sys_ioctl(fd, cmd, (unsigned long)&a);
+               set_fs (old_fs);
+               break;
+       default:
+               return -EINVAL;
+       }
+       return err;
+}
+
+static int ioc_settimeout(unsigned int fd, unsigned int cmd, unsigned long arg)
+{
+       return rw_long(fd, AUTOFS_IOC_SETTIMEOUT, arg);
+}
+
+struct ioctl_trans {
+       unsigned long cmd;
+       unsigned long handler;
+       struct ioctl_trans *next;
+};
+
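+/*
+ * The translation table is emitted as raw data via inline asm: each
+ * HANDLE_IOCTL() entry expands to three quadwords (cmd, handler, next)
+ * placed between the ioctl_start and ioctl_end labels.
+ */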
+#define REF_SYMBOL(handler) if (0) (void)handler;
+#define HANDLE_IOCTL2(cmd,handler) REF_SYMBOL(handler);  asm volatile(".quad %c0, " #handler ",0"::"i" (cmd)); 
+#define HANDLE_IOCTL(cmd,handler) HANDLE_IOCTL2(cmd,handler)
+#define COMPATIBLE_IOCTL(cmd) HANDLE_IOCTL(cmd,sys_ioctl)
+#define IOCTL_TABLE_START void ioctl_dummy(void) { asm volatile("\nioctl_start:\n\t" );
+#define IOCTL_TABLE_END  asm volatile("\nioctl_end:"); }
+
+IOCTL_TABLE_START
+/* List here explicitly which ioctls are known to have
+ * compatible types passed or none at all...
+ */
+/* Big T */
+COMPATIBLE_IOCTL(TCGETA)
+COMPATIBLE_IOCTL(TCSETA)
+COMPATIBLE_IOCTL(TCSETAW)
+COMPATIBLE_IOCTL(TCSETAF)
+COMPATIBLE_IOCTL(TCSBRK)
+COMPATIBLE_IOCTL(TCXONC)
+COMPATIBLE_IOCTL(TCFLSH)
+COMPATIBLE_IOCTL(TCGETS)
+COMPATIBLE_IOCTL(TCSETS)
+COMPATIBLE_IOCTL(TCSETSW)
+COMPATIBLE_IOCTL(TCSETSF)
+COMPATIBLE_IOCTL(TIOCLINUX)
+/* Little t */
+COMPATIBLE_IOCTL(TIOCGETD)
+COMPATIBLE_IOCTL(TIOCSETD)
+COMPATIBLE_IOCTL(TIOCEXCL)
+COMPATIBLE_IOCTL(TIOCNXCL)
+COMPATIBLE_IOCTL(TIOCCONS)
+COMPATIBLE_IOCTL(TIOCGSOFTCAR)
+COMPATIBLE_IOCTL(TIOCSSOFTCAR)
+COMPATIBLE_IOCTL(TIOCSWINSZ)
+COMPATIBLE_IOCTL(TIOCGWINSZ)
+COMPATIBLE_IOCTL(TIOCMGET)
+COMPATIBLE_IOCTL(TIOCMBIC)
+COMPATIBLE_IOCTL(TIOCMBIS)
+COMPATIBLE_IOCTL(TIOCMSET)
+COMPATIBLE_IOCTL(TIOCPKT)
+COMPATIBLE_IOCTL(TIOCNOTTY)
+COMPATIBLE_IOCTL(TIOCSTI)
+COMPATIBLE_IOCTL(TIOCOUTQ)
+COMPATIBLE_IOCTL(TIOCSPGRP)
+COMPATIBLE_IOCTL(TIOCGPGRP)
+COMPATIBLE_IOCTL(TIOCSCTTY)
+COMPATIBLE_IOCTL(TIOCGPTN)
+COMPATIBLE_IOCTL(TIOCSPTLCK)
+COMPATIBLE_IOCTL(TIOCGSERIAL)
+COMPATIBLE_IOCTL(TIOCSSERIAL)
+COMPATIBLE_IOCTL(TIOCSERGETLSR)
+COMPATIBLE_IOCTL(FBIOGET_VSCREENINFO)
+COMPATIBLE_IOCTL(FBIOPUT_VSCREENINFO)
+COMPATIBLE_IOCTL(FBIOPAN_DISPLAY)
+COMPATIBLE_IOCTL(FBIOGET_FCURSORINFO)
+COMPATIBLE_IOCTL(FBIOGET_VCURSORINFO)
+COMPATIBLE_IOCTL(FBIOPUT_VCURSORINFO)
+COMPATIBLE_IOCTL(FBIOGET_CURSORSTATE)
+COMPATIBLE_IOCTL(FBIOPUT_CURSORSTATE)
+COMPATIBLE_IOCTL(FBIOGET_CON2FBMAP)
+COMPATIBLE_IOCTL(FBIOPUT_CON2FBMAP)
+/* Little f */
+COMPATIBLE_IOCTL(FIOCLEX)
+COMPATIBLE_IOCTL(FIONCLEX)
+COMPATIBLE_IOCTL(FIOASYNC)
+COMPATIBLE_IOCTL(FIONBIO)
+COMPATIBLE_IOCTL(FIONREAD)  /* This is also TIOCINQ */
+/* 0x00 */
+COMPATIBLE_IOCTL(FIBMAP)
+COMPATIBLE_IOCTL(FIGETBSZ)
+/* 0x03 -- HD/IDE ioctls used by hdparm and friends.
+ *         Some need translations, these do not.
+ */
+COMPATIBLE_IOCTL(HDIO_GET_IDENTITY)
+COMPATIBLE_IOCTL(HDIO_SET_DMA)
+COMPATIBLE_IOCTL(HDIO_SET_KEEPSETTINGS)
+COMPATIBLE_IOCTL(HDIO_SET_UNMASKINTR)
+COMPATIBLE_IOCTL(HDIO_SET_NOWERR)
+COMPATIBLE_IOCTL(HDIO_SET_32BIT)
+COMPATIBLE_IOCTL(HDIO_SET_MULTCOUNT)
+COMPATIBLE_IOCTL(HDIO_DRIVE_CMD)
+COMPATIBLE_IOCTL(HDIO_SET_PIO_MODE)
+COMPATIBLE_IOCTL(HDIO_SCAN_HWIF)
+COMPATIBLE_IOCTL(HDIO_SET_NICE)
+/* 0x02 -- Floppy ioctls */
+COMPATIBLE_IOCTL(FDMSGON)
+COMPATIBLE_IOCTL(FDMSGOFF)
+COMPATIBLE_IOCTL(FDSETEMSGTRESH)
+COMPATIBLE_IOCTL(FDFLUSH)
+COMPATIBLE_IOCTL(FDWERRORCLR)
+COMPATIBLE_IOCTL(FDSETMAXERRS)
+COMPATIBLE_IOCTL(FDGETMAXERRS)
+COMPATIBLE_IOCTL(FDGETDRVTYP)
+COMPATIBLE_IOCTL(FDEJECT)
+COMPATIBLE_IOCTL(FDCLRPRM)
+COMPATIBLE_IOCTL(FDFMTBEG)
+COMPATIBLE_IOCTL(FDFMTEND)
+COMPATIBLE_IOCTL(FDRESET)
+COMPATIBLE_IOCTL(FDTWADDLE)
+COMPATIBLE_IOCTL(FDFMTTRK)
+COMPATIBLE_IOCTL(FDRAWCMD)
+/* 0x12 */
+COMPATIBLE_IOCTL(BLKROSET)
+COMPATIBLE_IOCTL(BLKROGET)
+COMPATIBLE_IOCTL(BLKRRPART)
+COMPATIBLE_IOCTL(BLKFLSBUF)
+COMPATIBLE_IOCTL(BLKRASET)
+COMPATIBLE_IOCTL(BLKFRASET)
+COMPATIBLE_IOCTL(BLKSECTSET)
+COMPATIBLE_IOCTL(BLKSSZGET)
+
+/* RAID */
+COMPATIBLE_IOCTL(RAID_VERSION)
+COMPATIBLE_IOCTL(GET_ARRAY_INFO)
+COMPATIBLE_IOCTL(GET_DISK_INFO)
+COMPATIBLE_IOCTL(PRINT_RAID_DEBUG)
+COMPATIBLE_IOCTL(CLEAR_ARRAY)
+COMPATIBLE_IOCTL(ADD_NEW_DISK)
+COMPATIBLE_IOCTL(HOT_REMOVE_DISK)
+COMPATIBLE_IOCTL(SET_ARRAY_INFO)
+COMPATIBLE_IOCTL(SET_DISK_INFO)
+COMPATIBLE_IOCTL(WRITE_RAID_INFO)
+COMPATIBLE_IOCTL(UNPROTECT_ARRAY)
+COMPATIBLE_IOCTL(PROTECT_ARRAY)
+COMPATIBLE_IOCTL(HOT_ADD_DISK)
+COMPATIBLE_IOCTL(SET_DISK_FAULTY)
+COMPATIBLE_IOCTL(RUN_ARRAY)
+COMPATIBLE_IOCTL(START_ARRAY)
+COMPATIBLE_IOCTL(STOP_ARRAY)
+COMPATIBLE_IOCTL(STOP_ARRAY_RO)
+COMPATIBLE_IOCTL(RESTART_ARRAY_RW)
+
+/* Big K */
+COMPATIBLE_IOCTL(PIO_FONT)
+COMPATIBLE_IOCTL(GIO_FONT)
+COMPATIBLE_IOCTL(KDSIGACCEPT)
+COMPATIBLE_IOCTL(KDGETKEYCODE)
+COMPATIBLE_IOCTL(KDSETKEYCODE)
+COMPATIBLE_IOCTL(KIOCSOUND)
+COMPATIBLE_IOCTL(KDMKTONE)
+COMPATIBLE_IOCTL(KDGKBTYPE)
+COMPATIBLE_IOCTL(KDSETMODE)
+COMPATIBLE_IOCTL(KDGETMODE)
+COMPATIBLE_IOCTL(KDSKBMODE)
+COMPATIBLE_IOCTL(KDGKBMODE)
+COMPATIBLE_IOCTL(KDSKBMETA)
+COMPATIBLE_IOCTL(KDGKBMETA)
+COMPATIBLE_IOCTL(KDGKBENT)
+COMPATIBLE_IOCTL(KDSKBENT)
+COMPATIBLE_IOCTL(KDGKBSENT)
+COMPATIBLE_IOCTL(KDSKBSENT)
+COMPATIBLE_IOCTL(KDGKBDIACR)
+COMPATIBLE_IOCTL(KDSKBDIACR)
+COMPATIBLE_IOCTL(KDGKBLED)
+COMPATIBLE_IOCTL(KDSKBLED)
+COMPATIBLE_IOCTL(KDGETLED)
+COMPATIBLE_IOCTL(KDSETLED)
+COMPATIBLE_IOCTL(GIO_SCRNMAP)
+COMPATIBLE_IOCTL(PIO_SCRNMAP)
+COMPATIBLE_IOCTL(GIO_UNISCRNMAP)
+COMPATIBLE_IOCTL(PIO_UNISCRNMAP)
+COMPATIBLE_IOCTL(PIO_FONTRESET)
+COMPATIBLE_IOCTL(PIO_UNIMAPCLR)
+/* Big S */
+COMPATIBLE_IOCTL(SCSI_IOCTL_GET_IDLUN)
+COMPATIBLE_IOCTL(SCSI_IOCTL_DOORLOCK)
+COMPATIBLE_IOCTL(SCSI_IOCTL_DOORUNLOCK)
+COMPATIBLE_IOCTL(SCSI_IOCTL_TEST_UNIT_READY)
+COMPATIBLE_IOCTL(SCSI_IOCTL_TAGGED_ENABLE)
+COMPATIBLE_IOCTL(SCSI_IOCTL_TAGGED_DISABLE)
+COMPATIBLE_IOCTL(SCSI_IOCTL_GET_BUS_NUMBER)
+COMPATIBLE_IOCTL(SCSI_IOCTL_SEND_COMMAND)
+/* Big V */
+COMPATIBLE_IOCTL(VT_SETMODE)
+COMPATIBLE_IOCTL(VT_GETMODE)
+COMPATIBLE_IOCTL(VT_GETSTATE)
+COMPATIBLE_IOCTL(VT_OPENQRY)
+COMPATIBLE_IOCTL(VT_ACTIVATE)
+COMPATIBLE_IOCTL(VT_WAITACTIVE)
+COMPATIBLE_IOCTL(VT_RELDISP)
+COMPATIBLE_IOCTL(VT_DISALLOCATE)
+COMPATIBLE_IOCTL(VT_RESIZE)
+COMPATIBLE_IOCTL(VT_RESIZEX)
+COMPATIBLE_IOCTL(VT_LOCKSWITCH)
+COMPATIBLE_IOCTL(VT_UNLOCKSWITCH)
+/* Little v, the video4linux ioctls */
+COMPATIBLE_IOCTL(VIDIOCGCAP)
+COMPATIBLE_IOCTL(VIDIOCGCHAN)
+COMPATIBLE_IOCTL(VIDIOCSCHAN)
+COMPATIBLE_IOCTL(VIDIOCGPICT)
+COMPATIBLE_IOCTL(VIDIOCSPICT)
+COMPATIBLE_IOCTL(VIDIOCCAPTURE)
+COMPATIBLE_IOCTL(VIDIOCKEY)
+COMPATIBLE_IOCTL(VIDIOCGAUDIO)
+COMPATIBLE_IOCTL(VIDIOCSAUDIO)
+COMPATIBLE_IOCTL(VIDIOCSYNC)
+COMPATIBLE_IOCTL(VIDIOCMCAPTURE)
+COMPATIBLE_IOCTL(VIDIOCGMBUF)
+COMPATIBLE_IOCTL(VIDIOCGUNIT)
+COMPATIBLE_IOCTL(VIDIOCGCAPTURE)
+COMPATIBLE_IOCTL(VIDIOCSCAPTURE)
+/* BTTV specific... */
+COMPATIBLE_IOCTL(_IOW('v',  BASE_VIDIOCPRIVATE+0, char [256]))
+COMPATIBLE_IOCTL(_IOR('v',  BASE_VIDIOCPRIVATE+1, char [256]))
+COMPATIBLE_IOCTL(_IOR('v' , BASE_VIDIOCPRIVATE+2, unsigned int))
+COMPATIBLE_IOCTL(_IOW('v' , BASE_VIDIOCPRIVATE+3, char [16])) /* struct bttv_pll_info */
+COMPATIBLE_IOCTL(_IOR('v' , BASE_VIDIOCPRIVATE+4, int))
+COMPATIBLE_IOCTL(_IOR('v' , BASE_VIDIOCPRIVATE+5, int))
+COMPATIBLE_IOCTL(_IOR('v' , BASE_VIDIOCPRIVATE+6, int))
+COMPATIBLE_IOCTL(_IOR('v' , BASE_VIDIOCPRIVATE+7, int))
+/* Little p (/dev/rtc, /dev/envctrl, etc.) */
+#if 0
+COMPATIBLE_IOCTL(_IOR('p', 20, int[7])) /* RTCGET */
+COMPATIBLE_IOCTL(_IOW('p', 21, int[7])) /* RTCSET */
+#endif
+COMPATIBLE_IOCTL(RTC_AIE_ON)
+COMPATIBLE_IOCTL(RTC_AIE_OFF)
+COMPATIBLE_IOCTL(RTC_UIE_ON)
+COMPATIBLE_IOCTL(RTC_UIE_OFF)
+COMPATIBLE_IOCTL(RTC_PIE_ON)
+COMPATIBLE_IOCTL(RTC_PIE_OFF)
+COMPATIBLE_IOCTL(RTC_WIE_ON)
+COMPATIBLE_IOCTL(RTC_WIE_OFF)
+COMPATIBLE_IOCTL(RTC_ALM_SET)
+COMPATIBLE_IOCTL(RTC_ALM_READ)
+COMPATIBLE_IOCTL(RTC_RD_TIME)
+COMPATIBLE_IOCTL(RTC_SET_TIME)
+COMPATIBLE_IOCTL(RTC_WKALM_SET)
+COMPATIBLE_IOCTL(RTC_WKALM_RD)
+COMPATIBLE_IOCTL(RTC_IRQP_READ)
+COMPATIBLE_IOCTL(RTC_IRQP_SET)
+COMPATIBLE_IOCTL(RTC_EPOCH_READ)
+COMPATIBLE_IOCTL(RTC_EPOCH_SET)
+/* Little m */
+COMPATIBLE_IOCTL(MTIOCTOP)
+/* Socket level stuff */
+COMPATIBLE_IOCTL(FIOSETOWN)
+COMPATIBLE_IOCTL(SIOCSPGRP)
+COMPATIBLE_IOCTL(FIOGETOWN)
+COMPATIBLE_IOCTL(SIOCGPGRP)
+COMPATIBLE_IOCTL(SIOCATMARK)
+COMPATIBLE_IOCTL(SIOCSIFLINK)
+COMPATIBLE_IOCTL(SIOCSIFENCAP)
+COMPATIBLE_IOCTL(SIOCGIFENCAP)
+COMPATIBLE_IOCTL(SIOCSIFBR)
+COMPATIBLE_IOCTL(SIOCGIFBR)
+COMPATIBLE_IOCTL(SIOCSARP)
+COMPATIBLE_IOCTL(SIOCGARP)
+COMPATIBLE_IOCTL(SIOCDARP)
+COMPATIBLE_IOCTL(SIOCSRARP)
+COMPATIBLE_IOCTL(SIOCGRARP)
+COMPATIBLE_IOCTL(SIOCDRARP)
+COMPATIBLE_IOCTL(SIOCADDDLCI)
+COMPATIBLE_IOCTL(SIOCDELDLCI)
+/* SG stuff */
+COMPATIBLE_IOCTL(SG_SET_TIMEOUT)
+COMPATIBLE_IOCTL(SG_GET_TIMEOUT)
+COMPATIBLE_IOCTL(SG_EMULATED_HOST)
+COMPATIBLE_IOCTL(SG_SET_TRANSFORM)
+COMPATIBLE_IOCTL(SG_GET_TRANSFORM)
+COMPATIBLE_IOCTL(SG_SET_RESERVED_SIZE)
+COMPATIBLE_IOCTL(SG_GET_RESERVED_SIZE)
+COMPATIBLE_IOCTL(SG_GET_SCSI_ID)
+COMPATIBLE_IOCTL(SG_SET_FORCE_LOW_DMA)
+COMPATIBLE_IOCTL(SG_GET_LOW_DMA)
+COMPATIBLE_IOCTL(SG_SET_FORCE_PACK_ID)
+COMPATIBLE_IOCTL(SG_GET_PACK_ID)
+COMPATIBLE_IOCTL(SG_GET_NUM_WAITING)
+COMPATIBLE_IOCTL(SG_SET_DEBUG)
+COMPATIBLE_IOCTL(SG_GET_SG_TABLESIZE)
+COMPATIBLE_IOCTL(SG_GET_COMMAND_Q)
+COMPATIBLE_IOCTL(SG_SET_COMMAND_Q)
+COMPATIBLE_IOCTL(SG_GET_VERSION_NUM)
+COMPATIBLE_IOCTL(SG_NEXT_CMD_LEN)
+COMPATIBLE_IOCTL(SG_SCSI_RESET)
+COMPATIBLE_IOCTL(SG_IO)
+COMPATIBLE_IOCTL(SG_GET_REQUEST_TABLE)
+COMPATIBLE_IOCTL(SG_SET_KEEP_ORPHAN)
+COMPATIBLE_IOCTL(SG_GET_KEEP_ORPHAN)
+/* PPP stuff */
+COMPATIBLE_IOCTL(PPPIOCGFLAGS)
+COMPATIBLE_IOCTL(PPPIOCSFLAGS)
+COMPATIBLE_IOCTL(PPPIOCGASYNCMAP)
+COMPATIBLE_IOCTL(PPPIOCSASYNCMAP)
+COMPATIBLE_IOCTL(PPPIOCGUNIT)
+COMPATIBLE_IOCTL(PPPIOCGRASYNCMAP)
+COMPATIBLE_IOCTL(PPPIOCSRASYNCMAP)
+COMPATIBLE_IOCTL(PPPIOCGMRU)
+COMPATIBLE_IOCTL(PPPIOCSMRU)
+COMPATIBLE_IOCTL(PPPIOCSMAXCID)
+COMPATIBLE_IOCTL(PPPIOCGXASYNCMAP)
+COMPATIBLE_IOCTL(PPPIOCSXASYNCMAP)
+COMPATIBLE_IOCTL(PPPIOCXFERUNIT)
+COMPATIBLE_IOCTL(PPPIOCGNPMODE)
+COMPATIBLE_IOCTL(PPPIOCSNPMODE)
+COMPATIBLE_IOCTL(PPPIOCGDEBUG)
+COMPATIBLE_IOCTL(PPPIOCSDEBUG)
+COMPATIBLE_IOCTL(PPPIOCNEWUNIT)
+COMPATIBLE_IOCTL(PPPIOCATTACH)
+COMPATIBLE_IOCTL(PPPIOCDETACH)
+COMPATIBLE_IOCTL(PPPIOCSMRRU)
+COMPATIBLE_IOCTL(PPPIOCCONNECT)
+COMPATIBLE_IOCTL(PPPIOCDISCONN)
+COMPATIBLE_IOCTL(PPPIOCATTCHAN)
+COMPATIBLE_IOCTL(PPPIOCGCHAN)
+/* PPPOX */
+COMPATIBLE_IOCTL(PPPOEIOCSFWD)
+COMPATIBLE_IOCTL(PPPOEIOCDFWD)
+/* CDROM stuff */
+COMPATIBLE_IOCTL(CDROMPAUSE)
+COMPATIBLE_IOCTL(CDROMRESUME)
+COMPATIBLE_IOCTL(CDROMPLAYMSF)
+COMPATIBLE_IOCTL(CDROMPLAYTRKIND)
+COMPATIBLE_IOCTL(CDROMREADTOCHDR)
+COMPATIBLE_IOCTL(CDROMREADTOCENTRY)
+COMPATIBLE_IOCTL(CDROMSTOP)
+COMPATIBLE_IOCTL(CDROMSTART)
+COMPATIBLE_IOCTL(CDROMEJECT)
+COMPATIBLE_IOCTL(CDROMVOLCTRL)
+COMPATIBLE_IOCTL(CDROMSUBCHNL)
+COMPATIBLE_IOCTL(CDROMEJECT_SW)
+COMPATIBLE_IOCTL(CDROMMULTISESSION)
+COMPATIBLE_IOCTL(CDROM_GET_MCN)
+COMPATIBLE_IOCTL(CDROMRESET)
+COMPATIBLE_IOCTL(CDROMVOLREAD)
+COMPATIBLE_IOCTL(CDROMSEEK)
+COMPATIBLE_IOCTL(CDROMPLAYBLK)
+COMPATIBLE_IOCTL(CDROMCLOSETRAY)
+COMPATIBLE_IOCTL(CDROM_SET_OPTIONS)
+COMPATIBLE_IOCTL(CDROM_CLEAR_OPTIONS)
+COMPATIBLE_IOCTL(CDROM_SELECT_SPEED)
+COMPATIBLE_IOCTL(CDROM_SELECT_DISC)
+COMPATIBLE_IOCTL(CDROM_MEDIA_CHANGED)
+COMPATIBLE_IOCTL(CDROM_DRIVE_STATUS)
+COMPATIBLE_IOCTL(CDROM_DISC_STATUS)
+COMPATIBLE_IOCTL(CDROM_CHANGER_NSLOTS)
+COMPATIBLE_IOCTL(CDROM_LOCKDOOR)
+COMPATIBLE_IOCTL(CDROM_DEBUG)
+COMPATIBLE_IOCTL(CDROM_GET_CAPABILITY)
+/* Big L */
+COMPATIBLE_IOCTL(LOOP_SET_FD)
+COMPATIBLE_IOCTL(LOOP_CLR_FD)
+/* Big Q for sound/OSS */
+COMPATIBLE_IOCTL(SNDCTL_SEQ_RESET)
+COMPATIBLE_IOCTL(SNDCTL_SEQ_SYNC)
+COMPATIBLE_IOCTL(SNDCTL_SYNTH_INFO)
+COMPATIBLE_IOCTL(SNDCTL_SEQ_CTRLRATE)
+COMPATIBLE_IOCTL(SNDCTL_SEQ_GETOUTCOUNT)
+COMPATIBLE_IOCTL(SNDCTL_SEQ_GETINCOUNT)
+COMPATIBLE_IOCTL(SNDCTL_SEQ_PERCMODE)
+COMPATIBLE_IOCTL(SNDCTL_FM_LOAD_INSTR)
+COMPATIBLE_IOCTL(SNDCTL_SEQ_TESTMIDI)
+COMPATIBLE_IOCTL(SNDCTL_SEQ_RESETSAMPLES)
+COMPATIBLE_IOCTL(SNDCTL_SEQ_NRSYNTHS)
+COMPATIBLE_IOCTL(SNDCTL_SEQ_NRMIDIS)
+COMPATIBLE_IOCTL(SNDCTL_MIDI_INFO)
+COMPATIBLE_IOCTL(SNDCTL_SEQ_THRESHOLD)
+COMPATIBLE_IOCTL(SNDCTL_SYNTH_MEMAVL)
+COMPATIBLE_IOCTL(SNDCTL_FM_4OP_ENABLE)
+COMPATIBLE_IOCTL(SNDCTL_SEQ_PANIC)
+COMPATIBLE_IOCTL(SNDCTL_SEQ_OUTOFBAND)
+COMPATIBLE_IOCTL(SNDCTL_SEQ_GETTIME)
+COMPATIBLE_IOCTL(SNDCTL_SYNTH_ID)
+COMPATIBLE_IOCTL(SNDCTL_SYNTH_CONTROL)
+COMPATIBLE_IOCTL(SNDCTL_SYNTH_REMOVESAMPLE)
+/* Big T for sound/OSS */
+COMPATIBLE_IOCTL(SNDCTL_TMR_TIMEBASE)
+COMPATIBLE_IOCTL(SNDCTL_TMR_START)
+COMPATIBLE_IOCTL(SNDCTL_TMR_STOP)
+COMPATIBLE_IOCTL(SNDCTL_TMR_CONTINUE)
+COMPATIBLE_IOCTL(SNDCTL_TMR_TEMPO)
+COMPATIBLE_IOCTL(SNDCTL_TMR_SOURCE)
+COMPATIBLE_IOCTL(SNDCTL_TMR_METRONOME)
+COMPATIBLE_IOCTL(SNDCTL_TMR_SELECT)
+/* Little m for sound/OSS */
+COMPATIBLE_IOCTL(SNDCTL_MIDI_PRETIME)
+COMPATIBLE_IOCTL(SNDCTL_MIDI_MPUMODE)
+COMPATIBLE_IOCTL(SNDCTL_MIDI_MPUCMD)
+/* Big P for sound/OSS */
+COMPATIBLE_IOCTL(SNDCTL_DSP_RESET)
+COMPATIBLE_IOCTL(SNDCTL_DSP_SYNC)
+COMPATIBLE_IOCTL(SNDCTL_DSP_SPEED)
+COMPATIBLE_IOCTL(SNDCTL_DSP_STEREO)
+COMPATIBLE_IOCTL(SNDCTL_DSP_GETBLKSIZE)
+COMPATIBLE_IOCTL(SNDCTL_DSP_CHANNELS)
+COMPATIBLE_IOCTL(SOUND_PCM_WRITE_FILTER)
+COMPATIBLE_IOCTL(SNDCTL_DSP_POST)
+COMPATIBLE_IOCTL(SNDCTL_DSP_SUBDIVIDE)
+COMPATIBLE_IOCTL(SNDCTL_DSP_SETFRAGMENT)
+COMPATIBLE_IOCTL(SNDCTL_DSP_GETFMTS)
+COMPATIBLE_IOCTL(SNDCTL_DSP_SETFMT)
+COMPATIBLE_IOCTL(SNDCTL_DSP_GETOSPACE)
+COMPATIBLE_IOCTL(SNDCTL_DSP_GETISPACE)
+COMPATIBLE_IOCTL(SNDCTL_DSP_NONBLOCK)
+COMPATIBLE_IOCTL(SNDCTL_DSP_GETCAPS)
+COMPATIBLE_IOCTL(SNDCTL_DSP_GETTRIGGER)
+COMPATIBLE_IOCTL(SNDCTL_DSP_SETTRIGGER)
+COMPATIBLE_IOCTL(SNDCTL_DSP_GETIPTR)
+COMPATIBLE_IOCTL(SNDCTL_DSP_GETOPTR)
+/* SNDCTL_DSP_MAPINBUF,  XXX needs translation */
+/* SNDCTL_DSP_MAPOUTBUF,  XXX needs translation */
+COMPATIBLE_IOCTL(SNDCTL_DSP_SETSYNCRO)
+COMPATIBLE_IOCTL(SNDCTL_DSP_SETDUPLEX)
+COMPATIBLE_IOCTL(SNDCTL_DSP_GETODELAY)
+COMPATIBLE_IOCTL(SNDCTL_DSP_PROFILE)
+COMPATIBLE_IOCTL(SOUND_PCM_READ_RATE)
+COMPATIBLE_IOCTL(SOUND_PCM_READ_CHANNELS)
+COMPATIBLE_IOCTL(SOUND_PCM_READ_BITS)
+COMPATIBLE_IOCTL(SOUND_PCM_READ_FILTER)
+/* Big C for sound/OSS */
+COMPATIBLE_IOCTL(SNDCTL_COPR_RESET)
+COMPATIBLE_IOCTL(SNDCTL_COPR_LOAD)
+COMPATIBLE_IOCTL(SNDCTL_COPR_RDATA)
+COMPATIBLE_IOCTL(SNDCTL_COPR_RCODE)
+COMPATIBLE_IOCTL(SNDCTL_COPR_WDATA)
+COMPATIBLE_IOCTL(SNDCTL_COPR_WCODE)
+COMPATIBLE_IOCTL(SNDCTL_COPR_RUN)
+COMPATIBLE_IOCTL(SNDCTL_COPR_HALT)
+COMPATIBLE_IOCTL(SNDCTL_COPR_SENDMSG)
+COMPATIBLE_IOCTL(SNDCTL_COPR_RCVMSG)
+/* Big M for sound/OSS */
+COMPATIBLE_IOCTL(SOUND_MIXER_READ_VOLUME)
+COMPATIBLE_IOCTL(SOUND_MIXER_READ_BASS)
+COMPATIBLE_IOCTL(SOUND_MIXER_READ_TREBLE)
+COMPATIBLE_IOCTL(SOUND_MIXER_READ_SYNTH)
+COMPATIBLE_IOCTL(SOUND_MIXER_READ_PCM)
+COMPATIBLE_IOCTL(SOUND_MIXER_READ_SPEAKER)
+COMPATIBLE_IOCTL(SOUND_MIXER_READ_LINE)
+COMPATIBLE_IOCTL(SOUND_MIXER_READ_MIC)
+COMPATIBLE_IOCTL(SOUND_MIXER_READ_CD)
+COMPATIBLE_IOCTL(SOUND_MIXER_READ_IMIX)
+COMPATIBLE_IOCTL(SOUND_MIXER_READ_ALTPCM)
+COMPATIBLE_IOCTL(SOUND_MIXER_READ_RECLEV)
+COMPATIBLE_IOCTL(SOUND_MIXER_READ_IGAIN)
+COMPATIBLE_IOCTL(SOUND_MIXER_READ_OGAIN)
+COMPATIBLE_IOCTL(SOUND_MIXER_READ_LINE1)
+COMPATIBLE_IOCTL(SOUND_MIXER_READ_LINE2)
+COMPATIBLE_IOCTL(SOUND_MIXER_READ_LINE3)
+COMPATIBLE_IOCTL(MIXER_READ(SOUND_MIXER_DIGITAL1))
+COMPATIBLE_IOCTL(MIXER_READ(SOUND_MIXER_DIGITAL2))
+COMPATIBLE_IOCTL(MIXER_READ(SOUND_MIXER_DIGITAL3))
+COMPATIBLE_IOCTL(MIXER_READ(SOUND_MIXER_PHONEIN))
+COMPATIBLE_IOCTL(MIXER_READ(SOUND_MIXER_PHONEOUT))
+COMPATIBLE_IOCTL(MIXER_READ(SOUND_MIXER_VIDEO))
+COMPATIBLE_IOCTL(MIXER_READ(SOUND_MIXER_RADIO))
+COMPATIBLE_IOCTL(MIXER_READ(SOUND_MIXER_MONITOR))
+COMPATIBLE_IOCTL(SOUND_MIXER_READ_MUTE)
+/* SOUND_MIXER_READ_ENHANCE,  same value as READ_MUTE */
+/* SOUND_MIXER_READ_LOUD,  same value as READ_MUTE */
+COMPATIBLE_IOCTL(SOUND_MIXER_READ_RECSRC)
+COMPATIBLE_IOCTL(SOUND_MIXER_READ_DEVMASK)
+COMPATIBLE_IOCTL(SOUND_MIXER_READ_RECMASK)
+COMPATIBLE_IOCTL(SOUND_MIXER_READ_STEREODEVS)
+COMPATIBLE_IOCTL(SOUND_MIXER_READ_CAPS)
+COMPATIBLE_IOCTL(SOUND_MIXER_WRITE_VOLUME)
+COMPATIBLE_IOCTL(SOUND_MIXER_WRITE_BASS)
+COMPATIBLE_IOCTL(SOUND_MIXER_WRITE_TREBLE)
+COMPATIBLE_IOCTL(SOUND_MIXER_WRITE_SYNTH)
+COMPATIBLE_IOCTL(SOUND_MIXER_WRITE_PCM)
+COMPATIBLE_IOCTL(SOUND_MIXER_WRITE_SPEAKER)
+COMPATIBLE_IOCTL(SOUND_MIXER_WRITE_LINE)
+COMPATIBLE_IOCTL(SOUND_MIXER_WRITE_MIC)
+COMPATIBLE_IOCTL(SOUND_MIXER_WRITE_CD)
+COMPATIBLE_IOCTL(SOUND_MIXER_WRITE_IMIX)
+COMPATIBLE_IOCTL(SOUND_MIXER_WRITE_ALTPCM)
+COMPATIBLE_IOCTL(SOUND_MIXER_WRITE_RECLEV)
+COMPATIBLE_IOCTL(SOUND_MIXER_WRITE_IGAIN)
+COMPATIBLE_IOCTL(SOUND_MIXER_WRITE_OGAIN)
+COMPATIBLE_IOCTL(SOUND_MIXER_WRITE_LINE1)
+COMPATIBLE_IOCTL(SOUND_MIXER_WRITE_LINE2)
+COMPATIBLE_IOCTL(SOUND_MIXER_WRITE_LINE3)
+COMPATIBLE_IOCTL(MIXER_WRITE(SOUND_MIXER_DIGITAL1))
+COMPATIBLE_IOCTL(MIXER_WRITE(SOUND_MIXER_DIGITAL2))
+COMPATIBLE_IOCTL(MIXER_WRITE(SOUND_MIXER_DIGITAL3))
+COMPATIBLE_IOCTL(MIXER_WRITE(SOUND_MIXER_PHONEIN))
+COMPATIBLE_IOCTL(MIXER_WRITE(SOUND_MIXER_PHONEOUT))
+COMPATIBLE_IOCTL(MIXER_WRITE(SOUND_MIXER_VIDEO))
+COMPATIBLE_IOCTL(MIXER_WRITE(SOUND_MIXER_RADIO))
+COMPATIBLE_IOCTL(MIXER_WRITE(SOUND_MIXER_MONITOR))
+COMPATIBLE_IOCTL(SOUND_MIXER_WRITE_MUTE)
+/* SOUND_MIXER_WRITE_ENHANCE,  same value as WRITE_MUTE */
+/* SOUND_MIXER_WRITE_LOUD,  same value as WRITE_MUTE */
+COMPATIBLE_IOCTL(SOUND_MIXER_WRITE_RECSRC)
+COMPATIBLE_IOCTL(SOUND_MIXER_INFO)
+COMPATIBLE_IOCTL(SOUND_OLD_MIXER_INFO)
+COMPATIBLE_IOCTL(SOUND_MIXER_ACCESS)
+COMPATIBLE_IOCTL(SOUND_MIXER_PRIVATE1)
+COMPATIBLE_IOCTL(SOUND_MIXER_PRIVATE2)
+COMPATIBLE_IOCTL(SOUND_MIXER_PRIVATE3)
+COMPATIBLE_IOCTL(SOUND_MIXER_PRIVATE4)
+COMPATIBLE_IOCTL(SOUND_MIXER_PRIVATE5)
+COMPATIBLE_IOCTL(SOUND_MIXER_GETLEVELS)
+COMPATIBLE_IOCTL(SOUND_MIXER_SETLEVELS)
+COMPATIBLE_IOCTL(OSS_GETVERSION)
+/* AUTOFS */
+COMPATIBLE_IOCTL(AUTOFS_IOC_READY)
+COMPATIBLE_IOCTL(AUTOFS_IOC_FAIL)
+COMPATIBLE_IOCTL(AUTOFS_IOC_CATATONIC)
+COMPATIBLE_IOCTL(AUTOFS_IOC_PROTOVER)
+COMPATIBLE_IOCTL(AUTOFS_IOC_EXPIRE)
+/* DEVFS */
+COMPATIBLE_IOCTL(DEVFSDIOC_GET_PROTO_REV)
+COMPATIBLE_IOCTL(DEVFSDIOC_SET_EVENT_MASK)
+COMPATIBLE_IOCTL(DEVFSDIOC_RELEASE_EVENT_QUEUE)
+COMPATIBLE_IOCTL(DEVFSDIOC_SET_DEBUG_MASK)
+/* Raw devices */
+COMPATIBLE_IOCTL(RAW_SETBIND)
+COMPATIBLE_IOCTL(RAW_GETBIND)
+/* SMB ioctls which do not need any translations */
+COMPATIBLE_IOCTL(SMB_IOC_NEWCONN)
+/* Little a */
+COMPATIBLE_IOCTL(ATMSIGD_CTRL)
+COMPATIBLE_IOCTL(ATMARPD_CTRL)
+COMPATIBLE_IOCTL(ATMLEC_CTRL)
+COMPATIBLE_IOCTL(ATMLEC_MCAST)
+COMPATIBLE_IOCTL(ATMLEC_DATA)
+COMPATIBLE_IOCTL(ATM_SETSC)
+COMPATIBLE_IOCTL(SIOCSIFATMTCP)
+COMPATIBLE_IOCTL(SIOCMKCLIP)
+COMPATIBLE_IOCTL(ATMARP_MKIP)
+COMPATIBLE_IOCTL(ATMARP_SETENTRY)
+COMPATIBLE_IOCTL(ATMARP_ENCAP)
+COMPATIBLE_IOCTL(ATMTCP_CREATE)
+COMPATIBLE_IOCTL(ATMTCP_REMOVE)
+COMPATIBLE_IOCTL(ATMMPC_CTRL)
+COMPATIBLE_IOCTL(ATMMPC_DATA)
+#if defined(CONFIG_BLK_DEV_LVM) || defined(CONFIG_BLK_DEV_LVM_MODULE)
+/* 0xfe - lvm */
+COMPATIBLE_IOCTL(VG_SET_EXTENDABLE)
+COMPATIBLE_IOCTL(VG_STATUS_GET_COUNT)
+COMPATIBLE_IOCTL(VG_STATUS_GET_NAMELIST)
+COMPATIBLE_IOCTL(VG_REMOVE)
+COMPATIBLE_IOCTL(VG_RENAME)
+COMPATIBLE_IOCTL(VG_REDUCE)
+COMPATIBLE_IOCTL(PE_LOCK_UNLOCK)
+COMPATIBLE_IOCTL(PV_FLUSH)
+COMPATIBLE_IOCTL(LVM_LOCK_LVM)
+COMPATIBLE_IOCTL(LVM_GET_IOP_VERSION)
+#ifdef LVM_TOTAL_RESET
+COMPATIBLE_IOCTL(LVM_RESET)
+#endif
+COMPATIBLE_IOCTL(LV_SET_ACCESS)
+COMPATIBLE_IOCTL(LV_SET_STATUS)
+COMPATIBLE_IOCTL(LV_SET_ALLOCATION)
+COMPATIBLE_IOCTL(LE_REMAP)
+COMPATIBLE_IOCTL(LV_BMAP)
+COMPATIBLE_IOCTL(LV_SNAPSHOT_USE_RATE)
+#endif /* LVM */
+#if defined(CONFIG_DRM) || defined(CONFIG_DRM_MODULE)
+COMPATIBLE_IOCTL(DRM_IOCTL_GET_MAGIC)
+COMPATIBLE_IOCTL(DRM_IOCTL_IRQ_BUSID)
+COMPATIBLE_IOCTL(DRM_IOCTL_AUTH_MAGIC)
+COMPATIBLE_IOCTL(DRM_IOCTL_BLOCK)
+COMPATIBLE_IOCTL(DRM_IOCTL_UNBLOCK)
+COMPATIBLE_IOCTL(DRM_IOCTL_CONTROL)
+COMPATIBLE_IOCTL(DRM_IOCTL_ADD_BUFS)
+COMPATIBLE_IOCTL(DRM_IOCTL_MARK_BUFS)
+COMPATIBLE_IOCTL(DRM_IOCTL_ADD_CTX)
+COMPATIBLE_IOCTL(DRM_IOCTL_RM_CTX)
+COMPATIBLE_IOCTL(DRM_IOCTL_MOD_CTX)
+COMPATIBLE_IOCTL(DRM_IOCTL_GET_CTX)
+COMPATIBLE_IOCTL(DRM_IOCTL_SWITCH_CTX)
+COMPATIBLE_IOCTL(DRM_IOCTL_NEW_CTX)
+COMPATIBLE_IOCTL(DRM_IOCTL_ADD_DRAW)
+COMPATIBLE_IOCTL(DRM_IOCTL_RM_DRAW)
+COMPATIBLE_IOCTL(DRM_IOCTL_LOCK)
+COMPATIBLE_IOCTL(DRM_IOCTL_UNLOCK)
+COMPATIBLE_IOCTL(DRM_IOCTL_FINISH)
+#endif /* DRM */
+/* elevator */
+COMPATIBLE_IOCTL(BLKELVGET)
+COMPATIBLE_IOCTL(BLKELVSET)
+/* Misc. */
+COMPATIBLE_IOCTL(0x41545900)           /* ATYIO_CLKR */
+COMPATIBLE_IOCTL(0x41545901)           /* ATYIO_CLKW */
+COMPATIBLE_IOCTL(PCIIOC_CONTROLLER)
+COMPATIBLE_IOCTL(PCIIOC_MMAP_IS_IO)
+COMPATIBLE_IOCTL(PCIIOC_MMAP_IS_MEM)
+COMPATIBLE_IOCTL(PCIIOC_WRITE_COMBINE)
+/* And these ioctls need translation */
+HANDLE_IOCTL(SIOCGIFNAME, dev_ifname32)
+HANDLE_IOCTL(SIOCGIFCONF, dev_ifconf)
+HANDLE_IOCTL(SIOCGIFFLAGS, dev_ifsioc)
+HANDLE_IOCTL(SIOCSIFFLAGS, dev_ifsioc)
+HANDLE_IOCTL(SIOCGIFMETRIC, dev_ifsioc)
+HANDLE_IOCTL(SIOCSIFMETRIC, dev_ifsioc)
+HANDLE_IOCTL(SIOCGIFMTU, dev_ifsioc)
+HANDLE_IOCTL(SIOCSIFMTU, dev_ifsioc)
+HANDLE_IOCTL(SIOCGIFMEM, dev_ifsioc)
+HANDLE_IOCTL(SIOCSIFMEM, dev_ifsioc)
+HANDLE_IOCTL(SIOCGIFHWADDR, dev_ifsioc)
+HANDLE_IOCTL(SIOCSIFHWADDR, dev_ifsioc)
+HANDLE_IOCTL(SIOCADDMULTI, dev_ifsioc)
+HANDLE_IOCTL(SIOCDELMULTI, dev_ifsioc)
+HANDLE_IOCTL(SIOCGIFINDEX, dev_ifsioc)
+HANDLE_IOCTL(SIOCGIFMAP, dev_ifsioc)
+HANDLE_IOCTL(SIOCSIFMAP, dev_ifsioc)
+HANDLE_IOCTL(SIOCGIFADDR, dev_ifsioc)
+HANDLE_IOCTL(SIOCSIFADDR, dev_ifsioc)
+HANDLE_IOCTL(SIOCGIFBRDADDR, dev_ifsioc)
+HANDLE_IOCTL(SIOCSIFBRDADDR, dev_ifsioc)
+HANDLE_IOCTL(SIOCGIFDSTADDR, dev_ifsioc)
+HANDLE_IOCTL(SIOCSIFDSTADDR, dev_ifsioc)
+HANDLE_IOCTL(SIOCGIFNETMASK, dev_ifsioc)
+HANDLE_IOCTL(SIOCSIFNETMASK, dev_ifsioc)
+HANDLE_IOCTL(SIOCSIFPFLAGS, dev_ifsioc)
+HANDLE_IOCTL(SIOCGIFPFLAGS, dev_ifsioc)
+HANDLE_IOCTL(SIOCGPPPSTATS, dev_ifsioc)
+HANDLE_IOCTL(SIOCGPPPCSTATS, dev_ifsioc)
+HANDLE_IOCTL(SIOCGPPPVER, dev_ifsioc)
+HANDLE_IOCTL(SIOCGIFTXQLEN, dev_ifsioc)
+HANDLE_IOCTL(SIOCSIFTXQLEN, dev_ifsioc)
+HANDLE_IOCTL(SIOCETHTOOL, ethtool_ioctl)
+HANDLE_IOCTL(SIOCADDRT, routing_ioctl)
+HANDLE_IOCTL(SIOCDELRT, routing_ioctl)
+/* Note SIOCRTMSG is no longer supported, so this is safe and
+ * the user would have seen just an -EINVAL anyway. */
+HANDLE_IOCTL(SIOCRTMSG, ret_einval)
+HANDLE_IOCTL(SIOCGSTAMP, do_siocgstamp)
+HANDLE_IOCTL(HDIO_GETGEO, hdio_getgeo)
+HANDLE_IOCTL(BLKRAGET, w_long)
+HANDLE_IOCTL(BLKGETSIZE, w_long)
+HANDLE_IOCTL(0x1260, broken_blkgetsize)
+HANDLE_IOCTL(BLKFRAGET, w_long)
+HANDLE_IOCTL(BLKSECTGET, w_long)
+HANDLE_IOCTL(BLKPG, blkpg_ioctl_trans)
+HANDLE_IOCTL(FBIOGETCMAP, fb_ioctl_trans)
+HANDLE_IOCTL(FBIOPUTCMAP, fb_ioctl_trans)
+HANDLE_IOCTL(HDIO_GET_KEEPSETTINGS, hdio_ioctl_trans)
+HANDLE_IOCTL(HDIO_GET_UNMASKINTR, hdio_ioctl_trans)
+HANDLE_IOCTL(HDIO_GET_DMA, hdio_ioctl_trans)
+HANDLE_IOCTL(HDIO_GET_32BIT, hdio_ioctl_trans)
+HANDLE_IOCTL(HDIO_GET_MULTCOUNT, hdio_ioctl_trans)
+HANDLE_IOCTL(HDIO_GET_NOWERR, hdio_ioctl_trans)
+HANDLE_IOCTL(HDIO_GET_NICE, hdio_ioctl_trans)
+HANDLE_IOCTL(FDSETPRM32, fd_ioctl_trans)
+HANDLE_IOCTL(FDDEFPRM32, fd_ioctl_trans)
+HANDLE_IOCTL(FDGETPRM32, fd_ioctl_trans)
+HANDLE_IOCTL(FDSETDRVPRM32, fd_ioctl_trans)
+HANDLE_IOCTL(FDGETDRVPRM32, fd_ioctl_trans)
+HANDLE_IOCTL(FDGETDRVSTAT32, fd_ioctl_trans)
+HANDLE_IOCTL(FDPOLLDRVSTAT32, fd_ioctl_trans)
+HANDLE_IOCTL(FDGETFDCSTAT32, fd_ioctl_trans)
+HANDLE_IOCTL(FDWERRORGET32, fd_ioctl_trans)
+HANDLE_IOCTL(PPPIOCGIDLE32, ppp_ioctl_trans)
+HANDLE_IOCTL(PPPIOCSCOMPRESS32, ppp_ioctl_trans)
+HANDLE_IOCTL(MTIOCGET32, mt_ioctl_trans)
+HANDLE_IOCTL(MTIOCPOS32, mt_ioctl_trans)
+HANDLE_IOCTL(MTIOCGETCONFIG32, mt_ioctl_trans)
+HANDLE_IOCTL(MTIOCSETCONFIG32, mt_ioctl_trans)
+HANDLE_IOCTL(CDROMREADMODE2, cdrom_ioctl_trans)
+HANDLE_IOCTL(CDROMREADMODE1, cdrom_ioctl_trans)
+HANDLE_IOCTL(CDROMREADRAW, cdrom_ioctl_trans)
+HANDLE_IOCTL(CDROMREADCOOKED, cdrom_ioctl_trans)
+HANDLE_IOCTL(CDROMREADAUDIO, cdrom_ioctl_trans)
+HANDLE_IOCTL(CDROMREADALL, cdrom_ioctl_trans)
+HANDLE_IOCTL(CDROM_SEND_PACKET, cdrom_ioctl_trans)
+HANDLE_IOCTL(LOOP_SET_STATUS, loop_status)
+HANDLE_IOCTL(LOOP_GET_STATUS, loop_status)
+#define AUTOFS_IOC_SETTIMEOUT32 _IOWR(0x93,0x64,unsigned int)
+HANDLE_IOCTL(AUTOFS_IOC_SETTIMEOUT32, ioc_settimeout)
+HANDLE_IOCTL(PIO_FONTX, do_fontx_ioctl)
+HANDLE_IOCTL(GIO_FONTX, do_fontx_ioctl)
+HANDLE_IOCTL(PIO_UNIMAP, do_unimap_ioctl)
+HANDLE_IOCTL(GIO_UNIMAP, do_unimap_ioctl)
+HANDLE_IOCTL(KDFONTOP, do_kdfontop_ioctl)
+HANDLE_IOCTL(EXT2_IOC32_GETFLAGS, do_ext2_ioctl)
+HANDLE_IOCTL(EXT2_IOC32_SETFLAGS, do_ext2_ioctl)
+HANDLE_IOCTL(EXT2_IOC32_GETVERSION, do_ext2_ioctl)
+HANDLE_IOCTL(EXT2_IOC32_SETVERSION, do_ext2_ioctl)
+HANDLE_IOCTL(VIDIOCGTUNER32, do_video_ioctl)
+HANDLE_IOCTL(VIDIOCSTUNER32, do_video_ioctl)
+HANDLE_IOCTL(VIDIOCGWIN32, do_video_ioctl)
+HANDLE_IOCTL(VIDIOCSWIN32, do_video_ioctl)
+HANDLE_IOCTL(VIDIOCGFBUF32, do_video_ioctl)
+HANDLE_IOCTL(VIDIOCSFBUF32, do_video_ioctl)
+HANDLE_IOCTL(VIDIOCGFREQ32, do_video_ioctl)
+HANDLE_IOCTL(VIDIOCSFREQ32, do_video_ioctl)
+/* One SMB ioctl needs translations. */
+#define SMB_IOC_GETMOUNTUID_32 _IOR('u', 1, __kernel_uid_t32)
+HANDLE_IOCTL(SMB_IOC_GETMOUNTUID_32, do_smb_getmountuid)
+HANDLE_IOCTL(ATM_GETLINKRATE32, do_atm_ioctl)
+HANDLE_IOCTL(ATM_GETNAMES32, do_atm_ioctl)
+HANDLE_IOCTL(ATM_GETTYPE32, do_atm_ioctl)
+HANDLE_IOCTL(ATM_GETESI32, do_atm_ioctl)
+HANDLE_IOCTL(ATM_GETADDR32, do_atm_ioctl)
+HANDLE_IOCTL(ATM_RSTADDR32, do_atm_ioctl)
+HANDLE_IOCTL(ATM_ADDADDR32, do_atm_ioctl)
+HANDLE_IOCTL(ATM_DELADDR32, do_atm_ioctl)
+HANDLE_IOCTL(ATM_GETCIRANGE32, do_atm_ioctl)
+HANDLE_IOCTL(ATM_SETCIRANGE32, do_atm_ioctl)
+HANDLE_IOCTL(ATM_SETESI32, do_atm_ioctl)
+HANDLE_IOCTL(ATM_SETESIF32, do_atm_ioctl)
+HANDLE_IOCTL(ATM_GETSTAT32, do_atm_ioctl)
+HANDLE_IOCTL(ATM_GETSTATZ32, do_atm_ioctl)
+HANDLE_IOCTL(ATM_GETLOOP32, do_atm_ioctl)
+HANDLE_IOCTL(ATM_SETLOOP32, do_atm_ioctl)
+HANDLE_IOCTL(ATM_QUERYLOOP32, do_atm_ioctl)
+HANDLE_IOCTL(SONET_GETSTAT, do_atm_ioctl)
+HANDLE_IOCTL(SONET_GETSTATZ, do_atm_ioctl)
+HANDLE_IOCTL(SONET_GETDIAG, do_atm_ioctl)
+HANDLE_IOCTL(SONET_SETDIAG, do_atm_ioctl)
+HANDLE_IOCTL(SONET_CLRDIAG, do_atm_ioctl)
+HANDLE_IOCTL(SONET_SETFRAMING, do_atm_ioctl)
+HANDLE_IOCTL(SONET_GETFRAMING, do_atm_ioctl)
+HANDLE_IOCTL(SONET_GETFRSENSE, do_atm_ioctl)
+#if defined(CONFIG_BLK_DEV_LVM) || defined(CONFIG_BLK_DEV_LVM_MODULE)
+HANDLE_IOCTL(VG_STATUS, do_lvm_ioctl)
+HANDLE_IOCTL(VG_CREATE, do_lvm_ioctl)
+HANDLE_IOCTL(VG_EXTEND, do_lvm_ioctl)
+HANDLE_IOCTL(LV_CREATE, do_lvm_ioctl)
+HANDLE_IOCTL(LV_REMOVE, do_lvm_ioctl)
+HANDLE_IOCTL(LV_EXTEND, do_lvm_ioctl)
+HANDLE_IOCTL(LV_REDUCE, do_lvm_ioctl)
+HANDLE_IOCTL(LV_RENAME, do_lvm_ioctl)
+HANDLE_IOCTL(LV_STATUS_BYNAME, do_lvm_ioctl)
+HANDLE_IOCTL(LV_STATUS_BYINDEX, do_lvm_ioctl)
+HANDLE_IOCTL(PV_CHANGE, do_lvm_ioctl)
+HANDLE_IOCTL(PV_STATUS, do_lvm_ioctl)
+#endif /* LVM */
+#if defined(CONFIG_DRM) || defined(CONFIG_DRM_MODULE)
+HANDLE_IOCTL(DRM32_IOCTL_VERSION, drm32_version);
+HANDLE_IOCTL(DRM32_IOCTL_GET_UNIQUE, drm32_getsetunique);
+HANDLE_IOCTL(DRM32_IOCTL_SET_UNIQUE, drm32_getsetunique);
+HANDLE_IOCTL(DRM32_IOCTL_ADD_MAP, drm32_addmap);
+HANDLE_IOCTL(DRM32_IOCTL_INFO_BUFS, drm32_info_bufs);
+HANDLE_IOCTL(DRM32_IOCTL_FREE_BUFS, drm32_free_bufs);
+HANDLE_IOCTL(DRM32_IOCTL_MAP_BUFS, drm32_map_bufs);
+HANDLE_IOCTL(DRM32_IOCTL_DMA, drm32_dma);
+HANDLE_IOCTL(DRM32_IOCTL_RES_CTX, drm32_res_ctx);
+#endif /* DRM */
+IOCTL_TABLE_END
+
+#define IOCTL_HASHSIZE 256
+struct ioctl_trans *ioctl32_hash_table[IOCTL_HASHSIZE];
+
+static inline unsigned long ioctl32_hash(unsigned long cmd)
+{
+       return (((cmd >> 6) ^ (cmd >> 4) ^ cmd)) % IOCTL_HASHSIZE;
+}
+
+static void ioctl32_insert_translation(struct ioctl_trans *trans)
+{
+       unsigned long hash;
+       struct ioctl_trans *t;
+
+       hash = ioctl32_hash (trans->cmd);
+       if (!ioctl32_hash_table[hash])
+               ioctl32_hash_table[hash] = trans;
+       else {
+               t = ioctl32_hash_table[hash];
+               while (t->next)
+                       t = t->next;
+               trans->next = 0;
+               t->next = trans;
+       }
+}
+
+static int __init init_sys32_ioctl(void)
+{
+       int i;
+       extern struct ioctl_trans ioctl_start[], ioctl_end[]; 
+
+       for (i = 0; &ioctl_start[i] < &ioctl_end[0]; i++) {
+               if (ioctl_start[i].next != 0) { 
+                       printk("ioctl translation %d bad\n",i); 
+                       return -1;
+               }
+
+               ioctl32_insert_translation(&ioctl_start[i]);
+       }
+       return 0;
+}
+
+__initcall(init_sys32_ioctl);
+
+static struct ioctl_trans *additional_ioctls;
+
+/* Always call these with kernel lock held! */
+
+int register_ioctl32_conversion(unsigned int cmd, int (*handler)(unsigned int, unsigned int, unsigned long, struct file *))
+{
+       int i;
+       if (!additional_ioctls) {
+               additional_ioctls = module_map(PAGE_SIZE);
+               if (!additional_ioctls)
+                       return -ENOMEM;
+               memset(additional_ioctls, 0, PAGE_SIZE);
+       }
+       for (i = 0; i < PAGE_SIZE/sizeof(struct ioctl_trans); i++)
+               if (!additional_ioctls[i].cmd)
+                       break;
+       if (i == PAGE_SIZE/sizeof(struct ioctl_trans))
+               return -ENOMEM;
+       additional_ioctls[i].cmd = cmd;
+       if (!handler)
+               additional_ioctls[i].handler = (u32)(long)sys_ioctl;
+       else
+               additional_ioctls[i].handler = (u32)(long)handler;
+       ioctl32_insert_translation(&additional_ioctls[i]);
+       return 0;
+}
+
+int unregister_ioctl32_conversion(unsigned int cmd)
+{
+       unsigned long hash = ioctl32_hash(cmd);
+       struct ioctl_trans *t, *t1;
+
+       t = (struct ioctl_trans *)(long)ioctl32_hash_table[hash];
+       if (!t) return -EINVAL;
+       if (t->cmd == cmd && t >= additional_ioctls &&
+           (unsigned long)t < ((unsigned long)additional_ioctls) + PAGE_SIZE) {
+               ioctl32_hash_table[hash] = t->next;
+               t->cmd = 0;
+               return 0;
+       } else while (t->next) {
+               t1 = (struct ioctl_trans *)(long)t->next;
+               if (t1->cmd == cmd && t1 >= additional_ioctls &&
+                   (unsigned long)t1 < ((unsigned long)additional_ioctls) + PAGE_SIZE) {
+                       t1->cmd = 0;
+                       t->next = t1->next;
+                       return 0;
+               }
+               t = t1;
+       }
+       return -EINVAL;
+}
+
+asmlinkage int sys32_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg)
+{
+       struct file * filp;
+       int error = -EBADF;
+       int (*handler)(unsigned int, unsigned int, unsigned long, struct file * filp);
+       struct ioctl_trans *t;
+
+       filp = fget(fd);
+       if(!filp)
+               goto out2;
+
+       if (!filp->f_op || !filp->f_op->ioctl) {
+               error = sys_ioctl (fd, cmd, arg);
+               goto out;
+       }
+
+       t = (struct ioctl_trans *)(long)ioctl32_hash_table [ioctl32_hash (cmd)];
+
+       while (t && t->cmd != cmd)
+               t = (struct ioctl_trans *)(long)t->next;
+       if (t) {
+               handler = (void *)(long)t->handler;
+               error = handler(fd, cmd, arg, filp);
+       } else {
+               static int count = 0;
+               if (++count <= 50)
+                       printk("sys32_ioctl(%s:%d): Unknown cmd fd(%d) "
+                              "cmd(%08x) arg(%08x)\n",
+                              current->comm, current->pid,
+                              (int)fd, (unsigned int)cmd, (unsigned int)arg);
+               error = -EINVAL;
+       }
+out:
+       fput(filp);
+out2:
+       return error;
+}
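
For orientation only (not part of the patch): the COMPATIBLE_IOCTL()/HANDLE_IOCTL()
entries above end up in the ioctl_start[]..ioctl_end[] array that init_sys32_ioctl()
hashes at boot, while register_ioctl32_conversion()/unregister_ioctl32_conversion()
add and remove translations at run time, with the kernel lock held as the comment
above demands. A minimal sketch of how a hypothetical driver module might use the
run-time interface; FOO_GET_STATUS, the function names and the local extern
declarations are invented for the example:

	/* Hypothetical module: registers a 32bit-clean ioctl so that
	 * sys32_ioctl() falls through to sys_ioctl() for it (NULL handler). */
	#include <linux/module.h>
	#include <linux/init.h>
	#include <linux/fs.h>
	#include <linux/ioctl.h>
	#include <linux/smp_lock.h>

	#define FOO_GET_STATUS _IOR('f', 0x01, int)	/* invented command */

	/* Prototypes as defined in the file above; no public header is assumed. */
	extern int register_ioctl32_conversion(unsigned int cmd,
		int (*handler)(unsigned int, unsigned int, unsigned long, struct file *));
	extern int unregister_ioctl32_conversion(unsigned int cmd);

	static int __init foo_ioctl32_init(void)
	{
		int ret;
		lock_kernel();		/* "Always call these with kernel lock held!" */
		ret = register_ioctl32_conversion(FOO_GET_STATUS, NULL);
		unlock_kernel();
		return ret;
	}

	static void __exit foo_ioctl32_exit(void)
	{
		lock_kernel();
		unregister_ioctl32_conversion(FOO_GET_STATUS);
		unlock_kernel();
	}

	module_init(foo_ioctl32_init);
	module_exit(foo_ioctl32_exit);
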
diff --git a/arch/x86_64/ia32/ia32_signal.c b/arch/x86_64/ia32/ia32_signal.c
new file mode 100644 (file)
index 0000000..3e4fd9a
--- /dev/null
@@ -0,0 +1,489 @@
+/*
+ *  linux/arch/x86_64/ia32/ia32_signal.c
+ *
+ *  Copyright (C) 1991, 1992  Linus Torvalds
+ *
+ *  1997-11-28  Modified for POSIX.1b signals by Richard Henderson
+ *  2000-06-20  Pentium III FXSR, SSE support by Gareth Hughes
+ *  2000-12-*   x86-64 compatibility mode signal handling by Andi Kleen
+ * 
+ *  $Id: ia32_signal.c,v 1.15 2001/10/16 23:41:42 ak Exp $
+ */
+
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/smp.h>
+#include <linux/smp_lock.h>
+#include <linux/kernel.h>
+#include <linux/signal.h>
+#include <linux/errno.h>
+#include <linux/wait.h>
+#include <linux/ptrace.h>
+#include <linux/unistd.h>
+#include <linux/stddef.h>
+#include <linux/personality.h>
+#include <asm/ucontext.h>
+#include <asm/uaccess.h>
+#include <asm/i387.h>
+#include <asm/ia32.h>
+#include <asm/ptrace.h>
+#include <asm/ia32_unistd.h>
+#include <asm/user32.h>
+
+#define ptr_to_u32(x) ((u32)(u64)(x))  /* avoid gcc warning */ 
+
+#define DEBUG_SIG 0
+
+#define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP)))
+
+asmlinkage int do_signal(struct pt_regs *regs, sigset_t *oldset);
+
+static int ia32_copy_siginfo_to_user(siginfo_t32 *to, siginfo_t *from)
+{
+       if (!access_ok (VERIFY_WRITE, to, sizeof(siginfo_t)))
+               return -EFAULT;
+       if (from->si_code < 0)
+               return __copy_to_user(to, from, sizeof(siginfo_t));
+       else {
+               int err;
+
+               /* If you change siginfo_t structure, please be sure
+                  this code is fixed accordingly.
+                  It should never copy any pad contained in the structure
+                  to avoid security leaks, but must copy the generic
+                  3 ints plus the relevant union member.  */
+               err = __put_user(from->si_signo, &to->si_signo);
+               err |= __put_user(from->si_errno, &to->si_errno);
+               err |= __put_user((short)from->si_code, &to->si_code);
+               /* First 32bits of unions are always present.  */
+               err |= __put_user(from->si_pid, &to->si_pid);
+               switch (from->si_code >> 16) {
+               case __SI_FAULT >> 16:
+                       break;
+               case __SI_CHLD >> 16:
+                       err |= __put_user(from->si_utime, &to->si_utime);
+                       err |= __put_user(from->si_stime, &to->si_stime);
+                       err |= __put_user(from->si_status, &to->si_status);
+               default:
+                       err |= __put_user(from->si_uid, &to->si_uid);
+                       break;
+               /* case __SI_RT: This is not generated by the kernel as of now.  */
+               }
+               return err;
+       }
+}
+
+asmlinkage int
+sys32_sigsuspend(int history0, int history1, old_sigset_t mask, struct pt_regs regs)
+{
+       sigset_t saveset;
+
+       mask &= _BLOCKABLE;
+       spin_lock_irq(&current->sigmask_lock);
+       saveset = current->blocked;
+       siginitset(&current->blocked, mask);
+       recalc_sigpending(current);
+       spin_unlock_irq(&current->sigmask_lock);
+
+       regs.rax = -EINTR;
+       while (1) {
+               current->state = TASK_INTERRUPTIBLE;
+               schedule();
+               if (do_signal(&regs, &saveset))
+                       return -EINTR;
+       }
+}
+
+asmlinkage int
+sys32_sigaltstack(const stack_ia32_t *uss_ptr, stack_ia32_t *uoss_ptr, 
+                                 struct pt_regs regs)
+{
+       stack_t uss,uoss; 
+       int ret;
+       mm_segment_t seg; 
+       if (!access_ok(VERIFY_READ,uss_ptr,sizeof(stack_ia32_t)) ||
+           __get_user(ptr_to_u32(uss.ss_sp), &uss_ptr->ss_sp) ||
+           __get_user((u32)uss.ss_flags, &uss_ptr->ss_flags) ||
+           __get_user((u32)uss.ss_size, &uss_ptr->ss_size))
+               return -EFAULT;
+       seg = get_fs(); 
+       set_fs(KERNEL_DS); 
+       ret = do_sigaltstack(&uss, &uoss, regs.rsp);
+       set_fs(seg); 
+       if (ret >= 0 && uoss_ptr)  {
+               if (!access_ok(VERIFY_WRITE,uoss_ptr,sizeof(stack_ia32_t)) ||
+                   __put_user(ptr_to_u32(uoss.ss_sp), &uoss_ptr->ss_sp) ||
+                   __put_user((u32)uoss.ss_flags, &uoss_ptr->ss_flags) ||
+                   __put_user((u32)uoss.ss_size, &uoss_ptr->ss_size))
+                       ret = -EFAULT;
+       }       
+       return ret;     
+}
+
+/*
+ * Do a signal return; undo the signal stack.
+ */
+
+struct sigframe
+{
+       u32 pretcode;
+       int sig;
+       struct sigcontext_ia32 sc;
+       struct _fpstate_ia32 fpstate;
+       unsigned int extramask[_IA32_NSIG_WORDS-1];
+       char retcode[8];
+};
+
+struct rt_sigframe
+{
+       u32 pretcode;
+       int sig;
+       u32 pinfo;
+       u32 puc;
+       struct siginfo32 info;
+       struct ucontext_ia32 uc;
+       struct _fpstate_ia32 fpstate;
+       char retcode[8];
+};
+
+static int
+restore_sigcontext(struct pt_regs *regs, struct sigcontext_ia32 *sc, unsigned int *peax)
+{
+       unsigned int err = 0;
+       
+#if DEBUG_SIG
+       printk("SIG restore_sigcontext: sc=%p err(%x) eip(%x) cs(%x) flg(%x)\n",
+               sc, sc->err, sc->eip, sc->cs, sc->eflags);
+#endif
+#define COPY(x)                { \
+       unsigned int reg;       \
+       err |= __get_user(reg, &sc->e ##x);     \
+       regs->r ## x = reg;                     \
+}
+
+#define RELOAD_SEG(seg)                                                        \
+       { unsigned int cur;                             \
+         unsigned short pre;                           \
+         err |= __get_user(pre, &sc->seg);                             \
+         asm volatile("movl %%" #seg ",%0" : "=r" (cur));              \
+         if (pre != cur) loadsegment(seg,pre); }
+
+       /* Reload fs and gs if they have changed in the signal handler.
+          This does not handle long fs/gs base changes in the handler, but at least
+          does not clobber them in the normal case. */ 
+       RELOAD_SEG(gs);
+       RELOAD_SEG(fs);
+
+       COPY(di); COPY(si); COPY(bp); COPY(sp); COPY(bx);
+       COPY(dx); COPY(cx); COPY(ip);
+       /* Don't touch extended registers */ 
+       
+       {
+               unsigned int tmpflags;
+               err |= __get_user(tmpflags, &sc->eflags);
+               regs->eflags = (regs->eflags & ~0x40DD5) | (tmpflags & 0x40DD5);
+               regs->orig_rax = -1;            /* disable syscall checks */
+       }
+
+       {
+               u32 tmp;
+               struct _fpstate * buf;
+               err |= __get_user(tmp, &sc->fpstate);
+               buf = (struct _fpstate *) (u64)tmp;
+               if (buf) {
+                       if (verify_area(VERIFY_READ, buf, sizeof(*buf)))
+                               goto badframe;
+                       err |= restore_i387(buf);
+               }
+       }
+
+       { 
+               u32 tmp;
+               err |= __get_user(tmp, &sc->eax);
+               *peax = tmp;
+       }
+       return err;
+
+badframe:
+       return 1;
+}
+
+asmlinkage int sys32_sigreturn(struct pt_regs regs)
+{
+       struct sigframe *frame = (struct sigframe *)(regs.rsp - 8);
+       sigset_t set;
+       unsigned int eax;
+
+       if (verify_area(VERIFY_READ, frame, sizeof(*frame)))
+               goto badframe;
+       if (__get_user(set.sig[0], &frame->sc.oldmask)
+           || (_IA32_NSIG_WORDS > 1
+               && __copy_from_user((((char *) &set.sig) + 4), &frame->extramask,
+                                   sizeof(frame->extramask))))
+               goto badframe;
+
+       sigdelsetmask(&set, ~_BLOCKABLE);
+       spin_lock_irq(&current->sigmask_lock);
+       current->blocked = set;
+       recalc_sigpending(current);
+       spin_unlock_irq(&current->sigmask_lock);
+       
+       if (restore_sigcontext(&regs, &frame->sc, &eax))
+               goto badframe;
+       return eax;
+
+badframe:
+       force_sig(SIGSEGV, current);
+       return 0;
+}      
+
+asmlinkage int sys32_rt_sigreturn(struct pt_regs regs)
+{
+       struct rt_sigframe *frame = (struct rt_sigframe *)(regs.rsp - 4);
+       sigset_t set;
+       stack_t st;
+       unsigned int eax;
+
+       if (verify_area(VERIFY_READ, frame, sizeof(*frame)))
+               goto badframe;
+       if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set)))
+               goto badframe;
+
+       sigdelsetmask(&set, ~_BLOCKABLE);
+       spin_lock_irq(&current->sigmask_lock);
+       current->blocked = set;
+       recalc_sigpending(current);
+       spin_unlock_irq(&current->sigmask_lock);
+       
+       if (restore_sigcontext(&regs, &frame->uc.uc_mcontext, &eax))
+               goto badframe;
+
+       if (__copy_from_user(&st, &frame->uc.uc_stack, sizeof(st)))
+               goto badframe;
+       /* It is more difficult to avoid calling this function than to
+          call it and ignore errors.  */
+       {
+               mm_segment_t oldds = get_fs(); 
+               set_fs(KERNEL_DS); 
+               do_sigaltstack(&st, NULL, regs.rsp);
+               set_fs(oldds);  
+       }
+
+       return eax;
+
+badframe:
+       force_sig(SIGSEGV, current);
+       return 0;
+}      
+
+/*
+ * Set up a signal frame.
+ */
+
+static int
+setup_sigcontext(struct sigcontext_ia32 *sc, struct _fpstate_ia32 *fpstate,
+                struct pt_regs *regs, unsigned int mask)
+{
+       int tmp, err = 0;
+
+       tmp = 0;
+       __asm__("movl %%gs,%0" : "=r"(tmp): "0"(tmp));
+       err |= __put_user(tmp, (unsigned int *)&sc->gs);
+       __asm__("movl %%fs,%0" : "=r"(tmp): "0"(tmp));
+       err |= __put_user(tmp, (unsigned int *)&sc->fs);
+
+       err |= __put_user((u32)regs->rdi, &sc->edi);
+       err |= __put_user((u32)regs->rsi, &sc->esi);
+       err |= __put_user((u32)regs->rbp, &sc->ebp);
+       err |= __put_user((u32)regs->rsp, &sc->esp);
+       err |= __put_user((u32)regs->rbx, &sc->ebx);
+       err |= __put_user((u32)regs->rdx, &sc->edx);
+       err |= __put_user((u32)regs->rcx, &sc->ecx);
+       err |= __put_user((u32)regs->rax, &sc->eax);
+       err |= __put_user(current->thread.trap_no, &sc->trapno);
+       err |= __put_user(current->thread.error_code, &sc->err);
+       err |= __put_user((u32)regs->rip, &sc->eip);
+       err |= __put_user((u32)regs->eflags, &sc->eflags);
+       err |= __put_user((u32)regs->rsp, &sc->esp_at_signal);
+
+       tmp = save_i387(fpstate);
+       if (tmp < 0)
+         err = -EFAULT;
+       else
+         err |= __put_user((u32)(u64)(tmp ? fpstate : NULL), &sc->fpstate);
+
+       /* non-iBCS2 extensions.. */
+       err |= __put_user(mask, &sc->oldmask);
+       err |= __put_user(current->thread.cr2, &sc->cr2);
+
+       return err;
+}
+
+/*
+ * Determine which stack to use..
+ */
+static inline void *
+get_sigframe(struct k_sigaction *ka, struct pt_regs * regs, size_t frame_size)
+{
+       unsigned long rsp;
+
+       /* Default to using normal stack */
+       rsp = regs->rsp;
+
+       /* This is the X/Open sanctioned signal stack switching.  */
+       if (ka->sa.sa_flags & SA_ONSTACK) {
+               if (! on_sig_stack(rsp))
+                       rsp = current->sas_ss_sp + current->sas_ss_size;
+       }
+
+       /* This is the legacy signal stack switching. */
+       else if ((regs->ss & 0xffff) != __USER_DS &&
+               !(ka->sa.sa_flags & SA_RESTORER) &&
+                ka->sa.sa_restorer) {
+               rsp = (unsigned long) ka->sa.sa_restorer;
+       }
+
+       return (void *)((rsp - frame_size) & -8UL);
+}
+
+void ia32_setup_frame(int sig, struct k_sigaction *ka,
+                       sigset32_t *set, struct pt_regs * regs)
+{
+       struct sigframe *frame;
+       int err = 0;
+       struct exec_domain *exec_domain = current_thread_info()->exec_domain; 
+
+       frame = get_sigframe(ka, regs, sizeof(*frame));
+
+       if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
+               goto give_sigsegv;
+
+       err |= __put_user((exec_domain
+                          && exec_domain->signal_invmap
+                          && sig < 32
+                          ? exec_domain->signal_invmap[sig]
+                          : sig),
+                         &frame->sig);
+       if (err)
+               goto give_sigsegv;
+
+       err |= setup_sigcontext(&frame->sc, &frame->fpstate, regs, set->sig[0]);
+       if (err)
+               goto give_sigsegv;
+
+       if (_IA32_NSIG_WORDS > 1) {
+               err |= __copy_to_user(frame->extramask, &set->sig[1],
+                                     sizeof(frame->extramask));
+       }
+       if (err)
+               goto give_sigsegv;
+
+       /* Set up to return from userspace.  If provided, use a stub
+          already in userspace.  */
+       if (ka->sa.sa_flags & SA_RESTORER) {
+               err |= __put_user((u32)(u64)ka->sa.sa_restorer, &frame->pretcode);
+       } else {
+               err |= __put_user((u32)(u64)frame->retcode, &frame->pretcode);
+               /* This is popl %eax ; movl $__NR_ia32_sigreturn,%eax ; int $0x80 */
+               err |= __put_user((u16)0xb858, (short *)(frame->retcode+0));
+               err |= __put_user((u32)__NR_ia32_sigreturn, (int *)(frame->retcode+2));
+               err |= __put_user((u16)0x80cd, (short *)(frame->retcode+6));
+       }
+
+       if (err)
+               goto give_sigsegv;
+
+       /* Set up registers for signal handler */
+       regs->rsp = (unsigned long) frame;
+       regs->rip = (unsigned long) ka->sa.sa_handler;
+
+       set_fs(USER_DS);
+       // XXX: cs
+       regs->eflags &= ~TF_MASK;
+
+#if DEBUG_SIG
+       printk("SIG deliver (%s:%d): sp=%p pc=%p ra=%p\n",
+               current->comm, current->pid, frame, regs->rip, frame->pretcode);
+#endif
+
+       return;
+
+give_sigsegv:
+       if (sig == SIGSEGV)
+               ka->sa.sa_handler = SIG_DFL;
+       force_sig(SIGSEGV, current);
+}
+
+void ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
+                          sigset32_t *set, struct pt_regs * regs)
+{
+       struct rt_sigframe *frame;
+       int err = 0;
+       struct exec_domain *exec_domain = current_thread_info()->exec_domain; 
+
+       frame = get_sigframe(ka, regs, sizeof(*frame));
+
+       if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
+               goto give_sigsegv;
+
+       err |= __put_user((exec_domain
+                          && exec_domain->signal_invmap
+                          && sig < 32
+                          ? exec_domain->signal_invmap[sig]
+                          : sig),
+                         &frame->sig);
+       err |= __put_user((u32)(u64)&frame->info, &frame->pinfo);
+       err |= __put_user((u32)(u64)&frame->uc, &frame->puc);
+       err |= ia32_copy_siginfo_to_user(&frame->info, info);
+       if (err)
+               goto give_sigsegv;
+
+       /* Create the ucontext.  */
+       err |= __put_user(0, &frame->uc.uc_flags);
+       err |= __put_user(0, &frame->uc.uc_link);
+       err |= __put_user(current->sas_ss_sp, &frame->uc.uc_stack.ss_sp);
+       err |= __put_user(sas_ss_flags(regs->rsp),
+                         &frame->uc.uc_stack.ss_flags);
+       err |= __put_user(current->sas_ss_size, &frame->uc.uc_stack.ss_size);
+       err |= setup_sigcontext(&frame->uc.uc_mcontext, &frame->fpstate,
+                               regs, set->sig[0]);
+       err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
+       if (err)
+               goto give_sigsegv;
+
+       /* Set up to return from userspace.  If provided, use a stub
+          already in userspace.  */
+       if (ka->sa.sa_flags & SA_RESTORER) {
+               err |= __put_user((u32)(u64)ka->sa.sa_restorer, &frame->pretcode);
+       } else {
+               err |= __put_user(ptr_to_u32(frame->retcode), &frame->pretcode);
+               /* This is movl $__NR_ia32_rt_sigreturn,%eax ; int $0x80 */
+               err |= __put_user(0xb8, (char *)(frame->retcode+0));
+               err |= __put_user((u32)__NR_ia32_rt_sigreturn, (int *)(frame->retcode+1));
+               err |= __put_user(0x80cd, (short *)(frame->retcode+5));
+       }
+
+       if (err)
+               goto give_sigsegv;
+
+       /* Set up registers for signal handler */
+       regs->rsp = (unsigned long) frame;
+       regs->rip = (unsigned long) ka->sa.sa_handler;
+
+       set_fs(USER_DS);
+       // XXX: cs
+       regs->eflags &= ~TF_MASK;
+
+#if DEBUG_SIG
+       printk("SIG deliver (%s:%d): sp=%p pc=%p ra=%p\n",
+               current->comm, current->pid, frame, regs->rip, frame->pretcode);
+#endif
+
+       return;
+
+give_sigsegv:
+       if (sig == SIGSEGV)
+               ka->sa.sa_handler = SIG_DFL;
+       force_sig(SIGSEGV, current);
+}
+
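
For reference, the retcode[] trampolines that ia32_setup_frame() and
ia32_setup_rt_frame() write above decode into i386 instructions as noted in the
inline comments. The sketch below just lays out the same bytes in plain C to make
the little-endian encoding explicit; it is a user-space illustration, not kernel
code, and the syscall numbers stay symbolic:

	#include <stdint.h>
	#include <string.h>

	/* sigreturn trampoline: popl %eax ; movl $nr,%eax ; int $0x80
	 * (the popl discards the signal number pushed as the handler argument) */
	static void build_sigreturn_retcode(uint8_t code[8], uint32_t nr)
	{
		code[0] = 0x58;			/* popl %eax  (low byte of the u16 0xb858) */
		code[1] = 0xb8;			/* movl $imm32,%eax */
		memcpy(&code[2], &nr, 4);	/* imm32 = __NR_ia32_sigreturn */
		code[6] = 0xcd;			/* int $0x80  (the u16 0x80cd) */
		code[7] = 0x80;
	}

	/* rt_sigreturn trampoline: movl $nr,%eax ; int $0x80 (7 bytes used) */
	static void build_rt_sigreturn_retcode(uint8_t code[8], uint32_t nr)
	{
		code[0] = 0xb8;			/* movl $imm32,%eax */
		memcpy(&code[1], &nr, 4);	/* imm32 = __NR_ia32_rt_sigreturn */
		code[5] = 0xcd;			/* int $0x80 */
		code[6] = 0x80;
		code[7] = 0x00;			/* unused */
	}
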
diff --git a/arch/x86_64/ia32/ia32entry.S b/arch/x86_64/ia32/ia32entry.S
new file mode 100644 (file)
index 0000000..cf45f8e
--- /dev/null
@@ -0,0 +1,365 @@
+/*
+ * Compatibility mode system call entry point for x86-64. 
+ *             
+ * Copyright 2000,2001,2002 Andi Kleen, SuSE Labs.
+ * 
+ * $Id: ia32entry.S,v 1.24 2001/11/11 17:47:47 ak Exp $                
+ */             
+
+#include <asm/calling.h>
+#include <asm/offset.h>
+#include <asm/thread_info.h>
+#include <linux/linkage.h>
+#include <asm/errno.h>
+#include <asm/ia32_unistd.h>   
+
+       .macro IA32_ARG_FIXUP
+       movl    %edi,%r8d
+       movl    %ebp,%r9d
+       xchg    %ecx,%esi
+       movl    %ebx,%edi
+       movl    %edx,%edx       /* zero extension */
+       .endm 
+
+/*
+ * 32bit SYSCALL instruction entry.    
+ * It'll probably kill you because it destroys your segments.
+ * Should coredump here, but the next instruction will likely do 
+ * that anyway.
+ */    
+ENTRY(ia32_cstar_target)
+       movq $-ENOSYS,%rax
+       SYSRET32
+       
+/* 
+ * Emulated IA32 system calls via int 0x80. 
+ *
+ * Arguments:   
+ * %eax        System call number.
+ * %ebx Arg1
+ * %ecx Arg2
+ * %edx Arg3
+ * %esi Arg4
+ * %edi Arg5
+ * %ebp Arg6    [note: not saved in the stack frame, should not be touched]
+ *
+ * Notes:
+ * Uses the same stack frame as the x86-64 version.    
+ * All registers except %eax must be saved (but ptrace may violate that)
+ * Arguments are zero extended. For system calls that want sign extension and
+ * take long arguments a wrapper is needed. Most calls can just be called
+ * directly.
+ * Assumes it is only called from user space and entered with interrupts off.   
+ */                            
+
+ENTRY(ia32_syscall)
+       swapgs  
+       sti
+       pushq %rax
+       cld
+       SAVE_ARGS
+       GET_THREAD_INFO(%r10)
+       bt $TIF_SYSCALL_TRACE,threadinfo_flags(%r10)
+       jc ia32_tracesys
+ia32_do_syscall:       
+       cmpl $(IA32_NR_syscalls),%eax
+       jae  ia32_badsys
+       IA32_ARG_FIXUP
+       movl $1,%r10d
+       call *ia32_sys_call_table(,%rax,8) # xxx: rip relative
+       movq %rax,RAX-ARGOFFSET(%rsp)
+       jmp int_ret_from_sys_call 
+
+ia32_tracesys:                  
+       SAVE_REST
+       movq $-ENOSYS,RAX(%rsp) /* really needed? */
+       movq %rsp,%rdi        /* &pt_regs -> arg1 */
+       call syscall_trace
+       LOAD_ARGS ARGOFFSET  /* reload args from stack in case ptrace changed it */
+       RESTORE_REST
+       jmp ia32_do_syscall
+
+ia32_badsys:
+       movq $-ENOSYS,RAX-ARGOFFSET(%rsp)
+       jmp int_ret_from_sys_call
+
+ni_syscall:
+       movq %rax,%rdi
+       jmp  sys32_ni_syscall                   
+
+       .macro PTREGSCALL label, func
+       .globl \label
+\label:
+       leaq \func(%rip),%rax
+       jmp  ia32_ptregs_common 
+       .endm
+
+       PTREGSCALL stub32_rt_sigreturn, sys32_rt_sigreturn
+       PTREGSCALL stub32_sigreturn, sys32_sigreturn
+       PTREGSCALL stub32_sigaltstack, sys32_sigaltstack
+       PTREGSCALL stub32_sigsuspend, sys32_sigsuspend
+       PTREGSCALL stub32_execve, sys32_execve
+       PTREGSCALL stub32_fork, sys32_fork
+       PTREGSCALL stub32_clone, sys32_clone
+       PTREGSCALL stub32_vfork, sys32_vfork
+       PTREGSCALL stub32_iopl, sys_iopl
+       PTREGSCALL stub32_rt_sigsuspend, sys_rt_sigsuspend
+
+ENTRY(ia32_ptregs_common)
+       popq %r11       /* save return address outside the stack frame. */
+       SAVE_REST
+       movq %r11, %r15
+       call *%rax
+       movq %r15, %r11
+       RESTORE_REST
+       pushq %r11
+       ret
+
+       .data
+       .align 8
+ia32_sys_call_table:
+       .quad ni_syscall        /* 0  -  old "setup" system call*/
+       .quad sys_exit
+       .quad stub32_fork
+       .quad sys_read
+       .quad sys_write
+       .quad sys_open          /* 5 */
+       .quad sys_close
+       .quad sys32_waitpid
+       .quad sys_creat
+       .quad sys_link
+       .quad sys_unlink                /* 10 */
+       .quad stub32_execve
+       .quad sys_chdir
+       .quad sys32_time
+       .quad sys_mknod
+       .quad sys_chmod         /* 15 */
+       .quad sys_lchown16
+       .quad ni_syscall                        /* old break syscall holder */
+       .quad ni_syscall        /* (old)stat */ 
+       .quad sys32_lseek
+       .quad sys_getpid                /* 20 */
+       .quad sys_mount /* mount  */
+       .quad sys_oldumount     /* old_umount  */
+       .quad sys_setuid16
+       .quad sys_getuid16
+       .quad ni_syscall        /* stime */             /* 25 */
+       .quad sys32_ptrace      /* ptrace */
+       .quad sys_alarm         /* XXX sign extension??? */ 
+       .quad ni_syscall        /* (old)fstat */
+       .quad sys_pause
+       .quad sys32_utime       /* 30 */
+       .quad ni_syscall        /* old stty syscall holder */
+       .quad ni_syscall        /* old gtty syscall holder */
+       .quad sys_access
+       .quad sys_nice  
+       .quad ni_syscall        /* 35 */        /* old ftime syscall holder */
+       .quad sys_sync
+       .quad sys32_kill
+       .quad sys_rename
+       .quad sys_mkdir
+       .quad sys_rmdir         /* 40 */
+       .quad sys_dup
+       .quad sys32_pipe
+       .quad sys32_times
+       .quad ni_syscall                        /* old prof syscall holder */
+       .quad sys_brk           /* 45 */
+       .quad sys_setgid16
+       .quad sys_getgid16
+       .quad ni_syscall        /* signal */
+       .quad sys_geteuid16
+       .quad sys_getegid16     /* 50 */
+       .quad sys_acct
+       .quad sys_umount                        /* new_umount */
+       .quad ni_syscall                        /* old lock syscall holder */
+       .quad sys32_ioctl
+       .quad sys32_fcntl               /* 55 */
+       .quad ni_syscall                        /* old mpx syscall holder */
+       .quad sys_setpgid
+       .quad ni_syscall                        /* old ulimit syscall holder */
+       .quad sys32_olduname
+       .quad sys_umask         /* 60 */
+       .quad sys_chroot
+       .quad sys32_ustat
+       .quad sys_dup2
+       .quad sys_getppid
+       .quad sys_getpgrp               /* 65 */
+       .quad sys_setsid
+       .quad sys32_sigaction
+       .quad sys_sgetmask
+       .quad sys_ssetmask
+       .quad sys_setreuid16    /* 70 */
+       .quad sys_setregid16
+       .quad stub32_sigsuspend
+       .quad sys32_sigpending
+       .quad sys_sethostname
+       .quad sys32_setrlimit   /* 75 */
+       .quad sys32_old_getrlimit       /* old_getrlimit */
+       .quad sys32_getrusage
+       .quad sys32_gettimeofday
+       .quad sys32_settimeofday
+       .quad sys_getgroups16   /* 80 */
+       .quad sys_setgroups16
+       .quad sys32_old_select
+       .quad sys_symlink
+       .quad ni_syscall        /* (old)lstat */
+       .quad sys_readlink              /* 85 */
+       .quad sys_uselib
+       .quad sys_swapon
+       .quad sys_reboot
+       .quad sys32_oldreaddir
+       .quad sys32_mmap                /* 90 */
+       .quad sys_munmap
+       .quad sys_truncate
+       .quad sys_ftruncate
+       .quad sys_fchmod
+       .quad sys_fchown16              /* 95 */
+       .quad sys_getpriority
+       .quad sys_setpriority
+       .quad ni_syscall                        /* old profil syscall holder */
+       .quad sys32_statfs
+       .quad sys32_fstatfs             /* 100 */
+       .quad sys_ioperm
+       .quad sys32_socketcall
+       .quad sys_syslog
+       .quad sys32_setitimer
+       .quad sys32_getitimer   /* 105 */
+       .quad sys32_newstat
+       .quad sys32_newlstat
+       .quad sys32_newfstat
+       .quad sys32_uname
+       .quad stub32_iopl               /* 110 */
+       .quad sys_vhangup
+       .quad ni_syscall        /* old "idle" system call */
+       .quad ni_syscall        /* vm86old */ 
+       .quad sys32_wait4
+       .quad sys_swapoff               /* 115 */
+       .quad sys32_sysinfo
+       .quad sys32_ipc
+       .quad sys_fsync
+       .quad stub32_sigreturn
+       .quad stub32_clone              /* 120 */
+       .quad sys_setdomainname
+       .quad sys_newuname
+       .quad ni_syscall        /* modify_ldt */
+       .quad sys32_adjtimex
+       .quad sys_mprotect              /* 125 */
+       .quad sys32_sigprocmask
+       .quad ni_syscall        /* query_module */
+       .quad ni_syscall        /* init_module */
+       .quad ni_syscall        /* delete module */
+       .quad ni_syscall        /* 130  get_kernel_syms */
+       .quad ni_syscall        /* quotactl */ 
+       .quad sys_getpgid
+       .quad sys_fchdir
+       .quad ni_syscall        /* bdflush */
+       .quad sys_sysfs         /* 135 */
+       .quad sys_personality
+       .quad ni_syscall        /* for afs_syscall */
+       .quad sys_setfsuid16
+       .quad sys_setfsgid16
+       .quad sys_llseek                /* 140 */
+       .quad sys32_getdents
+       .quad sys32_select
+       .quad sys_flock
+       .quad sys_msync
+       .quad sys32_readv               /* 145 */
+       .quad sys32_writev
+       .quad sys_getsid
+       .quad sys_fdatasync
+       .quad sys32_sysctl      /* sysctl */
+       .quad sys_mlock         /* 150 */
+       .quad sys_munlock
+       .quad sys_mlockall
+       .quad sys_munlockall
+       .quad sys_sched_setparam
+       .quad sys_sched_getparam   /* 155 */
+       .quad sys_sched_setscheduler
+       .quad sys_sched_getscheduler
+       .quad sys_sched_yield
+       .quad sys_sched_get_priority_max
+       .quad sys_sched_get_priority_min  /* 160 */
+       .quad sys_sched_rr_get_interval
+       .quad sys32_nanosleep
+       .quad sys_mremap
+       .quad sys_setresuid16
+       .quad sys_getresuid16   /* 165 */
+       .quad ni_syscall        /* vm86 */ 
+       .quad ni_syscall        /* query_module */
+       .quad sys_poll
+       .quad ni_syscall        /* nfsservctl */ 
+       .quad sys_setresgid16   /* 170 */
+       .quad sys_getresgid16
+       .quad sys_prctl
+       .quad stub32_rt_sigreturn
+       .quad sys32_rt_sigaction
+       .quad sys32_rt_sigprocmask      /* 175 */
+       .quad sys32_rt_sigpending
+       .quad sys32_rt_sigtimedwait
+       .quad sys32_rt_sigqueueinfo
+       .quad stub32_rt_sigsuspend
+       .quad sys32_pread               /* 180 */
+       .quad sys32_pwrite
+       .quad sys_chown16
+       .quad sys_getcwd
+       .quad ni_syscall        /* capget */
+       .quad ni_syscall        /* capset */
+       .quad stub32_sigaltstack
+       .quad sys32_sendfile
+       .quad ni_syscall                /* streams1 */
+       .quad ni_syscall                /* streams2 */
+       .quad stub32_vfork            /* 190 */
+       .quad sys32_getrlimit
+       .quad sys32_mmap2
+       .quad sys_truncate
+       .quad sys_ftruncate
+       .quad sys32_stat64              /* 195 */
+       .quad sys32_lstat64
+       .quad sys32_fstat64
+       .quad sys_lchown
+       .quad sys_getuid
+       .quad sys_getgid                /* 200 */
+       .quad sys_geteuid
+       .quad sys_getegid
+       .quad sys32_setreuid
+       .quad sys32_setregid
+       .quad sys32_getgroups   /* 205 */
+       .quad sys32_setgroups
+       .quad sys_fchown
+       .quad sys32_setresuid
+       .quad sys32_getresuid
+       .quad sys32_setresgid   /* 210 */
+       .quad sys32_getresgid
+       .quad sys_chown
+       .quad sys_setuid
+       .quad sys_setgid
+       .quad sys_setfsuid              /* 215 */
+       .quad sys_setfsgid
+       .quad sys_pivot_root
+       .quad sys_mincore
+       .quad sys_madvise
+       .quad sys_getdents64    /* 220 */ 
+       .quad sys32_fcntl64     
+       .quad sys_ni_syscall    /* tux */
+       .quad sys_ni_syscall    /* security */
+       .quad sys_gettid        
+       .quad sys_readahead     /* 225 */
+       .quad sys_setxattr
+       .quad sys_lsetxattr
+       .quad sys_fsetxattr
+       .quad sys_getxattr
+       .quad sys_lgetxattr     /* 230 */
+       .quad sys_fgetxattr
+       .quad sys_listxattr
+       .quad sys_llistxattr
+       .quad sys_flistxattr
+       .quad sys_removexattr   /* 235 */
+       .quad sys_lremovexattr
+       .quad sys_fremovexattr
+       .quad sys_tkill         /* 238 */ 
+ia32_syscall_end:              
+       .rept IA32_NR_syscalls-(ia32_syscall_end-ia32_sys_call_table)/8
+               .quad ni_syscall
+       .endr
+
+       
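
The header comment above fixes the int $0x80 convention (%eax = syscall number,
%ebx..%ebp = arguments 1..6), and IA32_ARG_FIXUP moves those into the x86-64 C
argument registers (%rdi, %rsi, %rdx, %rcx, %r8, %r9) before the indirect call
through ia32_sys_call_table. For illustration only, a 32bit user-space sketch of
what lands on this entry point (compile with -m32; the numbers 20 and 4 are getpid
and write in the table above):

	/* 32bit user-space illustration of the int $0x80 ABI handled by ia32_syscall. */
	static inline int ia32_getpid(void)
	{
		int ret;
		__asm__ volatile ("int $0x80"
				  : "=a" (ret)		/* %eax: return value */
				  : "0" (20));		/* %eax: getpid in the 32bit table */
		return ret;
	}

	static inline int ia32_write(int fd, const void *buf, unsigned int count)
	{
		int ret;
		__asm__ volatile ("int $0x80"
				  : "=a" (ret)
				  : "0" (4),		/* %eax: write */
				    "b" (fd),		/* %ebx: arg1 */
				    "c" (buf),		/* %ecx: arg2 */
				    "d" (count)		/* %edx: arg3 */
				  : "memory");
		return ret;
	}
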
diff --git a/arch/x86_64/ia32/ptrace32.c b/arch/x86_64/ia32/ptrace32.c
new file mode 100644 (file)
index 0000000..0da46a4
--- /dev/null
@@ -0,0 +1,312 @@
+/* 
+ * 32bit ptrace for x86-64.
+ *
+ * Copyright 2001 Andi Kleen, SuSE Labs.
+ * Some parts copied from arch/i386/kernel/ptrace.c. See that file for 
+ * earlier copyright.
+ * 
+ * This also allows access to 64bit processes, but there is no way to see 
+ * the extended register contents.
+ *
+ * $Id: ptrace32.c,v 1.2 2001/08/15 06:41:13 ak Exp $
+ */ 
+
+#include <linux/kernel.h>
+#include <linux/stddef.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/ptrace.h>
+#include <asm/ptrace.h>
+#include <asm/uaccess.h>
+#include <asm/user32.h>
+#include <asm/errno.h>
+#include <asm/debugreg.h>
+
+#define R32(l,q) \
+       case offsetof(struct user32, regs.l): stack[offsetof(struct pt_regs, q)/8] = val; break
+
+static int putreg32(struct task_struct *child, unsigned regno, u32 val)
+{
+       int i;
+       __u64 *stack = (__u64 *)(child->thread.rsp0 - sizeof(struct pt_regs)); 
+
+       switch (regno) {
+       case offsetof(struct user32, regs.fs):
+               child->thread.fs = val; 
+               break;
+       case offsetof(struct user32, regs.gs):
+               child->thread.gs = val;
+               break;
+       case offsetof(struct user32, regs.ds):
+               child->thread.ds = val;
+               break;
+       case offsetof(struct user32, regs.es):
+               child->thread.es = val;
+               break;
+
+       R32(cs, cs);
+       R32(ss, ss);
+       R32(ebx, rbx); 
+       R32(ecx, rcx);
+       R32(edx, rdx);
+       R32(edi, rdi);
+       R32(esi, rsi);
+       R32(ebp, rbp);
+       R32(eax, rax);
+       R32(orig_eax, orig_rax);
+       R32(eip, rip);
+       R32(esp, rsp);
+
+       case offsetof(struct user32, regs.eflags): 
+               stack[offsetof(struct pt_regs, eflags)/8] = val & 0x44dd5; 
+               break;
+
+       case offsetof(struct user32, u_debugreg[0]) ... offsetof(struct user32, u_debugreg[6]):
+               child->thread.debugreg[(regno-offsetof(struct user32, u_debugreg[0]))/4] = val; 
+               break; 
+
+       case offsetof(struct user32, u_debugreg[7]):
+               val &= ~DR_CONTROL_RESERVED;
+               /* You are not expected to understand this ... I don't either. */
+               for(i=0; i<4; i++)
+                       if ((0x5454 >> ((val >> (16 + 4*i)) & 0xf)) & 1)
+                              return -EIO;
+               child->thread.debugreg[7] = val; 
+               break; 
+                   
+       default:
+               if (regno > sizeof(struct user32) || (regno & 3))
+                       return -EIO;
+              
+               /* Other dummy fields in the virtual user structure are ignored */ 
+               break;          
+       }
+       return 0;
+}
+
+#undef R32
+
+#define R32(l,q) \
+       case offsetof(struct user32, regs.l): *val = stack[offsetof(struct pt_regs, q)/8]; break
+
+static int getreg32(struct task_struct *child, unsigned regno, u32 *val)
+{
+       __u64 *stack = (__u64 *)(child->thread.rsp0 - sizeof(struct pt_regs)); 
+
+       switch (regno) {
+       case offsetof(struct user32, regs.fs):
+               *val = child->thread.fs; 
+               break;
+       case offsetof(struct user32, regs.gs):
+               *val = child->thread.gs;
+               break;
+       case offsetof(struct user32, regs.ds):
+               *val = child->thread.ds;
+               break;
+       case offsetof(struct user32, regs.es):
+               *val = child->thread.es;
+               break;
+
+       R32(cs, cs);
+       R32(ss, ss);
+       R32(ebx, rbx); 
+       R32(ecx, rcx);
+       R32(edx, rdx);
+       R32(edi, rdi);
+       R32(esi, rsi);
+       R32(ebp, rbp);
+       R32(eax, rax);
+       R32(orig_eax, orig_rax);
+       R32(eip, rip);
+       R32(eflags, eflags);
+       R32(esp, rsp);
+
+       case offsetof(struct user32, u_debugreg[0]) ... offsetof(struct user32, u_debugreg[7]):
+               *val = child->thread.debugreg[(regno-offsetof(struct user32, u_debugreg[0]))/4]; 
+               break; 
+                   
+       default:
+               if (regno > sizeof(struct user32) || (regno & 3))
+                       return -EIO;
+
+               /* Other dummy fields in the virtual user structure are ignored */ 
+               *val = 0;
+               break;          
+       }
+       return 0;
+}
+
+#undef R32
+
+
+static struct task_struct *find_target(int request, int pid, int *err)
+{ 
+       struct task_struct *child;
+
+       *err = -EPERM; 
+       if (pid == 1)
+               return NULL; 
+
+       *err = -ESRCH;
+       read_lock(&tasklist_lock);
+       child = find_task_by_pid(pid);
+       if (child)
+               get_task_struct(child);
+       read_unlock(&tasklist_lock);
+       if (child) { 
+               *err = -ESRCH;
+               if (!(child->ptrace & PT_PTRACED))
+                       goto out;
+               if (child->state != TASK_STOPPED) {
+                       if (request != PTRACE_KILL)
+                               goto out;
+               }
+               if (child->p_pptr != current)
+                       goto out;
+
+               return child; 
+       } 
+ out:
+       put_task_struct(child);
+       return NULL; 
+       
+} 
+
+extern asmlinkage long sys_ptrace(long request, long pid, unsigned long addr, unsigned long data);
+
+asmlinkage long sys32_ptrace(long request, u32 pid, u32 addr, u32 data)
+{
+       struct task_struct *child;
+       int ret;
+       __u32 val;
+
+       switch (request) { 
+       case PTRACE_TRACEME:
+       case PTRACE_ATTACH:
+       case PTRACE_SYSCALL:
+       case PTRACE_CONT:
+       case PTRACE_KILL:
+       case PTRACE_SINGLESTEP:
+       case PTRACE_DETACH:
+       case PTRACE_SETOPTIONS:
+               ret = sys_ptrace(request, pid, addr, data); 
+               return ret;
+
+       case PTRACE_PEEKTEXT:
+       case PTRACE_PEEKDATA:
+       case PTRACE_POKEDATA:
+       case PTRACE_POKETEXT:
+       case PTRACE_POKEUSR:       
+       case PTRACE_PEEKUSR:
+       case PTRACE_GETREGS:
+       case PTRACE_SETREGS:
+       case PTRACE_SETFPREGS:
+       case PTRACE_GETFPREGS:
+               break;
+               
+       default:
+               return -EIO;
+       } 
+
+       child = find_target(request, pid, &ret);
+       if (!child)
+               return ret;
+
+       switch (request) {
+       case PTRACE_PEEKDATA:
+       case PTRACE_PEEKTEXT:
+               ret = 0;
+               if (access_process_vm(child, addr, &val, sizeof(u32), 0) != sizeof(u32))
+                       ret = -EIO;
+               else
+                       ret = put_user(val, (unsigned int *)(u64)data); 
+               break; 
+
+       case PTRACE_POKEDATA:
+       case PTRACE_POKETEXT:
+               ret = 0;
+               if (access_process_vm(child, addr, &data, sizeof(u32), 1) != sizeof(u32))
+                       ret = -EIO; 
+               break;
+
+       case PTRACE_PEEKUSR:
+               ret = getreg32(child, addr, &val);
+               if (ret >= 0) 
+                       ret = put_user(val, (__u32 *)(unsigned long) data);
+               break;
+
+       case PTRACE_POKEUSR:
+               ret = putreg32(child, addr, data);
+               break;
+
+       case PTRACE_GETREGS: { /* Get all gp regs from the child. */
+               int i;
+               if (!access_ok(VERIFY_WRITE, (unsigned *)(unsigned long)data, FRAME_SIZE)) {
+                       ret = -EIO;
+                       break;
+               }
+               ret = 0;
+               for ( i = 0; i <= 16*4 ; i += sizeof(__u32) ) {
+                       getreg32(child, i, &val);
+                       ret |= __put_user(val,(u32 *) (unsigned long) data);
+                       data += sizeof(u32);
+               }
+               break;
+       }
+
+       case PTRACE_SETREGS: { /* Set all gp regs in the child. */
+               unsigned long tmp;
+               int i;
+               if (!access_ok(VERIFY_READ, (unsigned *)(unsigned long)data, FRAME_SIZE)) {
+                       ret = -EIO;
+                       break;
+               }
+               ret = 0; 
+               for ( i = 0; i <= 16*4; i += sizeof(u32) ) {
+                       ret |= __get_user(tmp, (u32 *) (unsigned long) data);
+                       putreg32(child, i, tmp);
+                       data += sizeof(u32);
+               }
+               break;
+       }
+
+#if 0 /* to be done. */
+       case PTRACE_GETFPREGS: { /* Get the child extended FPU state. */
+               if (!access_ok(VERIFY_WRITE, (unsigned *)data,
+                              sizeof(struct user_i387_struct))) {
+                       ret = -EIO;
+                       break;
+               }
+               if ( !child->used_math ) {
+                       /* Simulate an empty FPU. */
+                       set_fpu_cwd(child, 0x037f);
+                       set_fpu_swd(child, 0x0000);
+                       set_fpu_twd(child, 0xffff);
+                       set_fpu_mxcsr(child, 0x1f80);
+               }
+               ret = get_fpregs((struct user_i387_struct *)data, child);
+               break;
+       }
+
+       case PTRACE_SETFPREGS: { /* Set the child extended FPU state. */
+               if (!access_ok(VERIFY_READ, (unsigned *)data,
+                              sizeof(struct user_i387_struct))) {
+                       ret = -EIO;
+                       break;
+               }
+               child->used_math = 1;
+               ret = set_fpregs(child, (struct user_i387_struct *)data);
+               break;
+       }
+
+#endif
+
+       default:
+               ret = -EINVAL;
+               break;
+       }
+
+       put_task_struct(child);
+       return ret;
+}
+
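As a rough illustration of the path above (not from the patch itself): a tracer compiled as a 32-bit binary ends up in sys32_ptrace(), where the PEEK/POKE and register requests are converted by hand and everything else falls through to the native sys_ptrace(). A minimal user-space sketch, assuming the standard ptrace(2) interface; the peeked address is purely illustrative (the classic i386 text load address):

	#include <sys/types.h>
	#include <sys/ptrace.h>
	#include <sys/wait.h>
	#include <unistd.h>
	#include <stdio.h>

	int main(void)
	{
		pid_t pid = fork();

		if (pid == 0) {
			/* Child: ask to be traced, then exec something. */
			ptrace(PTRACE_TRACEME, 0, 0, 0);
			execl("/bin/true", "true", (char *)NULL);
			_exit(1);
		}

		waitpid(pid, NULL, 0);	/* child stops at exec */

		/* In a 32-bit binary this PEEKDATA lands in sys32_ptrace(),
		 * which reads 4 bytes of the child via access_process_vm()
		 * and put_user()s them back as a u32. */
		long word = ptrace(PTRACE_PEEKDATA, pid, (void *)0x08048000, 0);
		printf("word: 0x%lx\n", word);

		ptrace(PTRACE_CONT, pid, 0, 0);
		return 0;
	}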
diff --git a/arch/x86_64/ia32/socket32.c b/arch/x86_64/ia32/socket32.c
new file mode 100644 (file)
index 0000000..bc5c07e
--- /dev/null
@@ -0,0 +1,686 @@
+/* 
+ * 32bit Socket syscall emulation. Based on arch/sparc64/kernel/sys_sparc32.c.
+ *
+ * Copyright (C) 2000          VA Linux Co
+ * Copyright (C) 2000          Don Dugger <n0ano@valinux.com>
+ * Copyright (C) 1999          Arun Sharma <arun.sharma@intel.com>
+ * Copyright (C) 1997,1998     Jakub Jelinek (jj@sunsite.mff.cuni.cz)
+ * Copyright (C) 1997          David S. Miller (davem@caip.rutgers.edu)
+ * Copyright (C) 2000          Hewlett-Packard Co.
+ * Copyright (C) 2000          David Mosberger-Tang <davidm@hpl.hp.com>
+ * Copyright (C) 2000,2001     Andi Kleen, SuSE Labs 
+ */
+
+#include <linux/kernel.h>
+#include <linux/fs.h>
+#include <linux/sched.h>
+#include <linux/types.h>
+#include <linux/file.h>
+#include <linux/icmpv6.h>
+#include <linux/socket.h>
+#include <linux/filter.h>
+
+#include <net/scm.h>
+#include <net/sock.h>
+#include <asm/ia32.h>
+#include <asm/uaccess.h>
+#include <asm/socket32.h>
+
+#define A(__x)         ((unsigned long)(__x))
+#define AA(__x)                ((unsigned long)(__x))
+
+
+static inline int iov_from_user32_to_kern(struct iovec *kiov,
+                                         struct iovec32 *uiov32,
+                                         int niov)
+{
+       int tot_len = 0;
+
+       while(niov > 0) {
+               u32 len, buf;
+
+               if(get_user(len, &uiov32->iov_len) ||
+                  get_user(buf, &uiov32->iov_base)) {
+                       tot_len = -EFAULT;
+                       break;
+               }
+               tot_len += len;
+               kiov->iov_base = (void *)A(buf);
+               kiov->iov_len = (__kernel_size_t) len;
+               uiov32++;
+               kiov++;
+               niov--;
+       }
+       return tot_len;
+}
+
+static inline int msghdr_from_user32_to_kern(struct msghdr *kmsg,
+                                            struct msghdr32 *umsg)
+{
+       u32 tmp1, tmp2, tmp3;
+       int err;
+
+       err = get_user(tmp1, &umsg->msg_name);
+       err |= __get_user(tmp2, &umsg->msg_iov);
+       err |= __get_user(tmp3, &umsg->msg_control);
+       if (err)
+               return -EFAULT;
+
+       kmsg->msg_name = (void *)A(tmp1);
+       kmsg->msg_iov = (struct iovec *)A(tmp2);
+       kmsg->msg_control = (void *)A(tmp3);
+
+       err = get_user(kmsg->msg_namelen, &umsg->msg_namelen);
+       err |= get_user(kmsg->msg_iovlen, &umsg->msg_iovlen);
+       err |= get_user(kmsg->msg_controllen, &umsg->msg_controllen);
+       err |= get_user(kmsg->msg_flags, &umsg->msg_flags);
+       
+       return err;
+}
+
+/* I've named the args so it is easy to tell whose space the pointers are in. */
+static int verify_iovec32(struct msghdr *kern_msg, struct iovec *kern_iov,
+                         char *kern_address, int mode)
+{
+       int tot_len;
+
+       if(kern_msg->msg_namelen) {
+               if(mode==VERIFY_READ) {
+                       int err = move_addr_to_kernel(kern_msg->msg_name,
+                                                     kern_msg->msg_namelen,
+                                                     kern_address);
+                       if(err < 0)
+                               return err;
+               }
+               kern_msg->msg_name = kern_address;
+       } else
+               kern_msg->msg_name = NULL;
+
+       if(kern_msg->msg_iovlen > UIO_FASTIOV) {
+               kern_iov = kmalloc(kern_msg->msg_iovlen * sizeof(struct iovec),
+                                  GFP_KERNEL);
+               if(!kern_iov)
+                       return -ENOMEM;
+       }
+
+       tot_len = iov_from_user32_to_kern(kern_iov,
+                                         (struct iovec32 *)kern_msg->msg_iov,
+                                         kern_msg->msg_iovlen);
+       if(tot_len >= 0)
+               kern_msg->msg_iov = kern_iov;
+       else if(kern_msg->msg_iovlen > UIO_FASTIOV)
+               kfree(kern_iov);
+
+       return tot_len;
+}
+
+/* There is a lot of hair here because the alignment rules (and
+ * thus placement) of cmsg headers and length are different for
+ * 32-bit apps.  -DaveM
+ */
+static int cmsghdr_from_user32_to_kern(struct msghdr *kmsg,
+                                      unsigned char *stackbuf, int stackbuf_size)
+{
+       struct cmsghdr32 *ucmsg;
+       struct cmsghdr *kcmsg, *kcmsg_base;
+       __kernel_size_t32 ucmlen;
+       __kernel_size_t kcmlen, tmp;
+
+       kcmlen = 0;
+       kcmsg_base = kcmsg = (struct cmsghdr *)stackbuf;
+       ucmsg = CMSG32_FIRSTHDR(kmsg);
+       while(ucmsg != NULL) {
+               if(get_user(ucmlen, &ucmsg->cmsg_len))
+                       return -EFAULT;
+
+               /* Catch bogons. */
+               if(CMSG32_ALIGN(ucmlen) <
+                  CMSG32_ALIGN(sizeof(struct cmsghdr32)))
+                       return -EINVAL;
+               if((unsigned long)(((char *)ucmsg - (char *)kmsg->msg_control)
+                                  + ucmlen) > kmsg->msg_controllen)
+                       return -EINVAL;
+
+               tmp = ((ucmlen - CMSG32_ALIGN(sizeof(*ucmsg))) +
+                      CMSG_ALIGN(sizeof(struct cmsghdr)));
+               kcmlen += tmp;
+               ucmsg = CMSG32_NXTHDR(kmsg, ucmsg, ucmlen);
+       }
+       if(kcmlen == 0)
+               return -EINVAL;
+
+       /* The kcmlen holds the 64-bit version of the control length.
+        * It may not be modified as we do not stick it into the kmsg
+        * until we have successfully copied over all of the data
+        * from the user.
+        */
+       if(kcmlen > stackbuf_size)
+               kcmsg_base = kcmsg = kmalloc(kcmlen, GFP_KERNEL);
+       if(kcmsg == NULL)
+               return -ENOBUFS;
+
+       /* Now copy them over neatly. */
+       memset(kcmsg, 0, kcmlen);
+       ucmsg = CMSG32_FIRSTHDR(kmsg);
+       while(ucmsg != NULL) {
+               __get_user(ucmlen, &ucmsg->cmsg_len);
+               tmp = ((ucmlen - CMSG32_ALIGN(sizeof(*ucmsg))) +
+                      CMSG_ALIGN(sizeof(struct cmsghdr)));
+               kcmsg->cmsg_len = tmp;
+               __get_user(kcmsg->cmsg_level, &ucmsg->cmsg_level);
+               __get_user(kcmsg->cmsg_type, &ucmsg->cmsg_type);
+
+               /* Copy over the data. */
+               if(copy_from_user(CMSG_DATA(kcmsg),
+                                 CMSG32_DATA(ucmsg),
+                                 (ucmlen - CMSG32_ALIGN(sizeof(*ucmsg)))))
+                       goto out_free_efault;
+
+               /* Advance. */
+               kcmsg = (struct cmsghdr *)((char *)kcmsg + CMSG_ALIGN(tmp));
+               ucmsg = CMSG32_NXTHDR(kmsg, ucmsg, ucmlen);
+       }
+
+       /* Ok, looks like we made it.  Hook it up and return success. */
+       kmsg->msg_control = kcmsg_base;
+       kmsg->msg_controllen = kcmlen;
+       return 0;
+
+out_free_efault:
+       if(kcmsg_base != (struct cmsghdr *)stackbuf)
+               kfree(kcmsg_base);
+       return -EFAULT;
+}
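The conversion above exists because the 32-bit ABI packs cmsg headers more tightly than the 64-bit one. A minimal sketch of the two layouts (the real definitions live in <linux/socket.h> and <asm/socket32.h>; the sizes shown are what they work out to on x86-64):

	/* 64-bit kernel view: 16-byte header, CMSG_ALIGN() rounds to 8. */
	struct cmsghdr {
		size_t	cmsg_len;	/* 8 bytes */
		int	cmsg_level;	/* 4 bytes */
		int	cmsg_type;	/* 4 bytes */
	};

	/* 32-bit user view: 12-byte header, CMSG32_ALIGN() rounds to 4. */
	struct cmsghdr32 {
		__u32	cmsg_len;	/* __kernel_size_t32 */
		int	cmsg_level;
		int	cmsg_type;
	};

	/*
	 * A user cmsg carrying N data bytes arrives as
	 *	CMSG32_ALIGN(12) + N
	 * and has to be re-laid-out in kernel space as
	 *	CMSG_ALIGN(16) + N,
	 * which is exactly the "tmp" computation in
	 * cmsghdr_from_user32_to_kern() above.
	 */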
+
+static void put_cmsg32(struct msghdr *kmsg, int level, int type,
+                      int len, void *data)
+{
+       struct cmsghdr32 *cm = (struct cmsghdr32 *) kmsg->msg_control;
+       struct cmsghdr32 cmhdr;
+       int cmlen = CMSG32_LEN(len);
+
+       if(cm == NULL || kmsg->msg_controllen < sizeof(*cm)) {
+               kmsg->msg_flags |= MSG_CTRUNC;
+               return;
+       }
+
+       if(kmsg->msg_controllen < cmlen) {
+               kmsg->msg_flags |= MSG_CTRUNC;
+               cmlen = kmsg->msg_controllen;
+       }
+       cmhdr.cmsg_level = level;
+       cmhdr.cmsg_type = type;
+       cmhdr.cmsg_len = cmlen;
+
+       if(copy_to_user(cm, &cmhdr, sizeof cmhdr))
+               return;
+       if(copy_to_user(CMSG32_DATA(cm), data, cmlen - sizeof(struct cmsghdr32)))
+               return;
+       cmlen = CMSG32_SPACE(len);
+       kmsg->msg_control += cmlen;
+       kmsg->msg_controllen -= cmlen;
+}
+
+static void scm_detach_fds32(struct msghdr *kmsg, struct scm_cookie *scm)
+{
+       struct cmsghdr32 *cm = (struct cmsghdr32 *) kmsg->msg_control;
+       int fdmax = (kmsg->msg_controllen - sizeof(struct cmsghdr32)) / sizeof(int);
+       int fdnum = scm->fp->count;
+       struct file **fp = scm->fp->fp;
+       int *cmfptr;
+       int err = 0, i;
+
+       if (fdnum < fdmax)
+               fdmax = fdnum;
+
+       for (i = 0, cmfptr = (int *) CMSG32_DATA(cm); i < fdmax; i++, cmfptr++) {
+               int new_fd;
+               err = get_unused_fd();
+               if (err < 0)
+                       break;
+               new_fd = err;
+               err = put_user(new_fd, cmfptr);
+               if (err) {
+                       put_unused_fd(new_fd);
+                       break;
+               }
+               /* Bump the usage count and install the file. */
+               get_file(fp[i]);
+               fd_install(new_fd, fp[i]);
+       }
+
+       if (i > 0) {
+               int cmlen = CMSG32_LEN(i * sizeof(int));
+               if (!err)
+                       err = put_user(SOL_SOCKET, &cm->cmsg_level);
+               if (!err)
+                       err = put_user(SCM_RIGHTS, &cm->cmsg_type);
+               if (!err)
+                       err = put_user(cmlen, &cm->cmsg_len);
+               if (!err) {
+                       cmlen = CMSG32_SPACE(i * sizeof(int));
+                       kmsg->msg_control += cmlen;
+                       kmsg->msg_controllen -= cmlen;
+               }
+       }
+       if (i < fdnum)
+               kmsg->msg_flags |= MSG_CTRUNC;
+
+       /*
+        * All of the files that fit in the message have had their
+        * usage counts incremented, so we just free the list.
+        */
+       __scm_destroy(scm);
+}
+
+/* In these cases we (currently) can just copy the data over verbatim
+ * because all CMSGs created by the kernel have well defined types which
+ * have the same layout in both the 32-bit and 64-bit API.  One must add
+ * some special cased conversions here if we start sending control messages
+ * with incompatible types.
+ *
+ * SCM_RIGHTS and SCM_CREDENTIALS are done by hand in recvmsg32 right after
+ * we do our work.  The remaining cases are:
+ *
+ * SOL_IP      IP_PKTINFO      struct in_pktinfo       32-bit clean
+ *             IP_TTL          int                     32-bit clean
+ *             IP_TOS          __u8                    32-bit clean
+ *             IP_RECVOPTS     variable length         32-bit clean
+ *             IP_RETOPTS      variable length         32-bit clean
+ *             (these last two are clean because the types are defined
+ *              by the IPv4 protocol)
+ *             IP_RECVERR      struct sock_extended_err +
+ *                             struct sockaddr_in      32-bit clean
+ * SOL_IPV6    IPV6_RECVERR    struct sock_extended_err +
+ *                             struct sockaddr_in6     32-bit clean
+ *             IPV6_PKTINFO    struct in6_pktinfo      32-bit clean
+ *             IPV6_HOPLIMIT   int                     32-bit clean
+ *             IPV6_FLOWINFO   u32                     32-bit clean
+ *             IPV6_HOPOPTS    ipv6 hop exthdr         32-bit clean
+ *             IPV6_DSTOPTS    ipv6 dst exthdr(s)      32-bit clean
+ *             IPV6_RTHDR      ipv6 routing exthdr     32-bit clean
+ *             IPV6_AUTHHDR    ipv6 auth exthdr        32-bit clean
+ */
+static void cmsg32_recvmsg_fixup(struct msghdr *kmsg, unsigned long orig_cmsg_uptr)
+{
+       unsigned char *workbuf, *wp;
+       unsigned long bufsz, space_avail;
+       struct cmsghdr *ucmsg;
+
+       bufsz = ((unsigned long)kmsg->msg_control) - orig_cmsg_uptr;
+       space_avail = kmsg->msg_controllen + bufsz;
+       wp = workbuf = kmalloc(bufsz, GFP_KERNEL);
+       if(workbuf == NULL)
+               goto fail;
+
+       /* To make this more sane we assume the kernel sends back properly
+        * formatted control messages.  Because of how the kernel will truncate
+        * the cmsg_len for MSG_TRUNC cases, we need not check that case either.
+        */
+       ucmsg = (struct cmsghdr *) orig_cmsg_uptr;
+       while(((unsigned long)ucmsg) <=
+             (((unsigned long)kmsg->msg_control) - sizeof(struct cmsghdr))) {
+               struct cmsghdr32 *kcmsg32 = (struct cmsghdr32 *) wp;
+               int clen64, clen32;
+
+               /* UCMSG is the 64-bit format CMSG entry in user-space.
+                * KCMSG32 is within the kernel space temporary buffer
+                * we use to convert into a 32-bit style CMSG.
+                */
+               __get_user(kcmsg32->cmsg_len, &ucmsg->cmsg_len);
+               __get_user(kcmsg32->cmsg_level, &ucmsg->cmsg_level);
+               __get_user(kcmsg32->cmsg_type, &ucmsg->cmsg_type);
+
+               clen64 = kcmsg32->cmsg_len;
+               copy_from_user(CMSG32_DATA(kcmsg32), CMSG_DATA(ucmsg),
+                              clen64 - CMSG_ALIGN(sizeof(*ucmsg)));
+               clen32 = ((clen64 - CMSG_ALIGN(sizeof(*ucmsg))) +
+                         CMSG32_ALIGN(sizeof(struct cmsghdr32)));
+               kcmsg32->cmsg_len = clen32;
+
+               ucmsg = (struct cmsghdr *) (((char *)ucmsg) + CMSG_ALIGN(clen64));
+               wp = (((char *)kcmsg32) + CMSG32_ALIGN(clen32));
+       }
+
+       /* Copy back fixed up data, and adjust pointers. */
+       bufsz = (wp - workbuf);
+       copy_to_user((void *)orig_cmsg_uptr, workbuf, bufsz);
+
+       kmsg->msg_control = (struct cmsghdr *)
+               (((char *)orig_cmsg_uptr) + bufsz);
+       kmsg->msg_controllen = space_avail - bufsz;
+
+       kfree(workbuf);
+       return;
+
+fail:
+       /* If we leave the 64-bit format CMSG chunks in there,
+        * the application could get confused and crash.  So to
+        * ensure greater recovery, we report no CMSGs.
+        */
+       kmsg->msg_controllen += bufsz;
+       kmsg->msg_control = (void *) orig_cmsg_uptr;
+}
+
+asmlinkage int sys32_sendmsg(int fd, struct msghdr32 *user_msg, unsigned user_flags)
+{
+       struct socket *sock;
+       char address[MAX_SOCK_ADDR];
+       struct iovec iov[UIO_FASTIOV];
+       unsigned char ctl[sizeof(struct cmsghdr) + 20];
+       unsigned char *ctl_buf = ctl;
+       struct msghdr kern_msg;
+       int err, total_len;
+
+       if(msghdr_from_user32_to_kern(&kern_msg, user_msg))
+               return -EFAULT;
+       if(kern_msg.msg_iovlen > UIO_MAXIOV)
+               return -EINVAL;
+       err = verify_iovec32(&kern_msg, iov, address, VERIFY_READ);
+       if (err < 0)
+               goto out;
+       total_len = err;
+
+       if(kern_msg.msg_controllen) {
+               err = cmsghdr_from_user32_to_kern(&kern_msg, ctl, sizeof(ctl));
+               if(err)
+                       goto out_freeiov;
+               ctl_buf = kern_msg.msg_control;
+       }
+       kern_msg.msg_flags = user_flags;
+
+       sock = sockfd_lookup(fd, &err);
+       if (sock != NULL) {
+               if (sock->file->f_flags & O_NONBLOCK)
+                       kern_msg.msg_flags |= MSG_DONTWAIT;
+               err = sock_sendmsg(sock, &kern_msg, total_len);
+               sockfd_put(sock);
+       }
+
+       /* N.B. Use kfree here, as kern_msg.msg_controllen might change? */
+       if(ctl_buf != ctl)
+               kfree(ctl_buf);
+out_freeiov:
+       if(kern_msg.msg_iov != iov)
+               kfree(kern_msg.msg_iov);
+out:
+       return err;
+}
+
+asmlinkage int sys32_recvmsg(int fd, struct msghdr32 *user_msg, unsigned int user_flags)
+{
+       struct iovec iovstack[UIO_FASTIOV];
+       struct msghdr kern_msg;
+       char addr[MAX_SOCK_ADDR];
+       struct socket *sock;
+       struct iovec *iov = iovstack;
+       struct sockaddr *uaddr;
+       int *uaddr_len;
+       unsigned long cmsg_ptr;
+       int err, total_len, len = 0;
+
+       if(msghdr_from_user32_to_kern(&kern_msg, user_msg))
+               return -EFAULT;
+       if(kern_msg.msg_iovlen > UIO_MAXIOV)
+               return -EINVAL;
+
+       uaddr = kern_msg.msg_name;
+       uaddr_len = &user_msg->msg_namelen;
+       err = verify_iovec32(&kern_msg, iov, addr, VERIFY_WRITE);
+       if (err < 0)
+               goto out;
+       total_len = err;
+
+       cmsg_ptr = (unsigned long) kern_msg.msg_control;
+       kern_msg.msg_flags = 0;
+
+       sock = sockfd_lookup(fd, &err);
+       if (sock != NULL) {
+               struct scm_cookie scm;
+
+               if (sock->file->f_flags & O_NONBLOCK)
+                       user_flags |= MSG_DONTWAIT;
+               memset(&scm, 0, sizeof(scm));
+               err = sock->ops->recvmsg(sock, &kern_msg, total_len,
+                                        user_flags, &scm);
+               if(err >= 0) {
+                       len = err;
+                       if(!kern_msg.msg_control) {
+                               if(sock->passcred || scm.fp)
+                                       kern_msg.msg_flags |= MSG_CTRUNC;
+                               if(scm.fp)
+                                       __scm_destroy(&scm);
+                       } else {
+                               /* If recvmsg processing itself placed some
+                                * control messages into user space, it is
+                                * using 64-bit CMSG processing, so we need
+                                * to fix it up before we tack on more stuff.
+                                */
+                               if((unsigned long) kern_msg.msg_control != cmsg_ptr)
+                                       cmsg32_recvmsg_fixup(&kern_msg, cmsg_ptr);
+
+                               /* Wheee... */
+                               if(sock->passcred)
+                                       put_cmsg32(&kern_msg,
+                                                  SOL_SOCKET, SCM_CREDENTIALS,
+                                                  sizeof(scm.creds), &scm.creds);
+                               if(scm.fp != NULL)
+                                       scm_detach_fds32(&kern_msg, &scm);
+                       }
+               }
+               sockfd_put(sock);
+       }
+
+       if(uaddr != NULL && err >= 0)
+               err = move_addr_to_user(addr, kern_msg.msg_namelen, uaddr, uaddr_len);
+       if(cmsg_ptr != 0 && err >= 0) {
+               unsigned long ucmsg_ptr = ((unsigned long)kern_msg.msg_control);
+               __kernel_size_t32 uclen = (__kernel_size_t32) (ucmsg_ptr - cmsg_ptr);
+               err |= __put_user(uclen, &user_msg->msg_controllen);
+       }
+       if(err >= 0)
+               err = __put_user(kern_msg.msg_flags, &user_msg->msg_flags);
+       if(kern_msg.msg_iov != iov)
+               kfree(kern_msg.msg_iov);
+out:
+       if(err < 0)
+               return err;
+       return len;
+}
+
+extern asmlinkage int sys_setsockopt(int fd, int level, int optname,
+                                    char *optval, int optlen);
+
+static int do_set_attach_filter(int fd, int level, int optname,
+                               char *optval, int optlen)
+{
+       struct sock_fprog32 {
+               __u16 len;
+               __u32 filter;
+       } *fprog32 = (struct sock_fprog32 *)optval;
+       struct sock_fprog kfprog;
+       struct sock_filter *kfilter;
+       unsigned int fsize;
+       mm_segment_t old_fs;
+       __u32 uptr;
+       int ret;
+
+       if (get_user(kfprog.len, &fprog32->len) ||
+           __get_user(uptr, &fprog32->filter))
+               return -EFAULT;
+
+       kfprog.filter = (struct sock_filter *)A(uptr);
+       fsize = kfprog.len * sizeof(struct sock_filter);
+
+       kfilter = (struct sock_filter *)kmalloc(fsize, GFP_KERNEL);
+       if (kfilter == NULL)
+               return -ENOMEM;
+
+       if (copy_from_user(kfilter, kfprog.filter, fsize)) {
+               kfree(kfilter);
+               return -EFAULT;
+       }
+
+       kfprog.filter = kfilter;
+
+       old_fs = get_fs();
+       set_fs(KERNEL_DS);
+       ret = sys_setsockopt(fd, level, optname,
+                            (char *)&kfprog, sizeof(kfprog));
+       set_fs(old_fs);
+
+       kfree(kfilter);
+
+       return ret;
+}
+
+static int do_set_icmpv6_filter(int fd, int level, int optname,
+                               char *optval, int optlen)
+{
+       struct icmp6_filter kfilter;
+       mm_segment_t old_fs;
+       int ret, i;
+
+       if (copy_from_user(&kfilter, optval, sizeof(kfilter)))
+               return -EFAULT;
+
+       for (i = 0; i < 8; i += 2) {
+               u32 tmp = kfilter.data[i];
+
+               kfilter.data[i] = kfilter.data[i + 1];
+               kfilter.data[i + 1] = tmp;
+       }
+
+       old_fs = get_fs();
+       set_fs(KERNEL_DS);
+       ret = sys_setsockopt(fd, level, optname,
+                            (char *) &kfilter, sizeof(kfilter));
+       set_fs(old_fs);
+
+       return ret;
+}
+
+asmlinkage int sys32_setsockopt(int fd, int level, int optname,
+                               char *optval, int optlen)
+{
+       if (optname == SO_ATTACH_FILTER)
+               return do_set_attach_filter(fd, level, optname,
+                                           optval, optlen);
+       if (level == SOL_ICMPV6 && optname == ICMPV6_FILTER)
+               return do_set_icmpv6_filter(fd, level, optname,
+                                           optval, optlen);
+
+       return sys_setsockopt(fd, level, optname, optval, optlen);
+}
+
+
+/* Argument list sizes for sys_socketcall */
+#define AL(x) ((x) * sizeof(u32))
+static unsigned char nas[18]={AL(0),AL(3),AL(3),AL(3),AL(2),AL(3),
+                                AL(3),AL(3),AL(4),AL(4),AL(4),AL(6),
+                                AL(6),AL(2),AL(5),AL(5),AL(3),AL(3)};
+#undef AL
+
+extern asmlinkage long sys_bind(int fd, struct sockaddr *umyaddr, int addrlen);
+extern asmlinkage long sys_connect(int fd, struct sockaddr *uservaddr,
+                                 int addrlen);
+extern asmlinkage long sys_accept(int fd, struct sockaddr *upeer_sockaddr,
+                                int *upeer_addrlen); 
+extern asmlinkage long sys_getsockname(int fd, struct sockaddr *usockaddr,
+                                     int *usockaddr_len);
+extern asmlinkage long sys_getpeername(int fd, struct sockaddr *usockaddr,
+                                     int *usockaddr_len);
+extern asmlinkage long sys_send(int fd, void *buff, size_t len, unsigned flags);
+extern asmlinkage long sys_sendto(int fd, u32 buff, __kernel_size_t32 len,
+                                  unsigned flags, u32 addr, int addr_len);
+extern asmlinkage long sys_recv(int fd, void *ubuf, size_t size, unsigned flags);
+extern asmlinkage long sys_recvfrom(int fd, u32 ubuf, __kernel_size_t32 size,
+                                    unsigned flags, u32 addr, u32 addr_len);
+extern asmlinkage long sys_getsockopt(int fd, int level, int optname,
+                                      u32 optval, u32 optlen);
+
+extern asmlinkage long sys_socket(int family, int type, int protocol);
+extern asmlinkage long sys_socketpair(int family, int type, int protocol,
+                                    int usockvec[2]);
+extern asmlinkage long sys_shutdown(int fd, int how);
+extern asmlinkage long sys_listen(int fd, int backlog);
+
+asmlinkage long sys32_socketcall(int call, u32 *args)
+{
+       int ret;
+       u32 a[6];
+       u32 a0,a1;
+                                
+       if (call<SYS_SOCKET||call>SYS_RECVMSG)
+               return -EINVAL;
+       if (copy_from_user(a, args, nas[call]))
+               return -EFAULT;
+       a0=a[0];
+       a1=a[1];
+       
+       switch(call) 
+       {
+               case SYS_SOCKET:
+                       ret = sys_socket(a0, a1, a[2]);
+                       break;
+               case SYS_BIND:
+                       ret = sys_bind(a0, (struct sockaddr *)A(a1), a[2]);
+                       break;
+               case SYS_CONNECT:
+                       ret = sys_connect(a0, (struct sockaddr *)A(a1), a[2]);
+                       break;
+               case SYS_LISTEN:
+                       ret = sys_listen(a0, a1);
+                       break;
+               case SYS_ACCEPT:
+                       ret = sys_accept(a0, (struct sockaddr *)A(a1),
+                                         (int *)A(a[2]));
+                       break;
+               case SYS_GETSOCKNAME:
+                       ret = sys_getsockname(a0, (struct sockaddr *)A(a1),
+                                              (int *)A(a[2]));
+                       break;
+               case SYS_GETPEERNAME:
+                       ret = sys_getpeername(a0, (struct sockaddr *)A(a1),
+                                              (int *)A(a[2]));
+                       break;
+               case SYS_SOCKETPAIR:
+                       ret = sys_socketpair(a0, a1, a[2], (int *)A(a[3]));
+                       break;
+               case SYS_SEND:
+                       ret = sys_send(a0, (void *)A(a1), a[2], a[3]);
+                       break;
+               case SYS_SENDTO:
+                       ret = sys_sendto(a0, a1, a[2], a[3], a[4], a[5]);
+                       break;
+               case SYS_RECV:
+                       ret = sys_recv(a0, (void *)A(a1), a[2], a[3]);
+                       break;
+               case SYS_RECVFROM:
+                       ret = sys_recvfrom(a0, a1, a[2], a[3], a[4], a[5]);
+                       break;
+               case SYS_SHUTDOWN:
+                       ret = sys_shutdown(a0,a1);
+                       break;
+               case SYS_SETSOCKOPT:
+                       ret = sys_setsockopt(a0, a1, a[2], (char *)A(a[3]),
+                                             a[4]);
+                       break;
+               case SYS_GETSOCKOPT:
+                       ret = sys_getsockopt(a0, a1, a[2], a[3], a[4]);
+                       break;
+               case SYS_SENDMSG:
+                       ret = sys32_sendmsg(a0, (struct msghdr32 *)A(a1),
+                                            a[2]);
+                       break;
+               case SYS_RECVMSG:
+                       ret = sys32_recvmsg(a0, (struct msghdr32 *)A(a1),
+                                            a[2]);
+                       break;
+               default:
+                       ret = -EINVAL;
+                       break;
+       }
+       return ret;
+}
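For reference, the 32-bit caller's side: ia32 libc funnels every socket operation through one socketcall(2) with a small argument array, which is why nas[] above only needs to know how many u32 slots to copy per call. An illustrative sketch only, assuming an ia32 build where __NR_socketcall and the SYS_* numbers from <linux/net.h> are available:

	#include <sys/syscall.h>
	#include <unistd.h>
	#include <linux/net.h>		/* SYS_SOCKET, SYS_CONNECT, ... */

	/* Roughly what a 32-bit libc does for socket(AF_INET, SOCK_STREAM, 0):
	 * pack the arguments as 32-bit words and issue one socketcall. */
	static int socketcall32(int call, unsigned int *args)
	{
		return syscall(__NR_socketcall, call, args);
	}

	int make_socket(void)
	{
		unsigned int args[3] = { 2 /* AF_INET */, 1 /* SOCK_STREAM */, 0 };

		/* sys32_socketcall() copies nas[SYS_SOCKET] == 3*sizeof(u32)
		 * bytes from args and dispatches to sys_socket(). */
		return socketcall32(SYS_SOCKET, args);
	}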
diff --git a/arch/x86_64/ia32/sys_ia32.c b/arch/x86_64/ia32/sys_ia32.c
new file mode 100644 (file)
index 0000000..2406767
--- /dev/null
@@ -0,0 +1,2788 @@
+/*
+ * sys_ia32.c: Conversion between 32bit and 64bit native syscalls. Based on
+ *             sys_sparc32 
+ *
+ * Copyright (C) 2000          VA Linux Co
+ * Copyright (C) 2000          Don Dugger <n0ano@valinux.com>
+ * Copyright (C) 1999          Arun Sharma <arun.sharma@intel.com>
+ * Copyright (C) 1997,1998     Jakub Jelinek (jj@sunsite.mff.cuni.cz)
+ * Copyright (C) 1997          David S. Miller (davem@caip.rutgers.edu)
+ * Copyright (C) 2000          Hewlett-Packard Co.
+ * Copyright (C) 2000          David Mosberger-Tang <davidm@hpl.hp.com>
+ * Copyright (C) 2000,2001     Andi Kleen, SuSE Labs (x86-64 port) 
+ *
+ * These routines maintain argument size conversion between 32bit and 64bit
+ * environment. In 2.5 most of this should be moved to a generic directory. 
+ *
+ * This file assumes that there is a hole at the end of user address space.
+ */
+
+#include <linux/config.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/fs.h> 
+#include <linux/file.h> 
+#include <linux/signal.h>
+#include <linux/utime.h>
+#include <linux/resource.h>
+#include <linux/times.h>
+#include <linux/utsname.h>
+#include <linux/timex.h>
+#include <linux/smp.h>
+#include <linux/smp_lock.h>
+#include <linux/sem.h>
+#include <linux/msg.h>
+#include <linux/mm.h>
+#include <linux/shm.h>
+#include <linux/slab.h>
+#include <linux/uio.h>
+#include <linux/nfs_fs.h>
+#include <linux/smb_fs.h>
+#include <linux/smb_mount.h>
+#include <linux/ncp_fs.h>
+#include <linux/quota.h>
+#include <linux/module.h>
+#include <linux/sunrpc/svc.h>
+#include <linux/nfsd/nfsd.h>
+#include <linux/nfsd/cache.h>
+#include <linux/nfsd/xdr.h>
+#include <linux/nfsd/syscall.h>
+#include <linux/poll.h>
+#include <linux/personality.h>
+#include <linux/stat.h>
+#include <linux/ipc.h>
+#include <linux/rwsem.h>
+#include <asm/mman.h>
+#include <asm/types.h>
+#include <asm/uaccess.h>
+#include <asm/semaphore.h>
+#include <asm/ipc.h>
+#include <asm/atomic.h>
+
+#include <net/scm.h>
+#include <net/sock.h>
+#include <asm/ia32.h>
+
+#define A(__x)         ((unsigned long)(__x))
+#define AA(__x)                ((unsigned long)(__x))
+#define ROUND_UP(x,a)  ((__typeof__(x))(((unsigned long)(x) + ((a) - 1)) & ~((a) - 1)))
+#define NAME_OFFSET(de) ((int) ((de)->d_name - (char *) (de)))
+
+static int
+putstat(struct stat32 *ubuf, struct stat *kbuf)
+{
+       if (verify_area(VERIFY_WRITE, ubuf, sizeof(struct stat32)) ||
+           __put_user (kbuf->st_dev, &ubuf->st_dev) ||
+           __put_user (kbuf->st_ino, &ubuf->st_ino) ||
+           __put_user (kbuf->st_mode, &ubuf->st_mode) ||
+           __put_user (kbuf->st_nlink, &ubuf->st_nlink) ||
+           __put_user (kbuf->st_uid, &ubuf->st_uid) ||
+           __put_user (kbuf->st_gid, &ubuf->st_gid) ||
+           __put_user (kbuf->st_rdev, &ubuf->st_rdev) ||
+           __put_user (kbuf->st_size, &ubuf->st_size) ||
+           __put_user (kbuf->st_atime, &ubuf->st_atime) ||
+           __put_user (kbuf->st_mtime, &ubuf->st_mtime) ||
+           __put_user (kbuf->st_ctime, &ubuf->st_ctime) ||
+           __put_user (kbuf->st_blksize, &ubuf->st_blksize) ||
+           __put_user (kbuf->st_blocks, &ubuf->st_blocks))
+               return -EFAULT;
+       return 0;
+}
+
+extern asmlinkage long sys_newstat(char * filename, struct stat * statbuf);
+
+asmlinkage long
+sys32_newstat(char * filename, struct stat32 *statbuf)
+{
+       int ret;
+       struct stat s;
+       mm_segment_t old_fs = get_fs();
+       
+       set_fs (KERNEL_DS);
+       ret = sys_newstat(filename, &s);
+       set_fs (old_fs);
+       if (putstat (statbuf, &s))
+               return -EFAULT;
+       return ret;
+}
+
+extern asmlinkage long sys_newlstat(char * filename, struct stat * statbuf);
+
+asmlinkage long
+sys32_newlstat(char * filename, struct stat32 *statbuf)
+{
+       int ret;
+       struct stat s;
+       mm_segment_t old_fs = get_fs();
+       
+       set_fs (KERNEL_DS);
+       ret = sys_newlstat(filename, &s);
+       set_fs (old_fs);
+       if (putstat (statbuf, &s))
+               return -EFAULT;
+       return ret;
+}
+
+extern asmlinkage long sys_newfstat(unsigned int fd, struct stat * statbuf);
+
+asmlinkage long
+sys32_newfstat(unsigned int fd, struct stat32 *statbuf)
+{
+       int ret;
+       struct stat s;
+       mm_segment_t old_fs = get_fs();
+       
+       set_fs (KERNEL_DS);
+       ret = sys_newfstat(fd, &s);
+       set_fs (old_fs);
+       if (putstat (statbuf, &s))
+               return -EFAULT;
+       return ret;
+}
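The three wrappers above all follow the emulation pattern that recurs throughout this file: switch the user-copy limit with set_fs(KERNEL_DS), call the native 64-bit syscall on a kernel-stack struct, restore the old limit, then hand-convert the result into the 32-bit layout. A minimal sketch of the shape, with a hypothetical sys_foo/foo32 pair (not a real syscall):

	asmlinkage long sys32_foo(struct foo32 *ubuf32)
	{
		struct foo k;			/* native 64-bit layout */
		mm_segment_t old_fs = get_fs();
		long ret;

		set_fs(KERNEL_DS);		/* let sys_foo() accept &k */
		ret = sys_foo(&k);		/* native syscall fills it in */
		set_fs(old_fs);

		if (!ret && put_foo32(ubuf32, &k))	/* field-by-field copy-out */
			return -EFAULT;
		return ret;
	}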
+
+/* Another set for IA32/LFS -- x86_64 struct stat is different due to 
+   support for 64bit inode numbers. */
+
+static int
+putstat64(struct stat64 *ubuf, struct stat *kbuf)
+{
+       if (verify_area(VERIFY_WRITE, ubuf, sizeof(struct stat64)) ||
+           __put_user (kbuf->st_dev, &ubuf->st_dev) ||
+           __put_user (kbuf->st_ino, &ubuf->__st_ino) ||
+           __put_user (kbuf->st_ino, &ubuf->st_ino) ||
+           __put_user (kbuf->st_mode, &ubuf->st_mode) ||
+           __put_user (kbuf->st_nlink, &ubuf->st_nlink) ||
+           __put_user (kbuf->st_uid, &ubuf->st_uid) ||
+           __put_user (kbuf->st_gid, &ubuf->st_gid) ||
+           __put_user (kbuf->st_rdev, &ubuf->st_rdev) ||
+           __put_user (kbuf->st_size, &ubuf->st_size) ||
+           __put_user (kbuf->st_atime, &ubuf->st_atime) ||
+           __put_user (kbuf->st_mtime, &ubuf->st_mtime) ||
+           __put_user (kbuf->st_ctime, &ubuf->st_ctime) ||
+           __put_user (kbuf->st_blksize, &ubuf->st_blksize) ||
+           __put_user (kbuf->st_blocks, &ubuf->st_blocks))
+               return -EFAULT;
+       return 0;
+}
+
+asmlinkage long
+sys32_stat64(char * filename, struct stat64 *statbuf)
+{
+       int ret;
+       struct stat s;
+       mm_segment_t old_fs = get_fs();
+       
+       set_fs (KERNEL_DS);
+       ret = sys_newstat(filename, &s);
+       set_fs (old_fs);
+       if (putstat64 (statbuf, &s))
+               return -EFAULT;
+       return ret;
+}
+
+asmlinkage long
+sys32_lstat64(char * filename, struct stat64 *statbuf)
+{
+       int ret;
+       struct stat s;
+       mm_segment_t old_fs = get_fs();
+       
+       set_fs (KERNEL_DS);
+       ret = sys_newlstat(filename, &s);
+       set_fs (old_fs);
+       if (putstat64 (statbuf, &s))
+               return -EFAULT;
+       return ret;
+}
+
+asmlinkage long
+sys32_fstat64(unsigned int fd, struct stat64 *statbuf)
+{
+       int ret;
+       struct stat s;
+       mm_segment_t old_fs = get_fs();
+       
+       set_fs (KERNEL_DS);
+       ret = sys_newfstat(fd, &s);
+       set_fs (old_fs);
+       if (putstat64 (statbuf, &s))
+               return -EFAULT;
+       return ret;
+}
+
+
+
+/*
+ * Linux/i386 didn't use to be able to handle more than
+ * 4 system call parameters, so these system calls used a memory
+ * block for parameter passing.
+ */
+
+struct mmap_arg_struct {
+       unsigned int addr;
+       unsigned int len;
+       unsigned int prot;
+       unsigned int flags;
+       unsigned int fd;
+       unsigned int offset;
+};
+
+asmlinkage __u32
+sys32_mmap(struct mmap_arg_struct *arg)
+{
+       struct mmap_arg_struct a;
+       struct file *file = NULL;
+       unsigned long retval;
+       struct mm_struct *mm;
+
+       if (copy_from_user(&a, arg, sizeof(a)))
+               return -EFAULT;
+
+       if (a.offset & ~PAGE_MASK)
+               return -EINVAL; 
+
+       if (!(a.flags & MAP_ANONYMOUS)) {
+               file = fget(a.fd);
+               if (!file)
+                       return -EBADF;
+       }
+
+       mm = current->mm; 
+       down_write(&mm->mmap_sem); 
+       retval = do_mmap_pgoff(file, a.addr, a.len, a.prot, a.flags, a.offset>>PAGE_SHIFT);
+       if (file)
+               fput(file);
+
+       if (retval >= 0xFFFFFFFF) { 
+               do_munmap(mm, retval, a.len); 
+               retval = -ENOMEM; 
+       } 
+       up_write(&mm->mmap_sem); 
+
+       return retval;
+}
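On the 32-bit side, old_mmap really does pass a single pointer to that six-word block, which is what sys32_mmap() unpacks. A user-space sketch, assuming the ia32 __NR_mmap (old_mmap) syscall number; modern 32-bit libcs use mmap2 instead:

	#include <sys/syscall.h>
	#include <unistd.h>

	/* The six-word block old_mmap(2) takes on ia32
	 * (mirrors mmap_arg_struct above). */
	struct mmap_arg32 {
		unsigned int addr, len, prot, flags, fd, offset;
	};

	/* Compiled as a 32-bit binary: one register argument,
	 * a pointer to the whole block. */
	static void *old_mmap32(struct mmap_arg32 *a)
	{
		return (void *)syscall(SYS_mmap, a);
	}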
+
+asmlinkage long
+sys32_pipe(int *fd)
+{
+       int retval;
+       int fds[2];
+
+       retval = do_pipe(fds);
+       if (retval)
+               goto out;
+       if (copy_to_user(fd, fds, sizeof(fds)))
+               retval = -EFAULT;
+  out:
+       return retval;
+}
+
+asmlinkage long
+sys32_rt_sigaction(int sig, struct sigaction32 *act,
+                  struct sigaction32 *oact,  unsigned int sigsetsize)
+{
+       struct k_sigaction new_ka, old_ka;
+       int ret;
+       sigset32_t set32;
+
+       /* XXX: Don't preclude handling different sized sigset_t's.  */
+       if (sigsetsize != sizeof(sigset32_t))
+               return -EINVAL;
+
+       if (act) {
+               if (verify_area(VERIFY_READ, act, sizeof(*act)) ||
+                   __get_user((long)new_ka.sa.sa_handler, &act->sa_handler) ||
+                   __get_user(new_ka.sa.sa_flags, &act->sa_flags) ||
+                   __get_user((long)new_ka.sa.sa_restorer, &act->sa_restorer)||
+                   __copy_from_user(&set32, &act->sa_mask, sizeof(sigset32_t)))
+                       return -EFAULT;
+
+               /* FIXME: here we rely on _IA32_NSIG_WORDS to be >= _NSIG_WORDS << 1 */
+               switch (_NSIG_WORDS) {
+               case 4: new_ka.sa.sa_mask.sig[3] = set32.sig[6]
+                               | (((long)set32.sig[7]) << 32);
+               case 3: new_ka.sa.sa_mask.sig[2] = set32.sig[4]
+                               | (((long)set32.sig[5]) << 32);
+               case 2: new_ka.sa.sa_mask.sig[1] = set32.sig[2]
+                               | (((long)set32.sig[3]) << 32);
+               case 1: new_ka.sa.sa_mask.sig[0] = set32.sig[0]
+                               | (((long)set32.sig[1]) << 32);
+               }
+       }
+
+       ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL);
+
+       if (!ret && oact) {
+               /* FIXME: here we rely on _IA32_NSIG_WORDS to be >= _NSIG_WORDS << 1 */
+               switch (_NSIG_WORDS) {
+               case 4:
+                       set32.sig[7] = (old_ka.sa.sa_mask.sig[3] >> 32);
+                       set32.sig[6] = old_ka.sa.sa_mask.sig[3];
+               case 3:
+                       set32.sig[5] = (old_ka.sa.sa_mask.sig[2] >> 32);
+                       set32.sig[4] = old_ka.sa.sa_mask.sig[2];
+               case 2:
+                       set32.sig[3] = (old_ka.sa.sa_mask.sig[1] >> 32);
+                       set32.sig[2] = old_ka.sa.sa_mask.sig[1];
+               case 1:
+                       set32.sig[1] = (old_ka.sa.sa_mask.sig[0] >> 32);
+                       set32.sig[0] = old_ka.sa.sa_mask.sig[0];
+               }
+               if (verify_area(VERIFY_WRITE, oact, sizeof(*oact)) ||
+                   __put_user((long)old_ka.sa.sa_handler, &oact->sa_handler) ||
+                   __put_user((long)old_ka.sa.sa_restorer, &oact->sa_restorer) ||
+                   __put_user(old_ka.sa.sa_flags, &oact->sa_flags) ||
+                   __copy_to_user(&oact->sa_mask, &set32, sizeof(sigset32_t)))
+                       return -EFAULT;
+       }
+
+       return ret;
+}
+
+asmlinkage long
+sys32_sigaction (int sig, struct old_sigaction32 *act, struct old_sigaction32 *oact)
+{
+        struct k_sigaction new_ka, old_ka;
+        int ret;
+
+        if (act) {
+               old_sigset32_t mask;
+
+               if (verify_area(VERIFY_READ, act, sizeof(*act)) ||
+                   __get_user((long)new_ka.sa.sa_handler, &act->sa_handler) ||
+                   __get_user(new_ka.sa.sa_flags, &act->sa_flags) ||
+                   __get_user((long)new_ka.sa.sa_restorer, &act->sa_restorer) ||
+                   __get_user(mask, &act->sa_mask))
+                       return -EFAULT;
+               siginitset(&new_ka.sa.sa_mask, mask);
+        }
+
+        ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL);
+
+       if (!ret && oact) {
+               if (verify_area(VERIFY_WRITE, oact, sizeof(*oact)) ||
+                   __put_user((long)old_ka.sa.sa_handler, &oact->sa_handler) ||
+                   __put_user((long)old_ka.sa.sa_restorer, &oact->sa_restorer) ||
+                   __put_user(old_ka.sa.sa_flags, &oact->sa_flags) ||
+                   __put_user(old_ka.sa.sa_mask.sig[0], &oact->sa_mask))
+                       return -EFAULT;
+        }
+
+       return ret;
+}
+
+extern asmlinkage long sys_rt_sigprocmask(int how, sigset_t *set, sigset_t *oset,
+                                         size_t sigsetsize);
+
+asmlinkage long
+sys32_rt_sigprocmask(int how, sigset32_t *set, sigset32_t *oset,
+                    unsigned int sigsetsize)
+{
+       sigset_t s;
+       sigset32_t s32;
+       int ret;
+       mm_segment_t old_fs = get_fs();
+       
+       if (set) {
+               if (copy_from_user (&s32, set, sizeof(sigset32_t)))
+                       return -EFAULT;
+               switch (_NSIG_WORDS) {
+               case 4: s.sig[3] = s32.sig[6] | (((long)s32.sig[7]) << 32);
+               case 3: s.sig[2] = s32.sig[4] | (((long)s32.sig[5]) << 32);
+               case 2: s.sig[1] = s32.sig[2] | (((long)s32.sig[3]) << 32);
+               case 1: s.sig[0] = s32.sig[0] | (((long)s32.sig[1]) << 32);
+               }
+       }
+       set_fs (KERNEL_DS);
+       ret = sys_rt_sigprocmask(how, set ? &s : NULL, oset ? &s : NULL,
+                                sigsetsize); 
+       set_fs (old_fs);
+       if (ret) return ret;
+       if (oset) {
+               switch (_NSIG_WORDS) {
+               case 4: s32.sig[7] = (s.sig[3] >> 32); s32.sig[6] = s.sig[3];
+               case 3: s32.sig[5] = (s.sig[2] >> 32); s32.sig[4] = s.sig[2];
+               case 2: s32.sig[3] = (s.sig[1] >> 32); s32.sig[2] = s.sig[1];
+               case 1: s32.sig[1] = (s.sig[0] >> 32); s32.sig[0] = s.sig[0];
+               }
+               if (copy_to_user (oset, &s32, sizeof(sigset32_t)))
+                       return -EFAULT;
+       }
+       return 0;
+}
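The switches above just splice pairs of 32-bit mask words into 64-bit ones and back. A worked example for the _NSIG_WORDS == 1 case (64 signals, as on x86-64), with arbitrary example values:

	/*
	 * 32-bit side:	s32.sig[0] = 0x00010002
	 *		s32.sig[1] = 0x00000080
	 *
	 * 64-bit side:	s.sig[0] = s32.sig[0] | ((long)s32.sig[1] << 32)
	 *			 = 0x0000008000010002
	 *
	 * and on the way back out:
	 *		s32.sig[1] = s.sig[0] >> 32 = 0x00000080
	 *		s32.sig[0] = (u32)s.sig[0]  = 0x00010002
	 */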
+
+static int
+put_statfs (struct statfs32 *ubuf, struct statfs *kbuf)
+{
+       if (verify_area(VERIFY_WRITE, ubuf, sizeof(struct statfs32)) ||
+           __put_user (kbuf->f_type, &ubuf->f_type) ||
+           __put_user (kbuf->f_bsize, &ubuf->f_bsize) ||
+           __put_user (kbuf->f_blocks, &ubuf->f_blocks) ||
+           __put_user (kbuf->f_bfree, &ubuf->f_bfree) ||
+           __put_user (kbuf->f_bavail, &ubuf->f_bavail) ||
+           __put_user (kbuf->f_files, &ubuf->f_files) ||
+           __put_user (kbuf->f_ffree, &ubuf->f_ffree) ||
+           __put_user (kbuf->f_namelen, &ubuf->f_namelen) ||
+           __put_user (kbuf->f_fsid.val[0], &ubuf->f_fsid.val[0]) ||
+           __put_user (kbuf->f_fsid.val[1], &ubuf->f_fsid.val[1]))
+               return -EFAULT;
+       return 0;
+}
+
+extern asmlinkage long sys_statfs(const char * path, struct statfs * buf);
+
+asmlinkage long
+sys32_statfs(const char * path, struct statfs32 *buf)
+{
+       int ret;
+       struct statfs s;
+       mm_segment_t old_fs = get_fs();
+       
+       set_fs (KERNEL_DS);
+       ret = sys_statfs((const char *)path, &s);
+       set_fs (old_fs);
+       if (put_statfs(buf, &s))
+               return -EFAULT;
+       return ret;
+}
+
+extern asmlinkage long sys_fstatfs(unsigned int fd, struct statfs * buf);
+
+asmlinkage long
+sys32_fstatfs(unsigned int fd, struct statfs32 *buf)
+{
+       int ret;
+       struct statfs s;
+       mm_segment_t old_fs = get_fs();
+       
+       set_fs (KERNEL_DS);
+       ret = sys_fstatfs(fd, &s);
+       set_fs (old_fs);
+       if (put_statfs(buf, &s))
+               return -EFAULT;
+       return ret;
+}
+
+struct timeval32
+{
+    int tv_sec, tv_usec;
+};
+
+struct itimerval32
+{
+    struct timeval32 it_interval;
+    struct timeval32 it_value;
+};
+
+static inline long
+get_tv32(struct timeval *o, struct timeval32 *i)
+{
+       return (!access_ok(VERIFY_READ, i, sizeof(*i)) ||
+               __get_user(o->tv_sec, &i->tv_sec) ||
+               __get_user(o->tv_usec, &i->tv_usec));
+}
+
+static inline long
+put_tv32(struct timeval32 *o, struct timeval *i)
+{
+       return (!access_ok(VERIFY_WRITE, o, sizeof(*o)) ||
+               __put_user(i->tv_sec, &o->tv_sec) ||
+               __put_user(i->tv_usec, &o->tv_usec));
+}
+
+static inline long
+get_it32(struct itimerval *o, struct itimerval32 *i)
+{
+       return (!access_ok(VERIFY_READ, i, sizeof(*i)) ||
+               __get_user(o->it_interval.tv_sec, &i->it_interval.tv_sec) ||
+               __get_user(o->it_interval.tv_usec, &i->it_interval.tv_usec) ||
+               __get_user(o->it_value.tv_sec, &i->it_value.tv_sec) ||
+               __get_user(o->it_value.tv_usec, &i->it_value.tv_usec));
+}
+
+static inline long
+put_it32(struct itimerval32 *o, struct itimerval *i)
+{
+       return (!access_ok(VERIFY_WRITE, o, sizeof(*o)) ||
+               __put_user(i->it_interval.tv_sec, &o->it_interval.tv_sec) ||
+               __put_user(i->it_interval.tv_usec, &o->it_interval.tv_usec) ||
+               __put_user(i->it_value.tv_sec, &o->it_value.tv_sec) ||
+               __put_user(i->it_value.tv_usec, &o->it_value.tv_usec));
+}
+
+extern int do_getitimer(int which, struct itimerval *value);
+
+asmlinkage long
+sys32_getitimer(int which, struct itimerval32 *it)
+{
+       struct itimerval kit;
+       int error;
+
+       error = do_getitimer(which, &kit);
+       if (!error && put_it32(it, &kit))
+               error = -EFAULT;
+
+       return error;
+}
+
+extern int do_setitimer(int which, struct itimerval *, struct itimerval *);
+
+asmlinkage long
+sys32_setitimer(int which, struct itimerval32 *in, struct itimerval32 *out)
+{
+       struct itimerval kin, kout;
+       int error;
+
+       if (in) {
+               if (get_it32(&kin, in))
+                       return -EFAULT;
+       } else
+               memset(&kin, 0, sizeof(kin));
+
+       error = do_setitimer(which, &kin, out ? &kout : NULL);
+       if (error || !out)
+               return error;
+       if (put_it32(out, &kout))
+               return -EFAULT;
+
+       return 0;
+}
+
+asmlinkage unsigned long
+sys32_alarm(unsigned int seconds)
+{
+       struct itimerval it_new, it_old;
+       unsigned int oldalarm;
+
+       it_new.it_interval.tv_sec = it_new.it_interval.tv_usec = 0;
+       it_new.it_value.tv_sec = seconds;
+       it_new.it_value.tv_usec = 0;
+       do_setitimer(ITIMER_REAL, &it_new, &it_old);
+       oldalarm = it_old.it_value.tv_sec;
+       /* ehhh.. We can't return 0 if we have an alarm pending.. */
+       /* And we'd better return too much than too little anyway */
+       if (it_old.it_value.tv_usec)
+               oldalarm++;
+       return oldalarm;
+}
+
+/* Translations due to time_t size differences.  Which affects all
+   sorts of things, like timeval and itimerval.  */
+
+struct utimbuf_32 {
+       int     atime;
+       int     mtime;
+};
+
+extern asmlinkage long sys_utimes(char * filename, struct timeval * utimes);
+extern asmlinkage long sys_gettimeofday (struct timeval *tv, struct timezone *tz);
+
+asmlinkage long
+ia32_utime(char * filename, struct utimbuf_32 *times32)
+{
+       mm_segment_t old_fs = get_fs();
+       struct timeval tv[2];
+       long ret;
+
+       if (times32) {
+               get_user(tv[0].tv_sec, &times32->atime);
+               tv[0].tv_usec = 0;
+               get_user(tv[1].tv_sec, &times32->mtime);
+               tv[1].tv_usec = 0;
+               set_fs (KERNEL_DS);
+       } else {
+               set_fs (KERNEL_DS);
+               ret = sys_gettimeofday(&tv[0], 0);
+               if (ret < 0)
+                       goto out;
+               tv[1] = tv[0];
+       }
+       ret = sys_utimes(filename, tv);
+  out:
+       set_fs (old_fs);
+       return ret;
+}
+
+extern struct timezone sys_tz;
+extern int do_sys_settimeofday(struct timeval *tv, struct timezone *tz);
+
+asmlinkage long
+sys32_gettimeofday(struct timeval32 *tv, struct timezone *tz)
+{
+       if (tv) {
+               struct timeval ktv;
+               do_gettimeofday(&ktv);
+               if (put_tv32(tv, &ktv))
+                       return -EFAULT;
+       }
+       if (tz) {
+               if (copy_to_user(tz, &sys_tz, sizeof(sys_tz)))
+                       return -EFAULT;
+       }
+       return 0;
+}
+
+asmlinkage long
+sys32_settimeofday(struct timeval32 *tv, struct timezone *tz)
+{
+       struct timeval ktv;
+       struct timezone ktz;
+
+       if (tv) {
+               if (get_tv32(&ktv, tv))
+                       return -EFAULT;
+       }
+       if (tz) {
+               if (copy_from_user(&ktz, tz, sizeof(ktz)))
+                       return -EFAULT;
+       }
+
+       return do_sys_settimeofday(tv ? &ktv : NULL, tz ? &ktz : NULL);
+}
+
+struct linux32_dirent {
+       u32     d_ino;
+       u32     d_off;
+       u16     d_reclen;
+       char    d_name[1];
+};
+
+struct old_linux32_dirent {
+       u32     d_ino;
+       u32     d_offset;
+       u16     d_namlen;
+       char    d_name[1];
+};
+
+struct getdents32_callback {
+       struct linux32_dirent * current_dir;
+       struct linux32_dirent * previous;
+       int count;
+       int error;
+};
+
+struct readdir32_callback {
+       struct old_linux32_dirent * dirent;
+       int count;
+};
+
+static int
+filldir32 (void *__buf, const char *name, int namlen, loff_t offset, ino_t ino,
+          unsigned int d_type)
+{
+       struct linux32_dirent * dirent;
+       struct getdents32_callback * buf = (struct getdents32_callback *) __buf;
+       int reclen = ROUND_UP(NAME_OFFSET(dirent) + namlen + 1, 4);
+
+       buf->error = -EINVAL;   /* only used if we fail.. */
+       if (reclen > buf->count)
+               return -EINVAL;
+       dirent = buf->previous;
+       if (dirent)
+               put_user(offset, &dirent->d_off);
+       dirent = buf->current_dir;
+       buf->previous = dirent;
+       put_user(ino, &dirent->d_ino);
+       put_user(reclen, &dirent->d_reclen);
+       copy_to_user(dirent->d_name, name, namlen);
+       put_user(0, dirent->d_name + namlen);
+       ((char *) dirent) += reclen;
+       buf->current_dir = dirent;
+       buf->count -= reclen;
+       return 0;
+}
+
+asmlinkage long
+sys32_getdents (unsigned int fd, void * dirent, unsigned int count)
+{
+       struct file * file;
+       struct linux32_dirent * lastdirent;
+       struct getdents32_callback buf;
+       int error;
+
+       error = -EBADF;
+       file = fget(fd);
+       if (!file)
+               goto out;
+
+       buf.current_dir = (struct linux32_dirent *) dirent;
+       buf.previous = NULL;
+       buf.count = count;
+       buf.error = 0;
+
+       error = vfs_readdir(file, filldir32, &buf);
+       if (error < 0)
+               goto out_putf;
+       error = buf.error;
+       lastdirent = buf.previous;
+       if (lastdirent) {
+               put_user(file->f_pos, &lastdirent->d_off);
+               error = count - buf.count;
+       }
+
+out_putf:
+       fput(file);
+out:
+       return error;
+}
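For concreteness, the record-length arithmetic in filldir32(), assuming the struct linux32_dirent layout defined above:

	/*
	 * d_ino    at offset 0  (4 bytes)
	 * d_off    at offset 4  (4 bytes)
	 * d_reclen at offset 8  (2 bytes)
	 * d_name   at offset 10
	 *
	 * reclen = ROUND_UP(10 + namlen + 1, 4)
	 *	namlen = 5  ->  ROUND_UP(16, 4) = 16
	 *	namlen = 6  ->  ROUND_UP(17, 4) = 20
	 */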
+
+static int
+fillonedir32 (void * __buf, const char * name, int namlen, loff_t offset, ino_t ino, unsigned d_type)
+{
+       struct readdir32_callback * buf = (struct readdir32_callback *) __buf;
+       struct old_linux32_dirent * dirent;
+
+       if (buf->count)
+               return -EINVAL;
+       buf->count++;
+       dirent = buf->dirent;
+       put_user(ino, &dirent->d_ino);
+       put_user(offset, &dirent->d_offset);
+       put_user(namlen, &dirent->d_namlen);
+       copy_to_user(dirent->d_name, name, namlen);
+       put_user(0, dirent->d_name + namlen);
+       return 0;
+}
+
+asmlinkage long
+sys32_oldreaddir (unsigned int fd, void * dirent, unsigned int count)
+{
+       int error;
+       struct file * file;
+       struct readdir32_callback buf;
+
+       error = -EBADF;
+       file = fget(fd);
+       if (!file)
+               goto out;
+
+       buf.count = 0;
+       buf.dirent = dirent;
+
+       error = vfs_readdir(file, fillonedir32, &buf);
+       if (error >= 0)
+               error = buf.count;
+       fput(file);
+out:
+       return error;
+}
+
+/*
+ * We can actually return ERESTARTSYS instead of EINTR, but I'd
+ * like to be certain this leads to no problems. So I return
+ * EINTR just for safety.
+ *
+ * Update: ERESTARTSYS breaks at least the xview clock binary, so
+ * I'm trying ERESTARTNOHAND which restarts only when you want it to.
+ */
+#define MAX_SELECT_SECONDS \
+       ((unsigned long) (MAX_SCHEDULE_TIMEOUT / HZ)-1)
+#define ROUND_UP_TIME(x,y) (((x)+(y)-1)/(y))
+
+asmlinkage long
+sys32_select(int n, fd_set *inp, fd_set *outp, fd_set *exp, struct timeval32 *tvp32)
+{
+       fd_set_bits fds;
+       char *bits;
+       long timeout;
+       int ret, size;
+
+       timeout = MAX_SCHEDULE_TIMEOUT;
+       if (tvp32) {
+               time_t sec, usec;
+
+               get_user(sec, &tvp32->tv_sec);
+               get_user(usec, &tvp32->tv_usec);
+
+               ret = -EINVAL;
+               if (sec < 0 || usec < 0)
+                       goto out_nofds;
+
+               if ((unsigned long) sec < MAX_SELECT_SECONDS) {
+                       timeout = ROUND_UP_TIME(usec, 1000000/HZ);
+                       timeout += sec * (unsigned long) HZ;
+               }
+       }
+
+       ret = -EINVAL;
+       if (n < 0)
+               goto out_nofds;
+
+       if (n > current->files->max_fdset)
+               n = current->files->max_fdset;
+
+       /*
+        * We need 6 bitmaps (in/out/ex for both incoming and outgoing),
+        * since we used fdset we need to allocate memory in units of
+        * long-words. 
+        */
+       ret = -ENOMEM;
+       size = FDS_BYTES(n);
+       bits = kmalloc(6 * size, GFP_KERNEL);
+       if (!bits)
+               goto out_nofds;
+       fds.in      = (unsigned long *)  bits;
+       fds.out     = (unsigned long *) (bits +   size);
+       fds.ex      = (unsigned long *) (bits + 2*size);
+       fds.res_in  = (unsigned long *) (bits + 3*size);
+       fds.res_out = (unsigned long *) (bits + 4*size);
+       fds.res_ex  = (unsigned long *) (bits + 5*size);
+
+       if ((ret = get_fd_set(n, inp, fds.in)) ||
+           (ret = get_fd_set(n, outp, fds.out)) ||
+           (ret = get_fd_set(n, exp, fds.ex)))
+               goto out;
+       zero_fd_set(n, fds.res_in);
+       zero_fd_set(n, fds.res_out);
+       zero_fd_set(n, fds.res_ex);
+
+       ret = do_select(n, &fds, &timeout);
+
+       if (tvp32 && !(current->personality & STICKY_TIMEOUTS)) {
+               time_t sec = 0, usec = 0;
+               if (timeout) {
+                       sec = timeout / HZ;
+                       usec = timeout % HZ;
+                       usec *= (1000000/HZ);
+               }
+               put_user(sec, (int *)&tvp32->tv_sec);
+               put_user(usec, (int *)&tvp32->tv_usec);
+       }
+
+       if (ret < 0)
+               goto out;
+       if (!ret) {
+               ret = -ERESTARTNOHAND;
+               if (signal_pending(current))
+                       goto out;
+               ret = 0;
+       }
+
+       set_fd_set(n, inp, fds.res_in);
+       set_fd_set(n, outp, fds.res_out);
+       set_fd_set(n, exp, fds.res_ex);
+
+out:
+       kfree(bits);
+out_nofds:
+       return ret;
+}
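The timeout handling above is plain unit conversion between the 32-bit timeval and kernel jiffies. A worked example, assuming HZ == 100:

	/*
	 * In:  tv = { tv_sec = 1, tv_usec = 5000 }
	 *	timeout  = ROUND_UP_TIME(5000, 1000000/100)  ->   1 jiffy
	 *	timeout += 1 * HZ                            -> 101 jiffies
	 *
	 * Out (if 42 jiffies remain when do_select() returns):
	 *	tv_sec  = 42 / 100            = 0
	 *	tv_usec = (42 % 100) * 10000  = 420000
	 */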
+
+struct sel_arg_struct {
+       unsigned int n;
+       unsigned int inp;
+       unsigned int outp;
+       unsigned int exp;
+       unsigned int tvp;
+};
+
+asmlinkage long
+sys32_old_select(struct sel_arg_struct *arg)
+{
+       struct sel_arg_struct a;
+
+       if (copy_from_user(&a, arg, sizeof(a)))
+               return -EFAULT;
+       return sys32_select(a.n, (fd_set *)A(a.inp), (fd_set *)A(a.outp), (fd_set *)A(a.exp),
+                           (struct timeval32 *)A(a.tvp));
+}
+
+struct timespec32 {
+       int     tv_sec;
+       int     tv_nsec;
+};
+
+extern asmlinkage long sys_nanosleep(struct timespec *rqtp, struct timespec *rmtp); 
+
+asmlinkage long
+sys32_nanosleep(struct timespec32 *rqtp, struct timespec32 *rmtp)
+{
+       struct timespec t;
+       int ret;
+       mm_segment_t old_fs = get_fs ();
+       
+       if (verify_area(VERIFY_READ, rqtp, sizeof(struct timespec32)) ||
+           __get_user (t.tv_sec, &rqtp->tv_sec) ||
+           __get_user (t.tv_nsec, &rqtp->tv_nsec))
+               return -EFAULT;
+       set_fs (KERNEL_DS);
+       ret = sys_nanosleep(&t, rmtp ? &t : NULL);
+       set_fs (old_fs);
+       if (rmtp && ret == -EINTR) {
+               if (verify_area(VERIFY_WRITE, rmtp, sizeof(struct timespec32)) ||
+                   __put_user (t.tv_sec, &rmtp->tv_sec) ||
+                   __put_user (t.tv_nsec, &rmtp->tv_nsec))
+                       return -EFAULT;
+       }
+       return ret;
+}
+
+asmlinkage ssize_t sys_readv(unsigned long,const struct iovec *,unsigned long);
+asmlinkage ssize_t sys_writev(unsigned long,const struct iovec *,unsigned long);
+
+struct iovec *
+get_iovec32(struct iovec32 *iov32, struct iovec *iov_buf, u32 count, int type)
+{
+       int i;
+       u32 buf, len;
+       struct iovec *ivp, *iov;
+
+       /* Get the "struct iovec" from user memory */
+
+       if (!count)
+               return 0;
+       if(verify_area(VERIFY_READ, iov32, sizeof(struct iovec32)*count))
+               return(struct iovec *)0;
+       if (count > UIO_MAXIOV)
+               return(struct iovec *)0;
+       if (count > UIO_FASTIOV) {
+               iov = kmalloc(count*sizeof(struct iovec), GFP_KERNEL);
+               if (!iov)
+                       return((struct iovec *)0);
+       } else
+               iov = iov_buf;
+
+       ivp = iov;
+       for (i = 0; i < count; i++) {
+               if (__get_user(len, &iov32->iov_len) ||
+                   __get_user(buf, &iov32->iov_base)) {
+                       if (iov != iov_buf)
+                               kfree(iov);
+                       return((struct iovec *)0);
+               }
+               if (verify_area(type, (void *)A(buf), len)) {
+                       if (iov != iov_buf)
+                               kfree(iov);
+                       return((struct iovec *)0);
+               }
+               ivp->iov_base = (void *)A(buf);
+               ivp->iov_len = (__kernel_size_t)len;
+               iov32++;
+               ivp++;
+       }
+       return(iov);
+}
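+
+/*
+ * Note: callers pass a small on-stack array of UIO_FASTIOV entries as
+ * iov_buf, so the common case of short I/O vectors avoids the kmalloc()
+ * above; only vectors longer than UIO_FASTIOV hit the allocator.
+ */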
+
+asmlinkage long
+sys32_readv(int fd, struct iovec32 *vector, u32 count)
+{
+       struct iovec iovstack[UIO_FASTIOV];
+       struct iovec *iov;
+       int ret;
+       mm_segment_t old_fs = get_fs();
+
+       if ((iov = get_iovec32(vector, iovstack, count, VERIFY_WRITE)) == (struct iovec *)0)
+               return -EFAULT;
+       set_fs(KERNEL_DS);
+       ret = sys_readv(fd, iov, count);
+       set_fs(old_fs);
+       if (iov != iovstack)
+               kfree(iov);
+       return ret;
+}
+
+asmlinkage long
+sys32_writev(int fd, struct iovec32 *vector, u32 count)
+{
+       struct iovec iovstack[UIO_FASTIOV];
+       struct iovec *iov;
+       int ret;
+       mm_segment_t old_fs = get_fs();
+
+       if ((iov = get_iovec32(vector, iovstack, count, VERIFY_READ)) == (struct iovec *)0)
+               return -EFAULT;
+       set_fs(KERNEL_DS);
+       ret = sys_writev(fd, iov, count);
+       set_fs(old_fs);
+       if (iov != iovstack)
+               kfree(iov);
+       return ret;
+}
+
+#define RLIM_INFINITY32        0xffffffff
+#define RESOURCE32(x) ((x > RLIM_INFINITY32) ? RLIM_INFINITY32 : x)
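+/* e.g. a 64-bit rlim_cur of RLIM_INFINITY is clamped here to 0xffffffff,
+   which is what a 32-bit RLIM_INFINITY looks like to old binaries. */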
+
+struct rlimit32 {
+       int     rlim_cur;
+       int     rlim_max;
+};
+
+extern asmlinkage long sys_getrlimit(unsigned int resource, struct rlimit *rlim);
+
+asmlinkage long
+sys32_getrlimit(unsigned int resource, struct rlimit32 *rlim)
+{
+       struct rlimit r;
+       int ret;
+       mm_segment_t old_fs;
+
+       old_fs = get_fs();
+       set_fs(KERNEL_DS);
+       ret = sys_getrlimit(resource, &r);
+       set_fs(old_fs);
+       if (!ret) {
+               if (verify_area(VERIFY_WRITE, rlim, sizeof(struct rlimit32)) ||
+                   __put_user(RESOURCE32(r.rlim_cur), &rlim->rlim_cur) ||
+                   __put_user(RESOURCE32(r.rlim_max), &rlim->rlim_max))
+                       ret = -EFAULT;
+       }
+       return ret;
+}
+
+extern asmlinkage long sys_old_getrlimit(unsigned int resource, struct rlimit *rlim);
+
+asmlinkage long
+sys32_old_getrlimit(unsigned int resource, struct rlimit32 *rlim)
+{
+       struct rlimit r;
+       int ret;
+       mm_segment_t old_fs;
+       
+       old_fs = get_fs();
+       set_fs(KERNEL_DS);
+       ret = sys_old_getrlimit(resource, &r);
+       set_fs(old_fs);
+       if (!ret) {
+               if (verify_area(VERIFY_WRITE, rlim, sizeof(struct rlimit32)) ||
+                   __put_user(r.rlim_cur, &rlim->rlim_cur) ||
+                   __put_user(r.rlim_max, &rlim->rlim_max))
+                       ret = -EFAULT;
+       }
+       return ret;
+}
+
+extern asmlinkage long sys_setrlimit(unsigned int resource, struct rlimit *rlim);
+
+asmlinkage long
+sys32_setrlimit(unsigned int resource, struct rlimit32 *rlim)
+{
+       struct rlimit r;
+       int ret;
+       mm_segment_t old_fs = get_fs ();
+
+       if (resource >= RLIM_NLIMITS) return -EINVAL;   
+       if (verify_area(VERIFY_READ, rlim, sizeof(struct rlimit32)) ||
+           __get_user (r.rlim_cur, &rlim->rlim_cur) ||
+           __get_user (r.rlim_max, &rlim->rlim_max))
+               return -EFAULT;
+       if (r.rlim_cur == RLIM_INFINITY32)
+               r.rlim_cur = RLIM_INFINITY;
+       if (r.rlim_max == RLIM_INFINITY32)
+               r.rlim_max = RLIM_INFINITY;
+       set_fs (KERNEL_DS);
+       ret = sys_setrlimit(resource, &r);
+       set_fs (old_fs);
+       return ret;
+}
+
+/*
+ * sys32_ipc() is the de-multiplexer for the SysV IPC calls in 32-bit emulation.
+ *
+ * This is really horribly ugly.
+ */
+
+struct msgbuf32 { s32 mtype; char mtext[1]; };
+
+struct ipc_perm32
+{
+       key_t             key;
+        __kernel_uid_t32  uid;
+        __kernel_gid_t32  gid;
+        __kernel_uid_t32  cuid;
+        __kernel_gid_t32  cgid;
+        __kernel_mode_t32 mode;
+        unsigned short  seq;
+};
+
+struct semid_ds32 {
+        struct ipc_perm32 sem_perm;               /* permissions .. see ipc.h */
+        __kernel_time_t32 sem_otime;              /* last semop time */
+        __kernel_time_t32 sem_ctime;              /* last change time */
+        u32 sem_base;              /* ptr to first semaphore in array */
+        u32 sem_pending;          /* pending operations to be processed */
+        u32 sem_pending_last;    /* last pending operation */
+        u32 undo;                  /* undo requests on this array */
+        unsigned short  sem_nsems;              /* no. of semaphores in array */
+};
+
+struct msqid_ds32
+{
+        struct ipc_perm32 msg_perm;
+        u32 msg_first;
+        u32 msg_last;
+        __kernel_time_t32 msg_stime;
+        __kernel_time_t32 msg_rtime;
+        __kernel_time_t32 msg_ctime;
+        u32 wwait;
+        u32 rwait;
+        unsigned short msg_cbytes;
+        unsigned short msg_qnum;  
+        unsigned short msg_qbytes;
+        __kernel_ipc_pid_t32 msg_lspid;
+        __kernel_ipc_pid_t32 msg_lrpid;
+};
+
+struct shmid_ds32 {
+        struct ipc_perm32       shm_perm;
+        int                     shm_segsz;
+        __kernel_time_t32       shm_atime;
+        __kernel_time_t32       shm_dtime;
+        __kernel_time_t32       shm_ctime;
+        __kernel_ipc_pid_t32    shm_cpid; 
+        __kernel_ipc_pid_t32    shm_lpid; 
+        unsigned short          shm_nattch;
+};
+
+#define IPCOP_MASK(__x)        (1UL << (__x))
+
+static int
+do_sys32_semctl(int first, int second, int third, void *uptr)
+{
+       union semun fourth;
+       u32 pad;
+       int err;
+       struct semid64_ds s;
+       struct semid_ds32 *usp;
+       mm_segment_t old_fs;
+
+       if (!uptr)
+               return -EINVAL;
+       err = -EFAULT;
+       if (get_user (pad, (u32 *)uptr))
+               return err;
+       if(third == SETVAL)
+               fourth.val = (int)pad;
+       else
+               fourth.__pad = (void *)A(pad);
+       switch (third) {
+
+       case IPC_INFO:
+       case IPC_RMID:
+       case IPC_SET:
+       case SEM_INFO:
+       case GETVAL:
+       case GETPID:
+       case GETNCNT:
+       case GETZCNT:
+       case GETALL:
+       case SETVAL:
+       case SETALL:
+               err = sys_semctl (first, second, third, fourth);
+               break;
+
+       case IPC_STAT:
+       case SEM_STAT:
+               usp = (struct semid_ds32 *)A(pad);
+               fourth.__pad = &s;
+               old_fs = get_fs ();
+               set_fs (KERNEL_DS);
+               err = sys_semctl (first, second, third, fourth);
+               set_fs (old_fs);
+               if (err < 0)
+                       break;
+               if (verify_area(VERIFY_WRITE, usp, sizeof(struct semid_ds32)) ||
+                   __put_user(s.sem_perm.key, &usp->sem_perm.key) ||
+                   __put_user(s.sem_perm.uid, &usp->sem_perm.uid) ||
+                   __put_user(s.sem_perm.gid, &usp->sem_perm.gid) ||
+                   __put_user(s.sem_perm.cuid, &usp->sem_perm.cuid) ||
+                   __put_user (s.sem_perm.cgid, &usp->sem_perm.cgid) ||
+                   __put_user (s.sem_perm.mode, &usp->sem_perm.mode) ||
+                   __put_user (s.sem_perm.seq, &usp->sem_perm.seq) ||
+                   __put_user (s.sem_otime, &usp->sem_otime) ||
+                   __put_user (s.sem_ctime, &usp->sem_ctime) ||
+                   __put_user (s.sem_nsems, &usp->sem_nsems))
+                       return -EFAULT;
+               break;
+
+       }
+
+       return err;
+}
+
+static int
+do_sys32_msgsnd (int first, int second, int third, void *uptr)
+{
+       struct msgbuf *p = kmalloc (second + sizeof (struct msgbuf)
+                                   + 4, GFP_USER);
+       struct msgbuf32 *up = (struct msgbuf32 *)uptr;
+       mm_segment_t old_fs;
+       int err;
+
+       if (!p)
+               return -ENOMEM;
+       err = verify_area(VERIFY_READ, up, sizeof(struct msgbuf32));
+       if (err)
+               goto out;
+       err = __get_user (p->mtype, &up->mtype);
+       err |= __copy_from_user (p->mtext, &up->mtext, second);
+       if (err)
+               goto out;
+       old_fs = get_fs ();
+       set_fs (KERNEL_DS);
+       err = sys_msgsnd (first, p, second, third);
+       set_fs (old_fs);
+out:
+       kfree (p);
+       return err;
+}
+
+static int
+do_sys32_msgrcv (int first, int second, int msgtyp, int third,
+                int version, void *uptr)
+{
+       struct msgbuf32 *up;
+       struct msgbuf *p;
+       mm_segment_t old_fs;
+       int err;
+
+       if (!version) {
+               struct ipc_kludge *uipck = (struct ipc_kludge *)uptr;
+               struct ipc_kludge ipck;
+
+               err = -EINVAL;
+               if (!uptr)
+                       goto out;
+               err = -EFAULT;
+               if (copy_from_user (&ipck, uipck, sizeof (struct ipc_kludge)))
+                       goto out;
+               uptr = (void *)A(ipck.msgp);
+               msgtyp = ipck.msgtyp;
+       }
+       err = -ENOMEM;
+       p = kmalloc (second + sizeof (struct msgbuf) + 4, GFP_USER);
+       if (!p)
+               goto out;
+       old_fs = get_fs ();
+       set_fs (KERNEL_DS);
+       err = sys_msgrcv (first, p, second + 4, msgtyp, third);
+       set_fs (old_fs);
+       if (err < 0)
+               goto free_then_out;
+       up = (struct msgbuf32 *)uptr;
+       if (verify_area(VERIFY_WRITE, up, sizeof(struct msgbuf32)) ||
+           __put_user (p->mtype, &up->mtype) ||
+           __copy_to_user (&up->mtext, p->mtext, err))
+               err = -EFAULT;
+free_then_out:
+       kfree (p);
+out:
+       return err;
+}
+
+static int
+do_sys32_msgctl (int first, int second, void *uptr)
+{
+       int err = -EINVAL;
+       struct msqid_ds m;
+       struct msqid64_ds m64;
+       struct msqid_ds32 *up = (struct msqid_ds32 *)uptr;
+       mm_segment_t old_fs;
+
+       switch (second) {
+
+       case IPC_INFO:
+       case IPC_RMID:
+       case MSG_INFO:
+               err = sys_msgctl (first, second, (struct msqid_ds *)uptr);
+               break;
+
+       case IPC_SET:
+               err = verify_area(VERIFY_READ, up, sizeof(struct msqid_ds32));
+               if (err)
+                       break;
+               err = __get_user (m.msg_perm.uid, &up->msg_perm.uid);
+               err |= __get_user (m.msg_perm.gid, &up->msg_perm.gid);
+               err |= __get_user (m.msg_perm.mode, &up->msg_perm.mode);
+               err |= __get_user (m.msg_qbytes, &up->msg_qbytes);
+               if (err)
+                       break;
+               old_fs = get_fs ();
+               set_fs (KERNEL_DS);
+               err = sys_msgctl (first, second, &m);
+               set_fs (old_fs);
+               break;
+
+       case IPC_STAT:
+       case MSG_STAT:
+               old_fs = get_fs ();
+               set_fs (KERNEL_DS);
+               err = sys_msgctl (first, second, (void *) &m64);
+               set_fs (old_fs);
+               if (err < 0)
+                       break;
+               if (verify_area(VERIFY_WRITE, up, sizeof(struct msqid_ds32)) ||
+                   __put_user (m64.msg_perm.key, &up->msg_perm.key) ||
+                   __put_user(m64.msg_perm.uid, &up->msg_perm.uid) ||
+                   __put_user(m64.msg_perm.gid, &up->msg_perm.gid) ||
+                   __put_user(m64.msg_perm.cuid, &up->msg_perm.cuid) ||
+                   __put_user(m64.msg_perm.cgid, &up->msg_perm.cgid) ||
+                   __put_user(m64.msg_perm.mode, &up->msg_perm.mode) ||
+                   __put_user(m64.msg_perm.seq, &up->msg_perm.seq) ||
+                   __put_user(m64.msg_stime, &up->msg_stime) ||
+                   __put_user(m64.msg_rtime, &up->msg_rtime) ||
+                   __put_user(m64.msg_ctime, &up->msg_ctime) ||
+                   __put_user(m64.msg_cbytes, &up->msg_cbytes) ||
+                   __put_user(m64.msg_qnum, &up->msg_qnum) ||
+                   __put_user(m64.msg_qbytes, &up->msg_qbytes) ||
+                   __put_user(m64.msg_lspid, &up->msg_lspid) ||
+                   __put_user(m64.msg_lrpid, &up->msg_lrpid))
+                       return -EFAULT;
+               break;
+
+       }
+
+       return err;
+}
+
+static int
+do_sys32_shmat (int first, int second, int third, int version, void *uptr)
+{
+       unsigned long raddr;
+       u32 *uaddr = (u32 *)A((u32)third);
+       int err = -EINVAL;
+
+       if (version == 1)
+               return err;
+       err = sys_shmat (first, uptr, second, &raddr);
+       if (err)
+               return err;
+       err = put_user (raddr, uaddr);
+       return err;
+}
+
+static int
+do_sys32_shmctl (int first, int second, void *uptr)
+{
+       int err = -EFAULT;
+       struct shmid_ds s;
+       struct shmid64_ds s64;
+       struct shmid_ds32 *up = (struct shmid_ds32 *)uptr;
+       mm_segment_t old_fs;
+       struct shm_info32 {
+               int used_ids;
+               u32 shm_tot, shm_rss, shm_swp;
+               u32 swap_attempts, swap_successes;
+       } *uip = (struct shm_info32 *)uptr;
+       struct shm_info si;
+
+       switch (second) {
+
+       case IPC_INFO:
+       case IPC_RMID:
+       case SHM_LOCK:
+       case SHM_UNLOCK:
+               err = sys_shmctl (first, second, (struct shmid_ds *)uptr);
+               break;
+       case IPC_SET:
+               err = verify_area(VERIFY_READ, up, sizeof(struct shmid_ds32));
+               if (err)
+                       break;
+               err = __get_user (s.shm_perm.uid, &up->shm_perm.uid);
+               err |= __get_user (s.shm_perm.gid, &up->shm_perm.gid);
+               err |= __get_user (s.shm_perm.mode, &up->shm_perm.mode);
+               if (err)
+                       break;
+               old_fs = get_fs ();
+               set_fs (KERNEL_DS);
+               err = sys_shmctl (first, second, &s);
+               set_fs (old_fs);
+               break;
+
+       case IPC_STAT:
+       case SHM_STAT:
+               old_fs = get_fs ();
+               set_fs (KERNEL_DS);
+               err = sys_shmctl (first, second, (void *) &s64);
+               set_fs (old_fs);
+               if (err < 0)
+                       break;
+               if (verify_area(VERIFY_WRITE, up, sizeof(struct shmid_ds32)) ||
+                   __put_user (s64.shm_perm.key, &up->shm_perm.key) ||
+                   __put_user (s64.shm_perm.uid, &up->shm_perm.uid) ||
+                   __put_user (s64.shm_perm.gid, &up->shm_perm.gid) ||
+                   __put_user (s64.shm_perm.cuid, &up->shm_perm.cuid) ||
+                   __put_user (s64.shm_perm.cgid, &up->shm_perm.cgid) ||
+                   __put_user (s64.shm_perm.mode, &up->shm_perm.mode) ||
+                   __put_user (s64.shm_perm.seq, &up->shm_perm.seq) ||
+                   __put_user (s64.shm_atime, &up->shm_atime) ||
+                   __put_user (s64.shm_dtime, &up->shm_dtime) ||
+                   __put_user (s64.shm_ctime, &up->shm_ctime) ||
+                   __put_user (s64.shm_segsz, &up->shm_segsz) ||
+                   __put_user (s64.shm_nattch, &up->shm_nattch) ||
+                   __put_user (s64.shm_cpid, &up->shm_cpid) ||
+                   __put_user (s64.shm_lpid, &up->shm_lpid))
+                       return -EFAULT;
+               break;
+
+       case SHM_INFO:
+               old_fs = get_fs ();
+               set_fs (KERNEL_DS);
+               err = sys_shmctl (first, second, (void *)&si);
+               set_fs (old_fs);
+               if (err < 0)
+                       break;
+               if (verify_area(VERIFY_WRITE, uip, sizeof(struct shm_info32)) ||
+                   __put_user (si.used_ids, &uip->used_ids) ||
+                   __put_user (si.shm_tot, &uip->shm_tot) ||
+                   __put_user (si.shm_rss, &uip->shm_rss) ||
+                   __put_user (si.shm_swp, &uip->shm_swp) ||
+                   __put_user (si.swap_attempts, &uip->swap_attempts) ||
+                   __put_user (si.swap_successes, &uip->swap_successes))
+                       return -EFAULT;
+               break;
+
+       }
+       return err;
+}
+
+asmlinkage long
+sys32_ipc (u32 call, int first, int second, int third, u32 ptr, u32 fifth)
+{
+       int version, err;
+
+       version = call >> 16; /* hack for backward compatibility */
+       call &= 0xffff;
+
+       switch (call) {
+
+       case SEMOP:
+               /* struct sembuf is the same on 32 and 64bit :)) */
+               err = sys_semop (first, (struct sembuf *)AA(ptr),
+                                second);
+               break;
+       case SEMGET:
+               err = sys_semget (first, second, third);
+               break;
+       case SEMCTL:
+               err = do_sys32_semctl (first, second, third,
+                                      (void *)AA(ptr));
+               break;
+
+       case MSGSND:
+               err = do_sys32_msgsnd (first, second, third,
+                                      (void *)AA(ptr));
+               break;
+       case MSGRCV:
+               err = do_sys32_msgrcv (first, second, fifth, third,
+                                      version, (void *)AA(ptr));
+               break;
+       case MSGGET:
+               err = sys_msgget ((key_t) first, second);
+               break;
+       case MSGCTL:
+               err = do_sys32_msgctl (first, second, (void *)AA(ptr));
+               break;
+
+       case SHMAT:
+               err = do_sys32_shmat (first, second, third,
+                                     version, (void *)AA(ptr));
+               break;
+       case SHMDT: 
+               err = sys_shmdt ((char *)AA(ptr));
+               break;
+       case SHMGET:
+               err = sys_shmget (first, second, third);
+               break;
+       case SHMCTL:
+               err = do_sys32_shmctl (first, second, (void *)AA(ptr));
+               break;
+       default:
+               err = -EINVAL;
+               break;
+       }
+
+       return err;
+}
+
+/*
+ * sys_time() can be implemented in user-level using
+ * sys_gettimeofday().  IA64 did this, but i386 Linux did not,
+ * so we have to implement this system call here.
+ */
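+/*
+ * Roughly, the user-level equivalent would be:
+ *     gettimeofday(&tv, NULL); return tv.tv_sec;
+ * (illustration only; the syscall below is what 32-bit binaries use).
+ */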
+asmlinkage long sys32_time(int * tloc)
+{
+       int i;
+
+       /* SMP: This is fairly trivial. We grab CURRENT_TIME and 
+          stuff it to user space. No side effects */
+       i = CURRENT_TIME;
+       if (tloc) {
+               if (put_user(i,tloc))
+                       i = -EFAULT;
+       }
+       return i;
+}
+
+struct rusage32 {
+        struct timeval32 ru_utime;
+        struct timeval32 ru_stime;
+        int    ru_maxrss;
+        int    ru_ixrss;
+        int    ru_idrss;
+        int    ru_isrss;
+        int    ru_minflt;
+        int    ru_majflt;
+        int    ru_nswap;
+        int    ru_inblock;
+        int    ru_oublock;
+        int    ru_msgsnd; 
+        int    ru_msgrcv; 
+        int    ru_nsignals;
+        int    ru_nvcsw;
+        int    ru_nivcsw;
+};
+
+static int
+put_rusage (struct rusage32 *ru, struct rusage *r)
+{
+       if (verify_area(VERIFY_WRITE, ru, sizeof(struct rusage32)) ||
+           __put_user (r->ru_utime.tv_sec, &ru->ru_utime.tv_sec) ||
+           __put_user (r->ru_utime.tv_usec, &ru->ru_utime.tv_usec) ||
+           __put_user (r->ru_stime.tv_sec, &ru->ru_stime.tv_sec) ||
+           __put_user (r->ru_stime.tv_usec, &ru->ru_stime.tv_usec) ||
+           __put_user (r->ru_maxrss, &ru->ru_maxrss) ||
+           __put_user (r->ru_ixrss, &ru->ru_ixrss) ||
+           __put_user (r->ru_idrss, &ru->ru_idrss) ||
+           __put_user (r->ru_isrss, &ru->ru_isrss) ||
+           __put_user (r->ru_minflt, &ru->ru_minflt) ||
+           __put_user (r->ru_majflt, &ru->ru_majflt) ||
+           __put_user (r->ru_nswap, &ru->ru_nswap) ||
+           __put_user (r->ru_inblock, &ru->ru_inblock) ||
+           __put_user (r->ru_oublock, &ru->ru_oublock) ||
+           __put_user (r->ru_msgsnd, &ru->ru_msgsnd) ||
+           __put_user (r->ru_msgrcv, &ru->ru_msgrcv) ||
+           __put_user (r->ru_nsignals, &ru->ru_nsignals) ||
+           __put_user (r->ru_nvcsw, &ru->ru_nvcsw) ||
+           __put_user (r->ru_nivcsw, &ru->ru_nivcsw))
+               return -EFAULT;
+       return 0;
+}
+
+extern asmlinkage long sys_wait4(pid_t pid,unsigned int * stat_addr,
+                               int options, struct rusage * ru);
+
+asmlinkage long
+sys32_wait4(__kernel_pid_t32 pid, unsigned int *stat_addr, int options,
+           struct rusage32 *ru)
+{
+       if (!ru)
+               return sys_wait4(pid, stat_addr, options, NULL);
+       else {
+               struct rusage r;
+               int ret;
+               unsigned int status;
+               mm_segment_t old_fs = get_fs();
+               
+               set_fs (KERNEL_DS);
+               ret = sys_wait4(pid, stat_addr ? &status : NULL, options, &r);
+               set_fs (old_fs);
+               if (put_rusage (ru, &r)) return -EFAULT;
+               if (stat_addr && put_user (status, stat_addr))
+                       return -EFAULT;
+               return ret;
+       }
+}
+
+asmlinkage long
+sys32_waitpid(__kernel_pid_t32 pid, unsigned int *stat_addr, int options)
+{
+       return sys32_wait4(pid, stat_addr, options, NULL);
+}
+
+
+extern asmlinkage long
+sys_getrusage(int who, struct rusage *ru);
+
+asmlinkage long
+sys32_getrusage(int who, struct rusage32 *ru)
+{
+       struct rusage r;
+       int ret;
+       mm_segment_t old_fs = get_fs();
+               
+       set_fs (KERNEL_DS);
+       ret = sys_getrusage(who, &r);
+       set_fs (old_fs);
+       if (put_rusage (ru, &r)) return -EFAULT;
+       return ret;
+}
+
+struct tms32 {
+       __kernel_clock_t32 tms_utime;
+       __kernel_clock_t32 tms_stime;
+       __kernel_clock_t32 tms_cutime;
+       __kernel_clock_t32 tms_cstime;
+};
+                                
+extern asmlinkage long sys_times(struct tms * tbuf);
+
+asmlinkage long
+sys32_times(struct tms32 *tbuf)
+{
+       struct tms t;
+       long ret;
+       mm_segment_t old_fs = get_fs ();
+       
+       set_fs (KERNEL_DS);
+       ret = sys_times(tbuf ? &t : NULL);
+       set_fs (old_fs);
+       if (tbuf) {
+               if (verify_area(VERIFY_WRITE, tbuf, sizeof(struct tms32)) ||
+                   __put_user (t.tms_utime, &tbuf->tms_utime) ||
+                   __put_user (t.tms_stime, &tbuf->tms_stime) ||
+                   __put_user (t.tms_cutime, &tbuf->tms_cutime) ||
+                   __put_user (t.tms_cstime, &tbuf->tms_cstime))
+                       return -EFAULT;
+       }
+       return ret;
+}
+
+static inline int
+get_flock32(struct flock *kfl, struct flock32 *ufl)
+{
+       if (verify_area(VERIFY_READ, ufl, sizeof(struct flock32)) ||
+           __get_user(kfl->l_type, &ufl->l_type) ||
+           __get_user(kfl->l_whence, &ufl->l_whence) ||
+           __get_user(kfl->l_start, &ufl->l_start) ||
+           __get_user(kfl->l_len, &ufl->l_len) ||
+           __get_user(kfl->l_pid, &ufl->l_pid))
+               return -EFAULT;
+       return 0;
+}
+
+static inline int
+put_flock32(struct flock *kfl, struct flock32 *ufl)
+{
+       if (verify_area(VERIFY_WRITE, ufl, sizeof(struct flock32)) ||
+           __put_user(kfl->l_type, &ufl->l_type) ||
+           __put_user(kfl->l_whence, &ufl->l_whence) ||
+           __put_user(kfl->l_start, &ufl->l_start) ||
+           __put_user(kfl->l_len, &ufl->l_len) ||
+           __put_user(kfl->l_pid, &ufl->l_pid))
+               return -EFAULT;
+       return 0;
+}
+
+extern asmlinkage long sys_fcntl(unsigned int fd, unsigned int cmd,
+                                unsigned long arg);
+
+asmlinkage long
+sys32_fcntl(unsigned int fd, unsigned int cmd, unsigned long arg)
+{
+       struct flock f;
+       mm_segment_t old_fs;
+       long ret;
+
+       switch (cmd) {
+       case F_GETLK:
+       case F_SETLK:
+       case F_SETLKW:
+               if(cmd != F_GETLK && get_flock32(&f, (struct flock32 *)((long)arg)))
+                       return -EFAULT;
+               old_fs = get_fs();
+               set_fs(KERNEL_DS);
+               ret = sys_fcntl(fd, cmd, (unsigned long)&f);
+               set_fs(old_fs);
+               if(cmd == F_GETLK && put_flock32(&f, (struct flock32 *)((long)arg)))
+                       return -EFAULT;
+               return ret;
+       default:
+               /*
+                *  `sys_fcntl' lies about arg: for the F_SETOWN
+                *  sub-function, arg can have a negative value.
+                */
+               return sys_fcntl(fd, cmd, (unsigned long)((long)arg));
+       }
+}
+
+static inline int
+get_flock64(struct flock *kfl, struct ia32_flock64 *ufl)
+{
+       if (verify_area(VERIFY_READ, ufl, sizeof(struct ia32_flock64)) ||
+           __get_user(kfl->l_type, &ufl->l_type) ||
+           __get_user(kfl->l_whence, &ufl->l_whence) ||
+           __copy_from_user(&kfl->l_start, &ufl->l_start, 8) ||
+           __copy_from_user(&kfl->l_len, &ufl->l_len, 8) ||
+           __get_user(kfl->l_pid, &ufl->l_pid))
+               return -EFAULT;
+       return 0;
+}
+
+static inline int
+put_flock64(struct flock *kfl, struct ia32_flock64 *ufl)
+{
+       if (verify_area(VERIFY_WRITE, ufl, sizeof(struct ia32_flock64)) ||
+           __put_user(kfl->l_type, &ufl->l_type) ||
+           __put_user(kfl->l_whence, &ufl->l_whence) ||
+           __copy_to_user(&ufl->l_start,&kfl->l_start, 8) ||
+           __copy_to_user(&ufl->l_len,&kfl->l_len, 8) ||
+           __put_user(kfl->l_pid, &ufl->l_pid))
+               return -EFAULT;
+       return 0;
+}
+
+asmlinkage long
+sys32_fcntl64(unsigned int fd, unsigned int cmd, unsigned long arg)
+{
+       struct flock f;
+       mm_segment_t old_fs;
+       long ret;
+
+       /* sys_fcntl() is 64-bit by default and so doesn't know anything
+        * about the F_xxxx64 commands.
+        */ 
+       switch (cmd) {
+       case F_GETLK64:
+               cmd = F_GETLK;
+               break;
+       case F_SETLK64:
+               cmd = F_SETLK;
+               break;
+       case F_SETLKW64:
+               cmd = F_SETLKW;
+               break;
+       }
+       
+       switch (cmd) {
+       case F_SETLKW:
+       case F_SETLK:
+               if(get_flock64(&f, (struct ia32_flock64 *)arg))
+                       return -EFAULT;
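+               /* fall through */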
+       case F_GETLK:
+               old_fs = get_fs();
+               set_fs(KERNEL_DS);
+               ret = sys_fcntl(fd, cmd, (unsigned long)&f);
+               set_fs(old_fs);
+               if(cmd == F_GETLK && put_flock64(&f, (struct ia32_flock64 *)((long)arg)))
+                       return -EFAULT;
+               return ret;
+       default:
+               /*
+                *  `sys_fcntl' lies about arg: for the F_SETOWN
+                *  sub-function, arg can have a negative value.
+                */
+               return sys_fcntl(fd, cmd, (unsigned long)((long)arg));
+       }
+}
+
+int sys32_ni_syscall(int call)
+{ 
+       printk(KERN_INFO "IA32 syscall %d from %s not implemented\n", call,
+              current->comm);
+       return -ENOSYS;        
+} 
+
+/* In order to reduce some races, while at the same time doing additional
+ * checking and hopefully speeding things up, we copy filenames to the
+ * kernel data space before using them.
+ *
+ * POSIX.1 2.4: an empty pathname is invalid (ENOENT).
+ */
+static inline int
+do_getname32(const char *filename, char *page)
+{
+       int retval;
+
+       /* 32bit pointer will be always far below TASK_SIZE :)) */
+       retval = strncpy_from_user((char *)page, (char *)filename, PAGE_SIZE);
+       if (retval > 0) {
+               if (retval < PAGE_SIZE)
+                       return 0;
+               return -ENAMETOOLONG;
+       } else if (!retval)
+               retval = -ENOENT;
+       return retval;
+}
+
+char *
+getname32(const char *filename)
+{
+       char *tmp, *result;
+
+       result = ERR_PTR(-ENOMEM);
+       tmp = (char *)__get_free_page(GFP_KERNEL);
+       if (tmp)  {
+               int retval = do_getname32(filename, tmp);
+
+               result = tmp;
+               if (retval < 0) {
+                       putname(tmp);
+                       result = ERR_PTR(retval);
+               }
+       }
+       return result;
+}
+
+/* 32-bit timeval and related flotsam.  */
+
+extern asmlinkage long sys_utime(char * filename, struct utimbuf * times);
+
+struct utimbuf32 {
+       __kernel_time_t32 actime, modtime;
+};
+
+asmlinkage long
+sys32_utime(char * filename, struct utimbuf32 *times)
+{
+       struct utimbuf t;
+       mm_segment_t old_fs;
+       int ret;
+       char *filenam;
+       
+       if (!times)
+               return sys_utime(filename, NULL);
+       if (verify_area(VERIFY_READ, times, sizeof(struct utimbuf32)) ||
+           __get_user (t.actime, &times->actime) ||
+           __get_user (t.modtime, &times->modtime))
+               return -EFAULT;
+       filenam = getname32 (filename);
+       ret = PTR_ERR(filenam);
+       if (!IS_ERR(filenam)) {
+               old_fs = get_fs();
+               set_fs (KERNEL_DS); 
+               ret = sys_utime(filenam, &t);
+               set_fs (old_fs);
+               putname (filenam);
+       }
+       return ret;
+}
+
+/*
+ * Ooo, nasty.  We need here to frob 32-bit unsigned longs to
+ * 64-bit unsigned longs.
+ */
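+/*
+ * Illustration: two consecutive 32-bit words from the user fd_set, say
+ * low = 0x00000001 and high = 0x80000000, are merged below into the
+ * single 64-bit kernel word 0x8000000000000001.
+ */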
+
+static inline int
+get_fd_set32(unsigned long n, unsigned long *fdset, u32 *ufdset)
+{
+       if (ufdset) {
+               unsigned long odd;
+
+               if (verify_area(VERIFY_READ, ufdset, n*sizeof(u32)))
+                       return -EFAULT;
+
+               odd = n & 1UL;
+               n &= ~1UL;
+               while (n) {
+                       unsigned long h, l;
+                       __get_user(l, ufdset);
+                       __get_user(h, ufdset+1);
+                       ufdset += 2;
+                       *fdset++ = h << 32 | l;
+                       n -= 2;
+               }
+               if (odd)
+                       __get_user(*fdset, ufdset);
+       } else {
+               /* Tricky: we must clear the full unsigned long in the
+                * kernel fdset at the end; this makes sure that
+                * actually happens.
+                */
+               memset(fdset, 0, ((n + 1) & ~1)*sizeof(u32));
+       }
+       return 0;
+}
+
+extern asmlinkage long sys_sysfs(int option, unsigned long arg1,
+                               unsigned long arg2);
+
+asmlinkage long
+sys32_sysfs(int option, u32 arg1, u32 arg2)
+{
+       return sys_sysfs(option, arg1, arg2);
+}
+
+extern asmlinkage long sys_mount(char * dev_name, char * dir_name, char * type,
+                               unsigned long new_flags, void *data);
+
+static char *badfs[] = {
+       "smbfs", "ncpfs", NULL
+};     
+
+static int checktype(char *user_type)
+{
+       int err = 0;
+       char **s, *kernel_type = getname32(user_type);
+
+       /* getname32() returns an ERR_PTR on failure, never NULL */
+       if (IS_ERR(kernel_type))
+               return PTR_ERR(kernel_type);
+       for (s = badfs; *s; ++s)
+               if (!strcmp(kernel_type, *s)) {
+                       printk(KERN_ERR "mount32: unsupported fs `%s' -- use 64bit mount\n", *s);
+                       err = -EINVAL;
+                       break;
+               }
+       /* free the kernel copy of the type string, not the user pointer */
+       putname(kernel_type);
+       return err;
+}
+
+asmlinkage long
+sys32_mount(char *dev_name, char *dir_name, char *type,
+           unsigned long new_flags, u32 data)
+{
+       int err;
+       if(!capable(CAP_SYS_ADMIN))
+               return -EPERM;
+       err = checktype(type);
+       if (err)
+               return err;
+       return sys_mount(dev_name, dir_name, type, new_flags, (void *)AA(data));
+}
+
+struct sysinfo32 {
+        s32 uptime;
+        u32 loads[3];
+        u32 totalram;
+        u32 freeram;
+        u32 sharedram;
+        u32 bufferram;
+        u32 totalswap;
+        u32 freeswap;
+        unsigned short procs;
+        char _f[22];
+};
+
+extern asmlinkage long sys_sysinfo(struct sysinfo *info);
+
+asmlinkage long
+sys32_sysinfo(struct sysinfo32 *info)
+{
+       struct sysinfo s;
+       int ret;
+       mm_segment_t old_fs = get_fs ();
+       
+       set_fs (KERNEL_DS);
+       ret = sys_sysinfo(&s);
+       set_fs (old_fs);
+       if (verify_area(VERIFY_WRITE, info, sizeof(struct sysinfo32)) ||
+           __put_user (s.uptime, &info->uptime) ||
+           __put_user (s.loads[0], &info->loads[0]) ||
+           __put_user (s.loads[1], &info->loads[1]) ||
+           __put_user (s.loads[2], &info->loads[2]) ||
+           __put_user (s.totalram, &info->totalram) ||
+           __put_user (s.freeram, &info->freeram) ||
+           __put_user (s.sharedram, &info->sharedram) ||
+           __put_user (s.bufferram, &info->bufferram) ||
+           __put_user (s.totalswap, &info->totalswap) ||
+           __put_user (s.freeswap, &info->freeswap) ||
+           __put_user (s.procs, &info->procs))
+               return -EFAULT;
+       return 0;
+}
+                
+extern asmlinkage long sys_sched_rr_get_interval(pid_t pid,
+                                               struct timespec *interval);
+
+asmlinkage long
+sys32_sched_rr_get_interval(__kernel_pid_t32 pid, struct timespec32 *interval)
+{
+       struct timespec t;
+       int ret;
+       mm_segment_t old_fs = get_fs ();
+       
+       set_fs (KERNEL_DS);
+       ret = sys_sched_rr_get_interval(pid, &t);
+       set_fs (old_fs);
+       if (verify_area(VERIFY_WRITE, interval, sizeof(struct timespec32)) ||
+           __put_user (t.tv_sec, &interval->tv_sec) ||
+           __put_user (t.tv_nsec, &interval->tv_nsec))
+               return -EFAULT;
+       return ret;
+}
+
+extern asmlinkage long sys_sigprocmask(int how, old_sigset_t *set,
+                                     old_sigset_t *oset);
+
+asmlinkage long
+sys32_sigprocmask(int how, old_sigset32_t *set, old_sigset32_t *oset)
+{
+       old_sigset_t s;
+       int ret;
+       mm_segment_t old_fs = get_fs();
+       
+       if (set && get_user (s, set)) return -EFAULT;
+       set_fs (KERNEL_DS);
+       ret = sys_sigprocmask(how, set ? &s : NULL, oset ? &s : NULL);
+       set_fs (old_fs);
+       if (ret) return ret;
+       if (oset && put_user (s, oset)) return -EFAULT;
+       return 0;
+}
+
+extern asmlinkage long sys_sigpending(old_sigset_t *set);
+
+asmlinkage long
+sys32_sigpending(old_sigset32_t *set)
+{
+       old_sigset_t s;
+       int ret;
+       mm_segment_t old_fs = get_fs();
+               
+       set_fs (KERNEL_DS);
+       ret = sys_sigpending(&s);
+       set_fs (old_fs);
+       if (put_user (s, set)) return -EFAULT;
+       return ret;
+}
+
+extern asmlinkage long sys_rt_sigpending(sigset_t *set, size_t sigsetsize);
+
+asmlinkage long
+sys32_rt_sigpending(sigset32_t *set, __kernel_size_t32 sigsetsize)
+{
+       sigset_t s;
+       sigset32_t s32;
+       int ret;
+       mm_segment_t old_fs = get_fs();
+               
+       set_fs (KERNEL_DS);
+       ret = sys_rt_sigpending(&s, sigsetsize);
+       set_fs (old_fs);
+       if (!ret) {
+               switch (_NSIG_WORDS) {
+               case 4: s32.sig[7] = (s.sig[3] >> 32); s32.sig[6] = s.sig[3];
+               case 3: s32.sig[5] = (s.sig[2] >> 32); s32.sig[4] = s.sig[2];
+               case 2: s32.sig[3] = (s.sig[1] >> 32); s32.sig[2] = s.sig[1];
+               case 1: s32.sig[1] = (s.sig[0] >> 32); s32.sig[0] = s.sig[0];
+               }
+               if (copy_to_user (set, &s32, sizeof(sigset32_t)))
+                       return -EFAULT;
+       }
+       return ret;
+}
+
+siginfo_t32 *
+siginfo64to32(siginfo_t32 *d, siginfo_t *s)
+{
+       memset (d, 0, sizeof(siginfo_t32));
+       d->si_signo = s->si_signo;
+       d->si_errno = s->si_errno;
+       d->si_code = s->si_code;
+       if (s->si_signo >= SIGRTMIN) {
+               d->si_pid = s->si_pid;
+               d->si_uid = s->si_uid;
+               /* XXX: Ouch, how to find this out??? */
+               d->si_int = s->si_int;
+       } else switch (s->si_signo) {
+       /* XXX: What about POSIX1.b timers */
+       case SIGCHLD:
+               d->si_pid = s->si_pid;
+               d->si_status = s->si_status;
+               d->si_utime = s->si_utime;
+               d->si_stime = s->si_stime;
+               break;
+       case SIGSEGV:
+       case SIGBUS:
+       case SIGFPE:
+       case SIGILL:
+               d->si_addr = (long)(s->si_addr);
+//             d->si_trapno = s->si_trapno;
+               break;
+       case SIGPOLL:
+               d->si_band = s->si_band;
+               d->si_fd = s->si_fd;
+               break;
+       default:
+               d->si_pid = s->si_pid;
+               d->si_uid = s->si_uid;
+               break;
+       }
+       return d;
+}
+
+siginfo_t *
+siginfo32to64(siginfo_t *d, siginfo_t32 *s)
+{
+       d->si_signo = s->si_signo;
+       d->si_errno = s->si_errno;
+       d->si_code = s->si_code;
+       if (s->si_signo >= SIGRTMIN) {
+               d->si_pid = s->si_pid;
+               d->si_uid = s->si_uid;
+               /* XXX: Ouch, how to find this out??? */
+               d->si_int = s->si_int;
+       } else switch (s->si_signo) {
+       /* XXX: What about POSIX1.b timers */
+       case SIGCHLD:
+               d->si_pid = s->si_pid;
+               d->si_status = s->si_status;
+               d->si_utime = s->si_utime;
+               d->si_stime = s->si_stime;
+               break;
+       case SIGSEGV:
+       case SIGBUS:
+       case SIGFPE:
+       case SIGILL:
+               d->si_addr = (void *)A(s->si_addr);
+//             d->si_trapno = s->si_trapno;
+               break;
+       case SIGPOLL:
+               d->si_band = s->si_band;
+               d->si_fd = s->si_fd;
+               break;
+       default:
+               d->si_pid = s->si_pid;
+               d->si_uid = s->si_uid;
+               break;
+       }
+       return d;
+}
+
+extern asmlinkage long
+sys_rt_sigtimedwait(const sigset_t *uthese, siginfo_t *uinfo,
+                   const struct timespec *uts, size_t sigsetsize);
+
+asmlinkage long
+sys32_rt_sigtimedwait(sigset32_t *uthese, siginfo_t32 *uinfo,
+                     struct timespec32 *uts, __kernel_size_t32 sigsetsize)
+{
+       sigset_t s;
+       sigset32_t s32;
+       struct timespec t;
+       int ret;
+       mm_segment_t old_fs = get_fs();
+       siginfo_t info;
+       siginfo_t32 info32;
+               
+       if (copy_from_user (&s32, uthese, sizeof(sigset32_t)))
+               return -EFAULT;
+       switch (_NSIG_WORDS) {
+       case 4: s.sig[3] = s32.sig[6] | (((long)s32.sig[7]) << 32);
+       case 3: s.sig[2] = s32.sig[4] | (((long)s32.sig[5]) << 32);
+       case 2: s.sig[1] = s32.sig[2] | (((long)s32.sig[3]) << 32);
+       case 1: s.sig[0] = s32.sig[0] | (((long)s32.sig[1]) << 32);
+       }
+       if (uts) {
+               if (verify_area(VERIFY_READ, uts, sizeof(struct timespec32)) ||
+                   __get_user (t.tv_sec, &uts->tv_sec) ||
+                   __get_user (t.tv_nsec, &uts->tv_nsec))
+                       return -EFAULT;
+       }
+       set_fs (KERNEL_DS);
+       ret = sys_rt_sigtimedwait(&s, &info, uts ? &t : NULL, sigsetsize);
+       set_fs (old_fs);
+       if (ret >= 0 && uinfo) {
+               if (copy_to_user (uinfo, siginfo64to32(&info32, &info),
+                                 sizeof(siginfo_t32)))
+                       return -EFAULT;
+       }
+       return ret;
+}
+
+extern asmlinkage long
+sys_rt_sigqueueinfo(int pid, int sig, siginfo_t *uinfo);
+
+asmlinkage long
+sys32_rt_sigqueueinfo(int pid, int sig, siginfo_t32 *uinfo)
+{
+       siginfo_t info;
+       siginfo_t32 info32;
+       int ret;
+       mm_segment_t old_fs = get_fs();
+       
+       if (copy_from_user (&info32, uinfo, sizeof(siginfo_t32)))
+               return -EFAULT;
+       /* XXX: Is this correct? */
+       siginfo32to64(&info, &info32);
+       set_fs (KERNEL_DS);
+       ret = sys_rt_sigqueueinfo(pid, sig, &info);
+       set_fs (old_fs);
+       return ret;
+}
+
+extern asmlinkage long sys_setreuid(uid_t ruid, uid_t euid);
+
+asmlinkage long sys32_setreuid(__kernel_uid_t32 ruid, __kernel_uid_t32 euid)
+{
+       uid_t sruid, seuid;
+
+       sruid = (ruid == (__kernel_uid_t32)-1) ? ((uid_t)-1) : ((uid_t)ruid);
+       seuid = (euid == (__kernel_uid_t32)-1) ? ((uid_t)-1) : ((uid_t)euid);
+       return sys_setreuid(sruid, seuid);
+}
+
+extern asmlinkage long sys_setresuid(uid_t ruid, uid_t euid, uid_t suid);
+
+asmlinkage long
+sys32_setresuid(__kernel_uid_t32 ruid, __kernel_uid_t32 euid,
+               __kernel_uid_t32 suid)
+{
+       uid_t sruid, seuid, ssuid;
+
+       sruid = (ruid == (__kernel_uid_t32)-1) ? ((uid_t)-1) : ((uid_t)ruid);
+       seuid = (euid == (__kernel_uid_t32)-1) ? ((uid_t)-1) : ((uid_t)euid);
+       ssuid = (suid == (__kernel_uid_t32)-1) ? ((uid_t)-1) : ((uid_t)suid);
+       return sys_setresuid(sruid, seuid, ssuid);
+}
+
+extern asmlinkage long sys_getresuid(uid_t *ruid, uid_t *euid, uid_t *suid);
+
+asmlinkage long
+sys32_getresuid(__kernel_uid_t32 *ruid, __kernel_uid_t32 *euid,
+               __kernel_uid_t32 *suid)
+{
+       uid_t a, b, c;
+       int ret;
+       mm_segment_t old_fs = get_fs();
+               
+       set_fs (KERNEL_DS);
+       ret = sys_getresuid(&a, &b, &c);
+       set_fs (old_fs);
+       if (put_user (a, ruid) || put_user (b, euid) || put_user (c, suid))
+               return -EFAULT;
+       return ret;
+}
+
+extern asmlinkage long sys_setregid(gid_t rgid, gid_t egid);
+
+asmlinkage long
+sys32_setregid(__kernel_gid_t32 rgid, __kernel_gid_t32 egid)
+{
+       gid_t srgid, segid;
+
+       srgid = (rgid == (__kernel_gid_t32)-1) ? ((gid_t)-1) : ((gid_t)rgid);
+       segid = (egid == (__kernel_gid_t32)-1) ? ((gid_t)-1) : ((gid_t)egid);
+       return sys_setregid(srgid, segid);
+}
+
+extern asmlinkage long sys_setresgid(gid_t rgid, gid_t egid, gid_t sgid);
+
+asmlinkage long
+sys32_setresgid(__kernel_gid_t32 rgid, __kernel_gid_t32 egid,
+               __kernel_gid_t32 sgid)
+{
+       gid_t srgid, segid, ssgid;
+
+       srgid = (rgid == (__kernel_gid_t32)-1) ? ((gid_t)-1) : ((gid_t)rgid);
+       segid = (egid == (__kernel_gid_t32)-1) ? ((gid_t)-1) : ((gid_t)egid);
+       ssgid = (sgid == (__kernel_gid_t32)-1) ? ((gid_t)-1) : ((gid_t)sgid);
+       return sys_setresgid(srgid, segid, ssgid);
+}
+
+extern asmlinkage long sys_getresgid(gid_t *rgid, gid_t *egid, gid_t *sgid);
+
+asmlinkage long
+sys32_getresgid(__kernel_gid_t32 *rgid, __kernel_gid_t32 *egid,
+               __kernel_gid_t32 *sgid) 
+{
+       gid_t a, b, c;
+       int ret;
+       mm_segment_t old_fs = get_fs();
+               
+       set_fs (KERNEL_DS);
+       ret = sys_getresgid(&a, &b, &c);
+       set_fs (old_fs);
+       if (!ret) {
+               ret = put_user (a, rgid);
+               ret |= put_user (b, egid);
+               ret |= put_user (c, sgid);
+       }
+       return ret;
+}
+
+extern asmlinkage long sys_getgroups(int gidsetsize, gid_t *grouplist);
+
+asmlinkage long
+sys32_getgroups(int gidsetsize, __kernel_gid_t32 *grouplist)
+{
+       gid_t gl[NGROUPS];
+       int ret, i;
+       mm_segment_t old_fs = get_fs ();
+       
+       set_fs (KERNEL_DS);
+       ret = sys_getgroups(gidsetsize, gl);
+       set_fs (old_fs);
+       if (gidsetsize && ret > 0 && ret <= NGROUPS)
+               for (i = 0; i < ret; i++, grouplist++)
+                       if (put_user (gl[i], grouplist))
+                               return -EFAULT;
+       return ret;
+}
+
+extern asmlinkage long sys_setgroups(int gidsetsize, gid_t *grouplist);
+
+asmlinkage long
+sys32_setgroups(int gidsetsize, __kernel_gid_t32 *grouplist)
+{
+       gid_t gl[NGROUPS];
+       int ret, i;
+       mm_segment_t old_fs = get_fs ();
+       
+       if ((unsigned) gidsetsize > NGROUPS)
+               return -EINVAL;
+       for (i = 0; i < gidsetsize; i++, grouplist++)
+               if (get_user (gl[i], grouplist))
+                       return -EFAULT;
+        set_fs (KERNEL_DS);
+       ret = sys_setgroups(gidsetsize, gl);
+       set_fs (old_fs);
+       return ret;
+}
+
+
+extern void check_pending(int signum);
+
+asmlinkage long sys_utimes(char *, struct timeval *);
+
+asmlinkage long
+sys32_utimes(char *filename, struct timeval32 *tvs)
+{
+       char *kfilename;
+       struct timeval ktvs[2];
+       mm_segment_t old_fs;
+       int ret;
+
+       kfilename = getname32(filename);
+       ret = PTR_ERR(kfilename);
+       if (!IS_ERR(kfilename)) {
+               if (tvs) {
+                       if (get_tv32(&ktvs[0], tvs) ||
+                           get_tv32(&ktvs[1], 1+tvs)) {
+                               /* don't leak the getname32() page */
+                               putname(kfilename);
+                               return -EFAULT;
+                       }
+               }
+
+               old_fs = get_fs();
+               set_fs(KERNEL_DS);
+               ret = sys_utimes(kfilename, &ktvs[0]);
+               set_fs(old_fs);
+
+               putname(kfilename);
+       }
+       return ret;
+}
+
+/* This is here just in case some old ia32 binary calls it. */
+asmlinkage long
+sys32_pause(void)
+{
+       current->state = TASK_INTERRUPTIBLE;
+       schedule();
+       return -ERESTARTNOHAND;
+}
+
+
+struct sysctl_ia32 {
+       unsigned int    name;
+       int             nlen;
+       unsigned int    oldval;
+       unsigned int    oldlenp;
+       unsigned int    newval;
+       unsigned int    newlen;
+       unsigned int    __unused[4];
+};
+
+
+asmlinkage long
+sys32_sysctl(struct sysctl_ia32 *args32)
+{
+#ifndef CONFIG_SYSCTL
+       return -ENOSYS; 
+#else
+       struct sysctl_ia32 a32;
+       mm_segment_t old_fs = get_fs ();
+       void *oldvalp, *newvalp;
+       size_t oldlen;
+       int *namep;
+       long ret;
+       extern int do_sysctl(int *name, int nlen, void *oldval, size_t *oldlenp,
+                    void *newval, size_t newlen);
+
+
+       if (copy_from_user(&a32, args32, sizeof (a32)))
+               return -EFAULT;
+
+       /*
+        * We need to pre-validate these because we have to disable address checking
+        * before calling do_sysctl() because of OLDLEN, but we can't run the risk of the
+        * user specifying bad addresses here.  Well, since we're dealing with 32-bit
+        * addresses, we KNOW that access_ok() will always succeed, so this is an
+        * expensive NOP, but so what...
+        */
+       namep = (int *) A(a32.name);
+       oldvalp = (void *) A(a32.oldval);
+       newvalp = (void *) A(a32.newval);
+
+       if ((oldvalp && get_user(oldlen, (int *) A(a32.oldlenp)))
+           || !access_ok(VERIFY_WRITE, namep, 0)
+           || !access_ok(VERIFY_WRITE, oldvalp, 0)
+           || !access_ok(VERIFY_WRITE, newvalp, 0))
+               return -EFAULT;
+
+       set_fs(KERNEL_DS);
+       lock_kernel();
+       ret = do_sysctl(namep, a32.nlen, oldvalp, &oldlen, newvalp, (size_t) a32.newlen);
+       unlock_kernel();
+       set_fs(old_fs);
+
+       if (oldvalp && put_user (oldlen, (int *) A(a32.oldlenp)))
+               return -EFAULT;
+
+       return ret;
+#endif
+}
+
+extern asmlinkage long sys_newuname(struct new_utsname * name);
+
+asmlinkage long
+sys32_newuname(struct new_utsname * name)
+{
+       int ret = sys_newuname(name);
+       
+       if (current->personality == PER_LINUX32 && !ret) {
+               /* report an i386 machine to 32-bit programs */
+               if (copy_to_user(name->machine, "i386\0\0\0", 8))
+                       ret = -EFAULT;
+       }
+       return ret;
+}
+
+extern asmlinkage ssize_t sys_pread(unsigned int fd, char * buf,
+                                   size_t count, loff_t pos);
+
+extern asmlinkage ssize_t sys_pwrite(unsigned int fd, const char * buf,
+                                    size_t count, loff_t pos);
+
+typedef __kernel_ssize_t32 ssize_t32;
+
+asmlinkage ssize_t32
+sys32_pread(unsigned int fd, char *ubuf, __kernel_size_t32 count,
+           u32 poshi, u32 poslo)
+{
+       return sys_pread(fd, ubuf, count,
+                        ((loff_t)AA(poshi) << 32) | AA(poslo));
+}
+
+asmlinkage ssize_t32
+sys32_pwrite(unsigned int fd, char *ubuf, __kernel_size_t32 count,
+            u32 poshi, u32 poslo)
+{
+       return sys_pwrite(fd, ubuf, count,
+                         ((loff_t)AA(poshi) << 32) | AA(poslo));
+}
+
+
+extern asmlinkage long sys_personality(unsigned long);
+
+asmlinkage long
+sys32_personality(unsigned long personality)
+{
+       int ret;
+       if (current->personality == PER_LINUX32 && personality == PER_LINUX)
+               personality = PER_LINUX32;
+       ret = sys_personality(personality);
+       if (ret == PER_LINUX32)
+               ret = PER_LINUX;
+       return ret;
+}
+
+extern asmlinkage ssize_t sys_sendfile(int out_fd, int in_fd, off_t *offset,
+                                      size_t count); 
+
+asmlinkage long
+sys32_sendfile(int out_fd, int in_fd, __kernel_off_t32 *offset, s32 count)
+{
+       mm_segment_t old_fs = get_fs();
+       int ret;
+       off_t of;
+       
+       if (offset && get_user(of, offset))
+               return -EFAULT;
+               
+       set_fs(KERNEL_DS);
+       ret = sys_sendfile(out_fd, in_fd, offset ? &of : NULL, count);
+       set_fs(old_fs);
+       
+       if (!ret && offset && put_user(of, offset))
+               return -EFAULT;
+               
+       return ret;
+}
+
+/* Handle adjtimex compatibility. */
+
+struct timex32 {
+       u32 modes;
+       s32 offset, freq, maxerror, esterror;
+       s32 status, constant, precision, tolerance;
+       struct timeval32 time;
+       s32 tick;
+       s32 ppsfreq, jitter, shift, stabil;
+       s32 jitcnt, calcnt, errcnt, stbcnt;
+       s32  :32; s32  :32; s32  :32; s32  :32;
+       s32  :32; s32  :32; s32  :32; s32  :32;
+       s32  :32; s32  :32; s32  :32; s32  :32;
+};
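+
+/*
+ * The anonymous "s32 :32" bitfields above only reserve the trailing
+ * padding words of the 32-bit struct timex layout; none of them is
+ * copied by the conversion below.
+ */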
+
+extern int do_adjtimex(struct timex *);
+
+asmlinkage long
+sys32_adjtimex(struct timex32 *utp)
+{
+       struct timex txc;
+       int ret;
+
+       memset(&txc, 0, sizeof(struct timex));
+
+       if(verify_area(VERIFY_READ, utp, sizeof(struct timex32)) ||
+          __get_user(txc.modes, &utp->modes) ||
+          __get_user(txc.offset, &utp->offset) ||
+          __get_user(txc.freq, &utp->freq) ||
+          __get_user(txc.maxerror, &utp->maxerror) ||
+          __get_user(txc.esterror, &utp->esterror) ||
+          __get_user(txc.status, &utp->status) ||
+          __get_user(txc.constant, &utp->constant) ||
+          __get_user(txc.precision, &utp->precision) ||
+          __get_user(txc.tolerance, &utp->tolerance) ||
+          __get_user(txc.time.tv_sec, &utp->time.tv_sec) ||
+          __get_user(txc.time.tv_usec, &utp->time.tv_usec) ||
+          __get_user(txc.tick, &utp->tick) ||
+          __get_user(txc.ppsfreq, &utp->ppsfreq) ||
+          __get_user(txc.jitter, &utp->jitter) ||
+          __get_user(txc.shift, &utp->shift) ||
+          __get_user(txc.stabil, &utp->stabil) ||
+          __get_user(txc.jitcnt, &utp->jitcnt) ||
+          __get_user(txc.calcnt, &utp->calcnt) ||
+          __get_user(txc.errcnt, &utp->errcnt) ||
+          __get_user(txc.stbcnt, &utp->stbcnt))
+               return -EFAULT;
+
+       ret = do_adjtimex(&txc);
+
+       if(verify_area(VERIFY_WRITE, utp, sizeof(struct timex32)) ||
+          __put_user(txc.modes, &utp->modes) ||
+          __put_user(txc.offset, &utp->offset) ||
+          __put_user(txc.freq, &utp->freq) ||
+          __put_user(txc.maxerror, &utp->maxerror) ||
+          __put_user(txc.esterror, &utp->esterror) ||
+          __put_user(txc.status, &utp->status) ||
+          __put_user(txc.constant, &utp->constant) ||
+          __put_user(txc.precision, &utp->precision) ||
+          __put_user(txc.tolerance, &utp->tolerance) ||
+          __put_user(txc.time.tv_sec, &utp->time.tv_sec) ||
+          __put_user(txc.time.tv_usec, &utp->time.tv_usec) ||
+          __put_user(txc.tick, &utp->tick) ||
+          __put_user(txc.ppsfreq, &utp->ppsfreq) ||
+          __put_user(txc.jitter, &utp->jitter) ||
+          __put_user(txc.shift, &utp->shift) ||
+          __put_user(txc.stabil, &utp->stabil) ||
+          __put_user(txc.jitcnt, &utp->jitcnt) ||
+          __put_user(txc.calcnt, &utp->calcnt) ||
+          __put_user(txc.errcnt, &utp->errcnt) ||
+          __put_user(txc.stbcnt, &utp->stbcnt))
+               ret = -EFAULT;
+
+       return ret;
+}
+
+
+/* common code for old and new mmaps */
+static inline long do_mmap2(
+       unsigned long addr, unsigned long len,
+       unsigned long prot, unsigned long flags,
+       unsigned long fd, unsigned long pgoff)
+{
+       int error = -EBADF;
+       struct file * file = NULL;
+
+       flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE);
+       if (!(flags & MAP_ANONYMOUS)) {
+               file = fget(fd);
+               if (!file)
+                       goto out;
+       }
+
+       down_write(&current->mm->mmap_sem);
+       error = do_mmap_pgoff(file, addr, len, prot, flags, pgoff);
+       up_write(&current->mm->mmap_sem);
+
+       if (file)
+               fput(file);
+out:
+       return error;
+}
+
+asmlinkage long sys32_mmap2(unsigned long addr, unsigned long len,
+       unsigned long prot, unsigned long flags,
+       unsigned long fd, unsigned long pgoff)
+{
+       return do_mmap2(addr, len, prot, flags, fd, pgoff);
+}
+
+
+asmlinkage int sys32_olduname(struct oldold_utsname * name)
+{
+       int error;
+
+       if (!name)
+               return -EFAULT;
+       if (!access_ok(VERIFY_WRITE,name,sizeof(struct oldold_utsname)))
+               return -EFAULT;
+  
+       down_read(&uts_sem);
+       
+       error = __copy_to_user(&name->sysname,&system_utsname.sysname,__OLD_UTS_LEN);
+       error |= __put_user(0,name->sysname+__OLD_UTS_LEN);
+       error |= __copy_to_user(&name->nodename,&system_utsname.nodename,__OLD_UTS_LEN);
+       error |= __put_user(0,name->nodename+__OLD_UTS_LEN);
+       error |= __copy_to_user(&name->release,&system_utsname.release,__OLD_UTS_LEN);
+       error |= __put_user(0,name->release+__OLD_UTS_LEN);
+       error |= __copy_to_user(&name->version,&system_utsname.version,__OLD_UTS_LEN);
+       error |= __put_user(0,name->version+__OLD_UTS_LEN);
+       {
+               char *arch = current->personality == PER_LINUX32
+                       ? "i386" : "x86_64";
+
+               error |= __copy_to_user(&name->machine,arch,strlen(arch)+1);
+       }
+       
+        up_read(&uts_sem);
+        
+        error = error ? -EFAULT : 0;
+        
+        return error;
+}
+
+int sys32_uname(struct old_utsname * name)
+{
+       int err;
+       if (!name)
+               return -EFAULT;
+       down_read(&uts_sem);
+       err=copy_to_user(name, &system_utsname, sizeof (*name));
+       up_read(&uts_sem);
+       return err?-EFAULT:0;
+}
+
+extern int sys_ustat(dev_t, struct ustat *);
+
+int sys32_ustat(dev_t dev, struct ustat32 *u32p)
+{
+       struct ustat u;
+       mm_segment_t seg;
+       int ret;
+       
+       seg = get_fs(); 
+       set_fs(KERNEL_DS); 
+       ret = sys_ustat(dev,&u); 
+       set_fs(seg);
+       if (ret >= 0) { 
+               if (!access_ok(VERIFY_WRITE,u32p,sizeof(struct ustat32)) || 
+                   __put_user((__u32) u.f_tfree, &u32p->f_tfree) ||
+                   __put_user((__u32) u.f_tinode, &u32p->f_tinode) ||
+                   __copy_to_user(&u32p->f_fname, u.f_fname, sizeof(u.f_fname)) ||
+                   __copy_to_user(&u32p->f_fpack, u.f_fpack, sizeof(u.f_fpack)))
+                       ret = -EFAULT;
+       }
+       return ret;
+} 
+
+static int nargs(u32 src, char **dst) 
+{ 
+       int cnt;
+       u32 val; 
+
+       cnt = 0; 
+       do {            
+               int ret = get_user(val, (__u32 *)(u64)src); 
+               if (ret)  {
+                       return ret;
+               }       
+               if (dst)
+                       dst[cnt] = (char *)(u64)val; 
+               cnt++;
+               src += 4;       
+       } while(val && cnt < 1023);  // XXX: fix limit.
+       if (dst)
+               dst[cnt-1] = 0; 
+       return cnt; 
+} 
+
+int sys32_execve(char *name, u32 argv, u32 envp, struct pt_regs regs)
+{ 
+       mm_segment_t oldseg; 
+       char **buf; 
+       int na,ne;
+       int ret;
+
+       na = nargs(argv, NULL); 
+       if (na < 0) 
+               return -EFAULT; 
+       ne = nargs(envp, NULL); 
+       if (ne < 0) 
+               return -EFAULT; 
+
+       buf = kmalloc((na+ne)*sizeof(char*), GFP_KERNEL); 
+       if (!buf)
+               return -ENOMEM; 
+       
+       ret = nargs(argv, buf);
+       if (ret < 0)
+               goto free;
+
+       ret = nargs(envp, buf + na); 
+       if (ret < 0)
+               goto free; 
+
+       name = getname(name); 
+       ret = PTR_ERR(name); 
+       if (IS_ERR(name))
+               goto free; 
+
+       oldseg = get_fs(); 
+       set_fs(KERNEL_DS);
+       ret = do_execve(name, buf, buf+na, &regs);  
+       set_fs(oldseg); 
+
+       if (ret == 0)
+               current->ptrace &= ~PT_DTRACE;
+
+       putname(name);
+free:
+       kfree(buf);
+       return ret; 
+} 
+
+asmlinkage int sys32_fork(struct pt_regs regs)
+{
+       return do_fork(SIGCHLD, regs.rsp, &regs, 0);
+}
+
+asmlinkage int sys32_clone(unsigned int clone_flags, unsigned int newsp, struct pt_regs regs)
+{
+       if (!newsp)
+               newsp = regs.rsp;
+       return do_fork(clone_flags, newsp, &regs, 0);
+}
+
+/*
+ * This is trivial, and on the face of it looks like it
+ * could equally well be done in user mode.
+ *
+ * Not so, for quite unobvious reasons - register pressure.
+ * In user mode vfork() cannot have a stack frame, and if
+ * done by calling the "clone()" system call directly, you
+ * do not have enough call-clobbered registers to hold all
+ * the information you need.
+ */
+asmlinkage int sys32_vfork(struct pt_regs regs)
+{
+       return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs.rsp, &regs, 0);
+}
+
+/*
+ * Some system calls need sign extended arguments. This could be done by a generic wrapper.
+ */ 
+
+extern off_t sys_lseek (unsigned int fd, off_t offset, unsigned int origin);
+
+int sys32_lseek (unsigned int fd, int offset, unsigned int whence)
+{
+       return sys_lseek(fd, offset, whence);
+}
+
+extern int sys_kill(pid_t pid, int sig); 
+
+int sys32_kill(int pid, int sig)
+{
+       return sys_kill(pid, sig);
+}
diff --git a/arch/x86_64/kernel/Makefile b/arch/x86_64/kernel/Makefile
new file mode 100644 (file)
index 0000000..2a31864
--- /dev/null
@@ -0,0 +1,38 @@
+#
+# Makefile for the linux kernel.
+#
+# Note! Dependencies are done automagically by 'make dep', which also
+# removes any old dependencies. DON'T put your own dependencies here
+# unless it's something special (ie not a .c file).
+#
+# Note 2! The CFLAGS definitions are now in the main makefile...
+
+.S.o:
+       $(CC) $(AFLAGS) -traditional -c $< -o $*.o
+
+all: kernel.o head.o head64.o init_task.o
+
+O_TARGET := kernel.o
+
+
+export-objs     := mtrr.o msr.o cpuid.o x8664_ksyms.o
+
+obj-y  := process.o semaphore.o signal.o entry.o traps.o irq.o \
+               ptrace.o i8259.o ioport.o ldt.o setup.o time.o sys_x86_64.o \
+               pci-dma.o x8664_ksyms.o i387.o syscall.o early_printk.o vsyscall.o \
+               setup64.o bluesmoke.o
+
+ifdef CONFIG_PCI
+obj-y                  += pci-x86_64.o
+obj-y                  += pci-pc.o pci-irq.o
+endif
+
+obj-$(CONFIG_MTRR)     += mtrr.o
+obj-$(CONFIG_X86_MSR)  += msr.o
+obj-$(CONFIG_X86_CPUID)        += cpuid.o
+obj-$(CONFIG_SMP)      += smp.o smpboot.o trampoline.o
+obj-$(CONFIG_X86_LOCAL_APIC)   += apic.o  nmi.o
+obj-$(CONFIG_X86_IO_APIC)      += io_apic.o mpparse.o
+
+include $(TOPDIR)/Rules.make
+
diff --git a/arch/x86_64/kernel/apic.c b/arch/x86_64/kernel/apic.c
new file mode 100644 (file)
index 0000000..63aabda
--- /dev/null
@@ -0,0 +1,1160 @@
+/*
+ *     Local APIC handling, local APIC timers
+ *
+ *     (c) 1999, 2000 Ingo Molnar <mingo@redhat.com>
+ *
+ *     Fixes
+ *     Maciej W. Rozycki       :       Bits for genuine 82489DX APICs;
+ *                                     thanks to Eric Gilmore
+ *                                     and Rolf G. Tews
+ *                                     for testing these extensively.
+ *     Maciej W. Rozycki       :       Various updates and fixes.
+ *     Mikael Pettersson       :       Power Management for UP-APIC.
+ */
+
+#include <linux/config.h>
+#include <linux/init.h>
+
+#include <linux/mm.h>
+#include <linux/irq.h>
+#include <linux/delay.h>
+#include <linux/bootmem.h>
+#include <linux/smp_lock.h>
+#include <linux/interrupt.h>
+#include <linux/mc146818rtc.h>
+#include <linux/kernel_stat.h>
+
+#include <asm/atomic.h>
+#include <asm/smp.h>
+#include <asm/mtrr.h>
+#include <asm/mpspec.h>
+#include <asm/pgalloc.h>
+
+/* Using APIC to generate smp_local_timer_interrupt? */
+int using_apic_timer = 0;
+
+int prof_multiplier[NR_CPUS] = { 1, };
+int prof_old_multiplier[NR_CPUS] = { 1, };
+int prof_counter[NR_CPUS] = { 1, };
+
+int get_maxlvt(void)
+{
+       unsigned int v, ver, maxlvt;
+
+       v = apic_read(APIC_LVR);
+       ver = GET_APIC_VERSION(v);
+       /* 82489DXs do not report # of LVT entries. */
+       maxlvt = APIC_INTEGRATED(ver) ? GET_APIC_MAXLVT(v) : 2;
+       return maxlvt;
+}
+
+void clear_local_APIC(void)
+{
+       int maxlvt;
+       unsigned long v;
+
+       maxlvt = get_maxlvt();
+
+       /*
+        * Masking an LVT entry on a P6 can trigger a local APIC error
+        * if the vector is zero. Mask LVTERR first to prevent this.
+        */
+       if (maxlvt >= 3) {
+               v = ERROR_APIC_VECTOR; /* any non-zero vector will do */
+               apic_write_around(APIC_LVTERR, v | APIC_LVT_MASKED);
+       }
+       /*
+        * Careful: we have to set masks only first to deassert
+        * any level-triggered sources.
+        */
+       v = apic_read(APIC_LVTT);
+       apic_write_around(APIC_LVTT, v | APIC_LVT_MASKED);
+       v = apic_read(APIC_LVT0);
+       apic_write_around(APIC_LVT0, v | APIC_LVT_MASKED);
+       v = apic_read(APIC_LVT1);
+       apic_write_around(APIC_LVT1, v | APIC_LVT_MASKED);
+       if (maxlvt >= 4) {
+               v = apic_read(APIC_LVTPC);
+               apic_write_around(APIC_LVTPC, v | APIC_LVT_MASKED);
+       }
+
+       /*
+        * Clean APIC state for other OSs:
+        */
+       apic_write_around(APIC_LVTT, APIC_LVT_MASKED);
+       apic_write_around(APIC_LVT0, APIC_LVT_MASKED);
+       apic_write_around(APIC_LVT1, APIC_LVT_MASKED);
+       if (maxlvt >= 3)
+               apic_write_around(APIC_LVTERR, APIC_LVT_MASKED);
+       if (maxlvt >= 4)
+               apic_write_around(APIC_LVTPC, APIC_LVT_MASKED);
+       v = GET_APIC_VERSION(apic_read(APIC_LVR));
+       if (APIC_INTEGRATED(v)) {       /* !82489DX */
+               if (maxlvt > 3)
+                       apic_write(APIC_ESR, 0);
+               apic_read(APIC_ESR);
+       }
+}
+
+void __init connect_bsp_APIC(void)
+{
+       if (pic_mode) {
+               /*
+                * Do not trust the local APIC being empty at bootup.
+                */
+               clear_local_APIC();
+               /*
+                * PIC mode, enable APIC mode in the IMCR, i.e.
+                * connect BSP's local APIC to INT and NMI lines.
+                */
+               printk("leaving PIC mode, enabling APIC mode.\n");
+               outb(0x70, 0x22);
+               outb(0x01, 0x23);
+       }
+}
+
+void disconnect_bsp_APIC(void)
+{
+       if (pic_mode) {
+               /*
+                * Put the board back into PIC mode (has an effect
+                * only on certain older boards).  Note that APIC
+                * interrupts, including IPIs, won't work beyond
+                * this point!  The only exception are INIT IPIs.
+                */
+               printk("disabling APIC mode, entering PIC mode.\n");
+               outb(0x70, 0x22);
+               outb(0x00, 0x23);
+       }
+}
+
+void disable_local_APIC(void)
+{
+       unsigned long value;
+
+       clear_local_APIC();
+
+       /*
+        * Disable APIC (implies clearing of registers
+        * for 82489DX!).
+        */
+       value = apic_read(APIC_SPIV);
+       value &= ~APIC_SPIV_APIC_ENABLED;
+       apic_write_around(APIC_SPIV, value);
+}
+
+/*
+ * This is to verify that we're looking at a real local APIC.
+ * Check these against your board if the CPUs aren't getting
+ * started for no apparent reason.
+ */
+int __init verify_local_APIC(void)
+{
+       unsigned int reg0, reg1;
+
+       /*
+        * The version register is read-only in a real APIC.
+        */
+       reg0 = apic_read(APIC_LVR);
+       Dprintk("Getting VERSION: %x\n", reg0);
+       apic_write(APIC_LVR, reg0 ^ APIC_LVR_MASK);
+       reg1 = apic_read(APIC_LVR);
+       Dprintk("Getting VERSION: %x\n", reg1);
+
+       /*
+        * The two version reads above should print the same
+        * numbers.  If the second one is different, then we
+        * poke at a non-APIC.
+        */
+       if (reg1 != reg0)
+               return 0;
+
+       /*
+        * Check if the version looks reasonable.
+        */
+       reg1 = GET_APIC_VERSION(reg0);
+       if (reg1 == 0x00 || reg1 == 0xff)
+               return 0;
+       reg1 = get_maxlvt();
+       if (reg1 < 0x02 || reg1 == 0xff)
+               return 0;
+
+       /*
+        * The ID register is read/write in a real APIC.
+        */
+       reg0 = apic_read(APIC_ID);
+       Dprintk("Getting ID: %x\n", reg0);
+       apic_write(APIC_ID, reg0 ^ APIC_ID_MASK);
+       reg1 = apic_read(APIC_ID);
+       Dprintk("Getting ID: %x\n", reg1);
+       apic_write(APIC_ID, reg0);
+       if (reg1 != (reg0 ^ APIC_ID_MASK))
+               return 0;
+
+       /*
+        * The next two are just to see if we have sane values.
+        * They're only really relevant if we're in Virtual Wire
+        * compatibility mode, but most boxes are these days.
+        */
+       reg0 = apic_read(APIC_LVT0);
+       Dprintk("Getting LVT0: %x\n", reg0);
+       reg1 = apic_read(APIC_LVT1);
+       Dprintk("Getting LVT1: %x\n", reg1);
+
+       return 1;
+}
+
+void __init sync_Arb_IDs(void)
+{
+       /*
+        * Wait for idle.
+        */
+       apic_wait_icr_idle();
+
+       Dprintk("Synchronizing Arb IDs.\n");
+       apic_write_around(APIC_ICR, APIC_DEST_ALLINC | APIC_INT_LEVELTRIG
+                               | APIC_DM_INIT);
+}
+
+extern void __error_in_apic_c (void);
+
+/*
+ * An initial setup of the virtual wire mode.
+ */
+void __init init_bsp_APIC(void)
+{
+       unsigned long value, ver;
+
+       /*
+        * Don't do the setup now if we have an SMP BIOS as the
+        * through-I/O-APIC virtual wire mode might be active.
+        */
+       if (smp_found_config || !cpu_has_apic)
+               return;
+
+       value = apic_read(APIC_LVR);
+       ver = GET_APIC_VERSION(value);
+
+       /*
+        * Do not trust the local APIC being empty at bootup.
+        */
+       clear_local_APIC();
+
+       /*
+        * Enable APIC.
+        */
+       value = apic_read(APIC_SPIV);
+       value &= ~APIC_VECTOR_MASK;
+       value |= APIC_SPIV_APIC_ENABLED;
+       value |= APIC_SPIV_FOCUS_DISABLED;
+       value |= SPURIOUS_APIC_VECTOR;
+       apic_write_around(APIC_SPIV, value);
+
+       /*
+        * Set up the virtual wire mode.
+        */
+       apic_write_around(APIC_LVT0, APIC_DM_EXTINT);
+       value = APIC_DM_NMI;
+       if (!APIC_INTEGRATED(ver))              /* 82489DX */
+               value |= APIC_LVT_LEVEL_TRIGGER;
+       apic_write_around(APIC_LVT1, value);
+}
+
+void __init setup_local_APIC (void)
+{
+       unsigned long value, ver, maxlvt;
+
+       /* Pound the ESR really hard over the head with a big hammer - mbligh */
+       if (esr_disable) {
+               apic_write(APIC_ESR, 0);
+               apic_write(APIC_ESR, 0);
+               apic_write(APIC_ESR, 0);
+               apic_write(APIC_ESR, 0);
+       }
+
+       value = apic_read(APIC_LVR);
+       ver = GET_APIC_VERSION(value);
+
+       if ((SPURIOUS_APIC_VECTOR & 0x0f) != 0x0f)
+               __error_in_apic_c();
+
+       /*
+        * Double-check whether this APIC is really registered.
+        * This is meaningless in clustered apic mode, so we skip it.
+        */
+       if (!clustered_apic_mode && 
+           !test_bit(GET_APIC_ID(apic_read(APIC_ID)), &phys_cpu_present_map))
+               BUG();
+
+       /*
+        * Intel recommends to set DFR, LDR and TPR before enabling
+        * an APIC.  See e.g. "AP-388 82489DX User's Manual" (Intel
+        * document number 292116).  So here it goes...
+        */
+
+       if (!clustered_apic_mode) {
+               /*
+                * In clustered apic mode the firmware does this for us.
+                * Otherwise, put the APIC into flat delivery mode;
+                * must be "all ones" explicitly for 82489DX.
+                */
+               apic_write_around(APIC_DFR, 0xffffffff);
+
+               /*
+                * Set up the logical destination ID.
+                */
+               value = apic_read(APIC_LDR);
+               value &= ~APIC_LDR_MASK;
+               value |= (1<<(smp_processor_id()+24));
+               apic_write_around(APIC_LDR, value);
+       }
+
+       /*
+        * Set Task Priority to 'accept all'. We never change this
+        * later on.
+        */
+       value = apic_read(APIC_TASKPRI);
+       value &= ~APIC_TPRI_MASK;
+       apic_write_around(APIC_TASKPRI, value);
+
+       /*
+        * Now that we are all set up, enable the APIC
+        */
+       value = apic_read(APIC_SPIV);
+       value &= ~APIC_VECTOR_MASK;
+       /*
+        * Enable APIC
+        */
+       value |= APIC_SPIV_APIC_ENABLED;
+
+       /*
+        * Some unknown Intel IO/APIC (or APIC) errata is biting us with
+        * certain networking cards. If high frequency interrupts are
+        * happening on a particular IOAPIC pin, plus the IOAPIC routing
+        * entry is masked/unmasked at a high rate as well then sooner or
+        * later IOAPIC line gets 'stuck', no more interrupts are received
+        * from the device. If focus CPU is disabled then the hang goes
+        * away, oh well :-(
+        *
+        * [ This bug can be reproduced easily with level-triggered
+        *   PCI Ne2000 networking cards and PII/PIII processors, dual
+        *   BX chipset. ]
+        */
+       /*
+        * Actually disabling the focus CPU check just makes the hang less
+        * frequent, as it makes the interrupt distribution model more
+        * like LRU than MRU (the short-term load is more even across CPUs).
+        * See also the comment in end_level_ioapic_irq().  --macro
+        */
+#if 1
+       /* Enable focus processor (bit==0) */
+       value &= ~APIC_SPIV_FOCUS_DISABLED;
+#else
+       /* Disable focus processor (bit==1) */
+       value |= APIC_SPIV_FOCUS_DISABLED;
+#endif
+       /*
+        * Set spurious IRQ vector
+        */
+       value |= SPURIOUS_APIC_VECTOR;
+       apic_write_around(APIC_SPIV, value);
+
+       /*
+        * Set up LVT0, LVT1:
+        *
+        * set up through-local-APIC on the BP's LINT0. This is not
+        * strictly necessary in pure symmetric-IO mode, but sometimes
+        * we delegate interrupts to the 8259A.
+        */
+       /*
+        * TODO: set up through-local-APIC from through-I/O-APIC? --macro
+        */
+       value = apic_read(APIC_LVT0) & APIC_LVT_MASKED;
+       if (!smp_processor_id() && (pic_mode || !value)) {
+               value = APIC_DM_EXTINT;
+               printk("enabled ExtINT on CPU#%d\n", smp_processor_id());
+       } else {
+               value = APIC_DM_EXTINT | APIC_LVT_MASKED;
+               printk("masked ExtINT on CPU#%d\n", smp_processor_id());
+       }
+       apic_write_around(APIC_LVT0, value);
+
+       /*
+        * only the BP should see the LINT1 NMI signal, obviously.
+        */
+       if (!smp_processor_id())
+               value = APIC_DM_NMI;
+       else
+               value = APIC_DM_NMI | APIC_LVT_MASKED;
+       if (!APIC_INTEGRATED(ver))              /* 82489DX */
+               value |= APIC_LVT_LEVEL_TRIGGER;
+       apic_write_around(APIC_LVT1, value);
+
+       if (APIC_INTEGRATED(ver) && !esr_disable) {             /* !82489DX */
+               maxlvt = get_maxlvt();
+               if (maxlvt > 3)         /* Due to the Pentium erratum 3AP. */
+                       apic_write(APIC_ESR, 0);
+               value = apic_read(APIC_ESR);
+               printk("ESR value before enabling vector: %08lx\n", value);
+
+               value = ERROR_APIC_VECTOR;      // enables sending errors
+               apic_write_around(APIC_LVTERR, value);
+               /*
+                * spec says clear errors after enabling vector.
+                */
+               if (maxlvt > 3)
+                       apic_write(APIC_ESR, 0);
+               value = apic_read(APIC_ESR);
+               printk("ESR value after enabling vector: %08lx\n", value);
+       } else {
+               if (esr_disable)        
+                       /* 
+                        * Something untraceable is creating bad interrupts on
+                        * secondary quads ... for the moment, just leave the
+                        * ESR disabled - we can't do anything useful with the
+                        * errors anyway - mbligh
+                        */
+                       printk("Leaving ESR disabled.\n");
+               else 
+                       printk("No ESR for 82489DX.\n");
+       }
+
+       if (nmi_watchdog == NMI_LOCAL_APIC)
+               setup_apic_nmi_watchdog();
+}
+
+#ifdef CONFIG_PM
+
+#include <linux/slab.h>
+#include <linux/pm.h>
+
+static struct {
+       /* 'active' is true if the local APIC was enabled by us and
+          not the BIOS; this signifies that we are also responsible
+          for disabling it before entering apm/acpi suspend */
+       int active;
+       /* 'perfctr_pmdev' is here because the current (2.4.1) PM
+          callback system doesn't handle hierarchical dependencies */
+       struct pm_dev *perfctr_pmdev;
+       /* r/w apic fields */
+       unsigned int apic_id;
+       unsigned int apic_taskpri;
+       unsigned int apic_ldr;
+       unsigned int apic_dfr;
+       unsigned int apic_spiv;
+       unsigned int apic_lvtt;
+       unsigned int apic_lvtpc;
+       unsigned int apic_lvt0;
+       unsigned int apic_lvt1;
+       unsigned int apic_lvterr;
+       unsigned int apic_tmict;
+       unsigned int apic_tdcr;
+} apic_pm_state;
+
+static void apic_pm_suspend(void *data)
+{
+       unsigned int l, h;
+       unsigned long flags;
+
+       if (apic_pm_state.perfctr_pmdev)
+               pm_send(apic_pm_state.perfctr_pmdev, PM_SUSPEND, data);
+       apic_pm_state.apic_id = apic_read(APIC_ID);
+       apic_pm_state.apic_taskpri = apic_read(APIC_TASKPRI);
+       apic_pm_state.apic_ldr = apic_read(APIC_LDR);
+       apic_pm_state.apic_dfr = apic_read(APIC_DFR);
+       apic_pm_state.apic_spiv = apic_read(APIC_SPIV);
+       apic_pm_state.apic_lvtt = apic_read(APIC_LVTT);
+       apic_pm_state.apic_lvtpc = apic_read(APIC_LVTPC);
+       apic_pm_state.apic_lvt0 = apic_read(APIC_LVT0);
+       apic_pm_state.apic_lvt1 = apic_read(APIC_LVT1);
+       apic_pm_state.apic_lvterr = apic_read(APIC_LVTERR);
+       apic_pm_state.apic_tmict = apic_read(APIC_TMICT);
+       apic_pm_state.apic_tdcr = apic_read(APIC_TDCR);
+       __save_flags(flags);
+       __cli();
+       disable_local_APIC();
+       rdmsr(MSR_IA32_APICBASE, l, h);
+       l &= ~MSR_IA32_APICBASE_ENABLE;
+       wrmsr(MSR_IA32_APICBASE, l, h);
+       __restore_flags(flags);
+}
+
+static void apic_pm_resume(void *data)
+{
+       unsigned int l, h;
+       unsigned long flags;
+
+       __save_flags(flags);
+       __cli();
+       rdmsr(MSR_IA32_APICBASE, l, h);
+       l &= ~MSR_IA32_APICBASE_BASE;
+       l |= MSR_IA32_APICBASE_ENABLE | APIC_DEFAULT_PHYS_BASE;
+       wrmsr(MSR_IA32_APICBASE, l, h);
+       apic_write(APIC_LVTERR, ERROR_APIC_VECTOR | APIC_LVT_MASKED);
+       apic_write(APIC_ID, apic_pm_state.apic_id);
+       apic_write(APIC_DFR, apic_pm_state.apic_dfr);
+       apic_write(APIC_LDR, apic_pm_state.apic_ldr);
+       apic_write(APIC_TASKPRI, apic_pm_state.apic_taskpri);
+       apic_write(APIC_SPIV, apic_pm_state.apic_spiv);
+       apic_write(APIC_LVT0, apic_pm_state.apic_lvt0);
+       apic_write(APIC_LVT1, apic_pm_state.apic_lvt1);
+       apic_write(APIC_LVTPC, apic_pm_state.apic_lvtpc);
+       apic_write(APIC_LVTT, apic_pm_state.apic_lvtt);
+       apic_write(APIC_TDCR, apic_pm_state.apic_tdcr);
+       apic_write(APIC_TMICT, apic_pm_state.apic_tmict);
+       apic_write(APIC_ESR, 0);
+       apic_read(APIC_ESR);
+       apic_write(APIC_LVTERR, apic_pm_state.apic_lvterr);
+       apic_write(APIC_ESR, 0);
+       apic_read(APIC_ESR);
+       __restore_flags(flags);
+       if (apic_pm_state.perfctr_pmdev)
+               pm_send(apic_pm_state.perfctr_pmdev, PM_RESUME, data);
+}
+
+static int apic_pm_callback(struct pm_dev *dev, pm_request_t rqst, void *data)
+{
+       switch (rqst) {
+       case PM_SUSPEND:
+               apic_pm_suspend(data);
+               break;
+       case PM_RESUME:
+               apic_pm_resume(data);
+               break;
+       }
+       return 0;
+}
+
+/* perfctr driver should call this instead of pm_register() */
+struct pm_dev *apic_pm_register(pm_dev_t type,
+                               unsigned long id,
+                               pm_callback callback)
+{
+       struct pm_dev *dev;
+
+       if (!apic_pm_state.active)
+               return pm_register(type, id, callback);
+       if (apic_pm_state.perfctr_pmdev)
+               return NULL;    /* we're busy */
+       dev = kmalloc(sizeof(struct pm_dev), GFP_KERNEL);
+       if (dev) {
+               memset(dev, 0, sizeof(*dev));
+               dev->type = type;
+               dev->id = id;
+               dev->callback = callback;
+               apic_pm_state.perfctr_pmdev = dev;
+       }
+       return dev;
+}
+
+/* perfctr driver should call this instead of pm_unregister() */
+void apic_pm_unregister(struct pm_dev *dev)
+{
+       if (!apic_pm_state.active) {
+               pm_unregister(dev);
+       } else if (dev == apic_pm_state.perfctr_pmdev) {
+               apic_pm_state.perfctr_pmdev = NULL;
+               kfree(dev);
+       }
+}
+
+static void __init apic_pm_init1(void)
+{
+       /* can't pm_register() at this early stage in the boot process
+          (causes an immediate reboot), so just set the flag */
+       apic_pm_state.active = 1;
+}
+
+static void __init apic_pm_init2(void)
+{
+       if (apic_pm_state.active)
+               pm_register(PM_SYS_DEV, 0, apic_pm_callback);
+}
+
+#else  /* CONFIG_PM */
+
+static inline void apic_pm_init1(void) { }
+static inline void apic_pm_init2(void) { }
+
+#endif /* CONFIG_PM */
+
+/*
+ * Detect and enable local APICs on non-SMP boards.
+ * Original code written by Keir Fraser.
+ */
+
+static int __init detect_init_APIC (void)
+{
+       u32 h, l, features;
+       int needs_pm = 0;
+       extern void get_cpu_vendor(struct cpuinfo_x86*);
+
+       /* Workaround for us being called before identify_cpu(). */
+       get_cpu_vendor(&boot_cpu_data);
+
+       switch (boot_cpu_data.x86_vendor) {
+       case X86_VENDOR_AMD:
+               if (boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model > 1)
+                       break;
+               goto no_apic;
+       case X86_VENDOR_INTEL:
+               if (boot_cpu_data.x86 == 6 ||
+                   (boot_cpu_data.x86 == 15 && cpu_has_apic) ||
+                   (boot_cpu_data.x86 == 5 && cpu_has_apic))
+                       break;
+               goto no_apic;
+       default:
+               goto no_apic;
+       }
+
+       if (!cpu_has_apic) {
+               /*
+                * Some BIOSes disable the local APIC in the
+                * APIC_BASE MSR. This can only be done in
+                * software for Intel P6 and AMD K7 (Model > 1).
+                */
+               rdmsr(MSR_IA32_APICBASE, l, h);
+               if (!(l & MSR_IA32_APICBASE_ENABLE)) {
+                       printk("Local APIC disabled by BIOS -- reenabling.\n");
+                       l &= ~MSR_IA32_APICBASE_BASE;
+                       l |= MSR_IA32_APICBASE_ENABLE | APIC_DEFAULT_PHYS_BASE;
+                       wrmsr(MSR_IA32_APICBASE, l, h);
+                       needs_pm = 1;
+               }
+       }
+       /*
+        * The APIC feature bit should now be enabled
+        * in `cpuid'
+        */
+       features = cpuid_edx(1);
+       if (!(features & (1 << X86_FEATURE_APIC))) {
+               printk("Could not enable APIC!\n");
+               return -1;
+       }
+       set_bit(X86_FEATURE_APIC, &boot_cpu_data.x86_capability);
+       mp_lapic_addr = APIC_DEFAULT_PHYS_BASE;
+       boot_cpu_id = 0;
+       if (nmi_watchdog != NMI_NONE)
+               nmi_watchdog = NMI_LOCAL_APIC;
+
+       printk("Found and enabled local APIC!\n");
+
+       if (needs_pm)
+               apic_pm_init1();
+
+       return 0;
+
+no_apic:
+       printk("No local APIC present or hardware disabled\n");
+       return -1;
+}
+
+void __init init_apic_mappings(void)
+{
+       unsigned long apic_phys;
+
+       /*
+        * If no local APIC can be found then set up a fake all
+        * zeroes page to simulate the local APIC and another
+        * one for the IO-APIC.
+        */
+       if (!smp_found_config && detect_init_APIC()) {
+               apic_phys = (unsigned long) alloc_bootmem_pages(PAGE_SIZE);
+               apic_phys = __pa(apic_phys);
+       } else
+               apic_phys = mp_lapic_addr;
+
+       set_fixmap_nocache(FIX_APIC_BASE, apic_phys);
+       Dprintk("mapped APIC to %08lx (%08lx)\n", APIC_BASE, apic_phys);
+
+       /*
+        * Fetch the APIC ID of the BSP in case we have a
+        * default configuration (or the MP table is broken).
+        */
+       if (boot_cpu_id == -1U)
+               boot_cpu_id = GET_APIC_ID(apic_read(APIC_ID));
+
+#ifdef CONFIG_X86_IO_APIC
+       {
+               unsigned long ioapic_phys, idx = FIX_IO_APIC_BASE_0;
+               int i;
+
+               for (i = 0; i < nr_ioapics; i++) {
+                       if (smp_found_config) {
+                               ioapic_phys = mp_ioapics[i].mpc_apicaddr;
+                       } else {
+                               ioapic_phys = (unsigned long) alloc_bootmem_pages(PAGE_SIZE);
+                               ioapic_phys = __pa(ioapic_phys);
+                       }
+                       set_fixmap_nocache(idx, ioapic_phys);
+                       Dprintk("mapped IOAPIC to %08lx (%08lx)\n",
+                                       __fix_to_virt(idx), ioapic_phys);
+                       idx++;
+               }
+       }
+#endif
+}
+
+/*
+ * This part sets up the APIC 32 bit clock in LVTT1, with HZ interrupts
+ * per second. We assume that the caller has already set up the local
+ * APIC.
+ *
+ * The APIC timer is not exactly in sync with the external timer chip; it
+ * closely follows bus clocks.
+ */
+
+/*
+ * The timer chip is already set up at HZ interrupts per second here,
+ * but we do not accept timer interrupts yet. We only allow the BP
+ * to calibrate.
+ */
+static unsigned int __init get_8254_timer_count(void)
+{
+       extern spinlock_t i8253_lock;
+       unsigned long flags;
+
+       unsigned int count;
+
+       spin_lock_irqsave(&i8253_lock, flags);
+
+       outb_p(0x00, 0x43);
+       count = inb_p(0x40);
+       count |= inb_p(0x40) << 8;
+
+       spin_unlock_irqrestore(&i8253_lock, flags);
+
+       return count;
+}
+
+void __init wait_8254_wraparound(void)
+{
+       unsigned int curr_count, prev_count=~0;
+       int delta;
+
+       curr_count = get_8254_timer_count();
+
+       do {
+               prev_count = curr_count;
+               curr_count = get_8254_timer_count();
+               delta = curr_count-prev_count;
+
+       /*
+        * This limit for delta seems arbitrary, but it isn't, it's
+        * slightly above the level of error a buggy Mercury/Neptune
+        * chipset timer can cause.
+        */
+
+       } while (delta < 300);
+}
+
+/*
+ * This function sets up the local APIC timer, with a timeout of
+ * 'clocks' APIC bus clocks. During calibration we actually call
+ * this function twice on the boot CPU, once with a bogus timeout
+ * value, second time for real. The other (noncalibrating) CPUs
+ * call this function only once, with the real, calibrated value.
+ *
+ * We do reads before writes even if unnecessary, to get around the
+ * P5 APIC double write bug.
+ */
+
+#define APIC_DIVISOR 16
+
+void __setup_APIC_LVTT(unsigned int clocks)
+{
+       unsigned int lvtt1_value, tmp_value;
+
+       lvtt1_value = SET_APIC_TIMER_BASE(APIC_TIMER_BASE_DIV) |
+                       APIC_LVT_TIMER_PERIODIC | LOCAL_TIMER_VECTOR;
+       apic_write_around(APIC_LVTT, lvtt1_value);
+
+       /*
+        * Divide PICLK by 16
+        */
+       tmp_value = apic_read(APIC_TDCR);
+       apic_write_around(APIC_TDCR, (tmp_value
+                               & ~(APIC_TDR_DIV_1 | APIC_TDR_DIV_TMBASE))
+                               | APIC_TDR_DIV_16);
+
+       apic_write_around(APIC_TMICT, clocks/APIC_DIVISOR);
+}
+
+void setup_APIC_timer(void * data)
+{
+       unsigned long clocks = (unsigned long) data, slice, t0, t1;
+       unsigned long flags;
+       int delta;
+
+       __save_flags(flags);
+       __sti();
+       /*
+        * ok, Intel has some smart code in their APIC that knows
+        * if a CPU was in 'hlt' lowpower mode, and this increases
+        * its APIC arbitration priority. To avoid the external timer
+        * IRQ APIC event being in sync with the APIC clock, we
+        * introduce an interrupt skew to spread out timer events.
+        *
+        * The number of slices within a 'big' timeslice is smp_num_cpus+1.
+        */
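+       /*
+        * For example (illustration only): with 3 CPUs and clocks = 1000000,
+        * slice = 250000 bus clocks, so CPU0 re-arms its timer 250000 clocks
+        * after the 8254 wraparound, CPU1 after 500000 and CPU2 after 750000.
+        */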
+
+       slice = clocks / (smp_num_cpus+1);
+       printk("cpu: %d, clocks: %lu, slice: %lu\n", smp_processor_id(), clocks, slice);
+
+       /*
+        * Wait for IRQ0's slice:
+        */
+       wait_8254_wraparound();
+
+       __setup_APIC_LVTT(clocks);
+
+       t0 = apic_read(APIC_TMICT)*APIC_DIVISOR;
+       /* Wait till TMCCT gets reloaded from TMICT... */
+       do {
+               t1 = apic_read(APIC_TMCCT)*APIC_DIVISOR;
+               delta = (int)(t0 - t1 - slice*(smp_processor_id()+1));
+       } while (delta >= 0);
+       /* Now wait for our slice for real. */
+       do {
+               t1 = apic_read(APIC_TMCCT)*APIC_DIVISOR;
+               delta = (int)(t0 - t1 - slice*(smp_processor_id()+1));
+       } while (delta < 0);
+
+       __setup_APIC_LVTT(clocks);
+
+       printk("CPU%d<T0:%lu,T1:%lu,D:%d,S:%lu,C:%lu>\n", smp_processor_id(), t0, t1, delta, slice, clocks);
+
+       __restore_flags(flags);
+}
+
+/*
+ * In this function we calibrate APIC bus clocks to the external
+ * timer. Unfortunately we cannot use jiffies and the timer irq
+ * to calibrate, since some later bootup code depends on getting
+ * the first irq? Ugh.
+ *
+ * We want to do the calibration only once since we
+ * want to have local timer irqs in sync. CPUs connected
+ * by the same APIC bus have the very same bus frequency.
+ * And we want to have irqs off anyway, so no accidental
+ * APIC irq that way.
+ */
+
+int __init calibrate_APIC_clock(void)
+{
+       unsigned long long t1 = 0, t2 = 0;
+       long tt1, tt2;
+       long result;
+       int i;
+       const int LOOPS = HZ/10;
+
+       printk("calibrating APIC timer ...\n");
+
+       /*
+        * Put whatever arbitrary (but long enough) timeout
+        * value into the APIC clock, we just want to get the
+        * counter running for calibration.
+        */
+       __setup_APIC_LVTT(1000000000);
+
+       /*
+        * The timer chip counts down to zero. Let's wait
+        * for a wraparound to start exact measurement:
+        * (the current tick might have been already half done)
+        */
+
+       wait_8254_wraparound();
+
+       /*
+        * We wrapped around just now. Let's start:
+        */
+       if (cpu_has_tsc)
+               rdtscll(t1);
+       tt1 = apic_read(APIC_TMCCT);
+
+       /*
+        * Let's wait for LOOPS wraparounds:
+        */
+       for (i = 0; i < LOOPS; i++)
+               wait_8254_wraparound();
+
+       tt2 = apic_read(APIC_TMCCT);
+       if (cpu_has_tsc)
+               rdtscll(t2);
+
+       /*
+        * The APIC bus clock counter is 32 bits only, it
+        * might have overflowed, but note that we use signed
+        * longs, thus no extra care needed.
+        *
+        * underflowed to be exact, as the timer counts down ;)
+        */
+
+       result = (tt1-tt2)*APIC_DIVISOR/LOOPS;
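+       /*
+        * tt1 - tt2 is the number of divided-by-16 timer ticks that elapsed
+        * over LOOPS periods of the 8254, i.e. over LOOPS/HZ seconds, so
+        * result is the number of APIC bus clocks per 1/HZ second (per jiffy),
+        * which is what __setup_APIC_LVTT() is later programmed with.
+        */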
+
+       if (cpu_has_tsc)
+               printk("..... CPU clock speed is %ld.%04ld MHz.\n",
+                       ((long)(t2-t1)/LOOPS)/(1000000/HZ),
+                       ((long)(t2-t1)/LOOPS)%(1000000/HZ));
+
+       printk("..... host bus clock speed is %ld.%04ld MHz.\n",
+               result/(1000000/HZ),
+               result%(1000000/HZ));
+
+       return result;
+}
+
+static unsigned long calibration_result;
+
+void __init setup_APIC_clocks (void)
+{
+       printk("Using local APIC timer interrupts.\n");
+       using_apic_timer = 1;
+
+       __cli();
+
+       calibration_result = calibrate_APIC_clock();
+       /*
+        * Now set up the timer for real.
+        */
+       setup_APIC_timer((void *)calibration_result);
+
+       __sti();
+
+       /* and update all other cpus */
+       smp_call_function(setup_APIC_timer, (void *)calibration_result, 1, 1);
+}
+
+void __init disable_APIC_timer(void)
+{
+       if (using_apic_timer) {
+               unsigned long v;
+
+               v = apic_read(APIC_LVTT);
+               apic_write_around(APIC_LVTT, v | APIC_LVT_MASKED);
+       }
+}
+
+void enable_APIC_timer(void)
+{
+       if (using_apic_timer) {
+               unsigned long v;
+
+               v = apic_read(APIC_LVTT);
+               apic_write_around(APIC_LVTT, v & ~APIC_LVT_MASKED);
+       }
+}
+
+/*
+ * the frequency of the profiling timer can be changed
+ * by writing a multiplier value into /proc/profile.
+ */
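+/*
+ * With multiplier N the local APIC timer on each CPU ends up firing N*HZ
+ * times per second: kernel profiling samples are taken on every local tick,
+ * while process accounting still runs only once every N ticks (see
+ * smp_local_timer_interrupt() below).
+ */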
+int setup_profiling_timer(unsigned int multiplier)
+{
+       int i;
+
+       /*
+        * Sanity check. [at least 500 APIC cycles should be
+        * between APIC interrupts as a rule of thumb, to avoid
+        * irqs flooding us]
+        */
+       if ( (!multiplier) || (calibration_result/multiplier < 500))
+               return -EINVAL;
+
+       /* 
+        * Set the new multiplier for each CPU. CPUs don't start using the
+        * new values until the next timer interrupt in which they do process
+        * accounting. At that time they also adjust their APIC timers
+        * accordingly.
+        */
+       for (i = 0; i < NR_CPUS; ++i)
+               prof_multiplier[i] = multiplier;
+
+       return 0;
+}
+
+#undef APIC_DIVISOR
+
+/*
+ * Local timer interrupt handler. It does both profiling and
+ * process statistics/rescheduling.
+ *
+ * We do profiling in every local tick, statistics/rescheduling
+ * happen only every 'profiling multiplier' ticks. The default
+ * multiplier is 1 and it can be changed by writing the new multiplier
+ * value into /proc/profile.
+ */
+
+inline void smp_local_timer_interrupt(struct pt_regs * regs)
+{
+       int user = user_mode(regs);
+       int cpu = smp_processor_id();
+
+       /*
+        * The profiling function is SMP safe. (nothing can mess
+        * around with "current", and the profiling counters are
+        * updated with atomic operations). This is especially
+        * useful with a profiling multiplier != 1
+        */
+       if (!user)
+               x86_do_profile(regs->rip);
+
+       if (--prof_counter[cpu] <= 0) {
+               /*
+                * The multiplier may have changed since the last time we got
+                * to this point as a result of the user writing to
+                * /proc/profile. In this case we need to adjust the APIC
+                * timer accordingly.
+                *
+                * Interrupts are already masked off at this point.
+                */
+               prof_counter[cpu] = prof_multiplier[cpu];
+               if (prof_counter[cpu] != prof_old_multiplier[cpu]) {
+                       __setup_APIC_LVTT(calibration_result/prof_counter[cpu]);
+                       prof_old_multiplier[cpu] = prof_counter[cpu];
+               }
+
+#ifdef CONFIG_SMP
+               update_process_times(user);
+#endif
+       }
+
+       /*
+        * We take the 'long' return path, and there every subsystem
+        * grabs the appropriate locks (kernel lock / irq lock).
+        *
+        * we might want to decouple profiling from the 'long path',
+        * and do the profiling totally in assembly.
+        *
+        * Currently this isn't too much of an issue (performance wise),
+        * we can take more than 100K local irqs per second on a 100 MHz P5.
+        */
+}
+
+/*
+ * Local APIC timer interrupt. This is the most natural way for doing
+ * local interrupts, but local timer interrupts can be emulated by
+ * broadcast interrupts too. [in case the hw doesn't support APIC timers]
+ *
+ * [ if a single-CPU system runs an SMP kernel then we call the local
+ *   interrupt as well. Thus we cannot inline the local irq ... ]
+ */
+unsigned int apic_timer_irqs [NR_CPUS];
+
+void smp_apic_timer_interrupt(struct pt_regs regs)
+{
+       int cpu = smp_processor_id();
+
+       /*
+        * the NMI deadlock-detector uses this.
+        */
+       apic_timer_irqs[cpu]++;
+
+       /*
+        * NOTE! We'd better ACK the irq immediately,
+        * because timer handling can be slow.
+        */
+       ack_APIC_irq();
+       /*
+        * update_process_times() expects us to have done irq_enter().
+        * Besides, if we don't, timer interrupts ignore the global
+        * interrupt lock, which is the WrongThing (tm) to do.
+        */
+       irq_enter(cpu, 0);
+       smp_local_timer_interrupt(&regs);
+       irq_exit(cpu, 0);
+
+       if (softirq_pending(cpu))
+               do_softirq();
+}
+
+/*
+ * This interrupt should _never_ happen with our APIC/SMP architecture
+ */
+asmlinkage void smp_spurious_interrupt(void)
+{
+       unsigned long v;
+
+       /*
+        * Check if this really is a spurious interrupt and ACK it
+        * if it is a vectored one.  Just in case...
+        * Spurious interrupts should not be ACKed.
+        */
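+       /*
+        * Each 32-bit ISR register covers 32 vectors and the registers are
+        * spaced 0x10 apart, so (vector & ~0x1f) >> 1 == (vector/32)*0x10 is
+        * the offset of the ISR word for this vector, and vector & 0x1f is
+        * the bit within it (e.g. vector 0xff -> APIC_ISR + 0x70, bit 31).
+        */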
+       v = apic_read(APIC_ISR + ((SPURIOUS_APIC_VECTOR & ~0x1f) >> 1));
+       if (v & (1 << (SPURIOUS_APIC_VECTOR & 0x1f)))
+               ack_APIC_irq();
+
+       /* see sw-dev-man vol 3, chapter 7.4.13.5 */
+       printk(KERN_INFO "spurious APIC interrupt on CPU#%d, should never happen.\n",
+                       smp_processor_id());
+}
+
+/*
+ * This interrupt should never happen with our APIC/SMP architecture
+ */
+
+asmlinkage void smp_error_interrupt(void)
+{
+       unsigned long v, v1;
+
+       /* First tickle the hardware, only then report what went on. -- REW */
+       v = apic_read(APIC_ESR);
+       apic_write(APIC_ESR, 0);
+       v1 = apic_read(APIC_ESR);
+       ack_APIC_irq();
+       atomic_inc(&irq_err_count);
+
+       /* Here is what the APIC error bits mean:
+          0: Send CS error
+          1: Receive CS error
+          2: Send accept error
+          3: Receive accept error
+          4: Reserved
+          5: Send illegal vector
+          6: Received illegal vector
+          7: Illegal register address
+       */
+       printk (KERN_ERR "APIC error on CPU%d: %02lx(%02lx)\n",
+               smp_processor_id(), v , v1);
+}
+
+/*
+ * This initializes the IO-APIC and APIC hardware if this is
+ * a UP kernel.
+ */
+int __init APIC_init_uniprocessor (void)
+{
+       if (!smp_found_config && !cpu_has_apic)
+               return -1;
+
+       /*
+        * Complain if the BIOS pretends there is one.
+        */
+       if (!cpu_has_apic && APIC_INTEGRATED(apic_version[boot_cpu_id])) {
+               printk(KERN_ERR "BIOS bug, local APIC #%d not detected!...\n",
+                       boot_cpu_id);
+               return -1;
+       }
+
+       verify_local_APIC();
+
+       connect_bsp_APIC();
+
+       phys_cpu_present_map = 1;
+       apic_write_around(APIC_ID, boot_cpu_id);
+
+       apic_pm_init2();
+
+       setup_local_APIC();
+
+       if (nmi_watchdog == NMI_LOCAL_APIC)
+               check_nmi_watchdog();
+#ifdef CONFIG_X86_IO_APIC
+       if (smp_found_config)
+               if (!skip_ioapic_setup && nr_ioapics)
+                       setup_IO_APIC();
+#endif
+       setup_APIC_clocks();
+
+       return 0;
+}
diff --git a/arch/x86_64/kernel/bluesmoke.c b/arch/x86_64/kernel/bluesmoke.c
new file mode 100644 (file)
index 0000000..0e9a06e
--- /dev/null
@@ -0,0 +1,174 @@
+
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <asm/processor.h> 
+#include <asm/msr.h>
+
+static int mce_disabled __initdata = 0;
+
+/*
+ *     Machine Check Handler For PII/PIII/K7
+ */
+
+static int banks;
+
+static void intel_machine_check(struct pt_regs * regs, long error_code)
+{
+       int recover=1;
+       u32 alow, ahigh, high, low;
+       u32 mcgstl, mcgsth;
+       int i;
+       
+       rdmsr(MSR_IA32_MCG_STATUS, mcgstl, mcgsth);
+       if(mcgstl&(1<<0))       /* Recoverable ? */
+               recover=0;
+
+       printk(KERN_EMERG "CPU %d: Machine Check Exception: %08x%08x\n", smp_processor_id(), mcgsth, mcgstl);
+       
+       for(i=0;i<banks;i++)
+       {
+               rdmsr(MSR_IA32_MC0_STATUS+i*4,low, high);
+               if(high&(1<<31))
+               {
+                       if(high&(1<<29))
+                               recover|=1;
+                       if(high&(1<<25))
+                               recover|=2;
+                       printk(KERN_EMERG "Bank %d: %08x%08x", i, high, low);
+                       high&=~(1<<31);
+                       if(high&(1<<27))
+                       {
+                               rdmsr(MSR_IA32_MC0_MISC+i*4, alow, ahigh);
+                               printk("[%08x%08x]", alow, ahigh);
+                       }
+                       if(high&(1<<26))
+                       {
+                               rdmsr(MSR_IA32_MC0_ADDR+i*4, alow, ahigh);
+                               printk(" at %08x%08x", 
+                                       ahigh, alow);
+                       }
+                       printk("\n");
+                       /* Clear it */
+                       wrmsr(MSR_IA32_MC0_STATUS+i*4, 0UL, 0UL);
+                       /* Serialize */
+                       wmb();
+               }
+       }
+       
+       if(recover&2)
+               panic("CPU context corrupt");
+       if(recover&1)
+               panic("Unable to continue");
+       printk(KERN_EMERG "Attempting to continue.\n");
+       mcgstl&=~(1<<2);
+       wrmsr(MSR_IA32_MCG_STATUS,mcgstl, mcgsth);
+}
+
+static void unexpected_machine_check(struct pt_regs *regs, long error_code)
+{ 
+       printk("unexpected machine check %lx\n", error_code); 
+} 
+
+/*
+ *     Call the installed machine check handler for this CPU setup.
+ */ 
+static void (*machine_check_vector)(struct pt_regs *, long error_code) = unexpected_machine_check;
+
+void do_machine_check(struct pt_regs * regs, long error_code)
+{
+       machine_check_vector(regs, error_code);
+}
+
+/*
+ *     Set up machine check reporting for Intel processors
+ */
+
+static void __init intel_mcheck_init(struct cpuinfo_x86 *c)
+{
+       u32 l, h;
+       int i;
+       static int done;
+       
+       /*
+        *      Check for MCE support
+        */
+
+       if( !test_bit(X86_FEATURE_MCE, &c->x86_capability) )
+               return; 
+       
+       /*
+        *      Check for PPro style MCA
+        */
+                       
+       if( !test_bit(X86_FEATURE_MCA, &c->x86_capability) )
+               return;
+               
+       /* Ok machine check is available */
+       
+       machine_check_vector = intel_machine_check;
+       wmb();
+       
+       if(done==0)
+               printk(KERN_INFO "Intel machine check architecture supported.\n");
+       rdmsr(MSR_IA32_MCG_CAP, l, h);
+       if(l&(1<<8))
+               wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff);
+       banks = l&0xff;
+       for(i=1;i<banks;i++)
+       {
+               wrmsr(MSR_IA32_MC0_CTL+4*i, 0xffffffff, 0xffffffff);
+       }
+       for(i=0;i<banks;i++)
+       {
+               wrmsr(MSR_IA32_MC0_STATUS+4*i, 0x0, 0x0);
+       }
+       set_in_cr4(X86_CR4_MCE);
+       printk(KERN_INFO "Intel machine check reporting enabled on CPU#%d.\n", smp_processor_id());
+       done=1;
+}
+
+/*
+ *     This has to be run for each processor
+ */
+
+
+
+void __init mcheck_init(struct cpuinfo_x86 *c)
+{
+       if(mce_disabled==1)
+               return;
+               
+       switch(c->x86_vendor)
+       {
+               case X86_VENDOR_AMD:
+                       /*
+                        *      AMD K7 machine check is Intel like
+                        */
+                       if(c->x86 == 6)
+                               intel_mcheck_init(c);
+                       break;
+               case X86_VENDOR_INTEL:
+                       intel_mcheck_init(c);
+                       break;
+               default:
+                       break;
+       }
+}
+
+static int __init mcheck_disable(char *str)
+{
+       mce_disabled = 1;
+       return 0;
+}
+
+static int __init mcheck_enable(char *str)
+{
+       mce_disabled = -1;
+       return 0;
+}
+
+__setup("nomce", mcheck_disable);
+__setup("mce", mcheck_enable);
diff --git a/arch/x86_64/kernel/cpuid.c b/arch/x86_64/kernel/cpuid.c
new file mode 100644 (file)
index 0000000..950cf9d
--- /dev/null
@@ -0,0 +1,177 @@
+#ident "$Id: cpuid.c,v 1.4 2001/10/24 23:58:53 ak Exp $"
+/* ----------------------------------------------------------------------- *
+ *   
+ *   Copyright 2000 H. Peter Anvin - All Rights Reserved
+ *
+ *   This program is free software; you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139,
+ *   USA; either version 2 of the License, or (at your option) any later
+ *   version; incorporated herein by reference.
+ *
+ * ----------------------------------------------------------------------- */
+
+
+/*
+ * cpuid.c
+ *
+ * x86 CPUID access device
+ *
+ * This device is accessed by lseek() to the appropriate CPUID level
+ * and then read in chunks of 16 bytes.  A larger size means multiple
+ * reads of consecutive levels.
+ *
+ * This driver uses /dev/cpu/%d/cpuid where %d is the minor number, and on
+ * an SMP box will direct the access to CPU %d.
+ */
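+/*
+ * Illustrative user-space usage (sketch only, not part of this driver):
+ *
+ *     int fd = open("/dev/cpu/0/cpuid", O_RDONLY);
+ *     unsigned int regs[4];           // EAX, EBX, ECX, EDX
+ *     lseek(fd, 1, SEEK_SET);         // select CPUID level 1
+ *     read(fd, regs, 16);             // one 16-byte chunk per level
+ */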
+
+#include <linux/module.h>
+#include <linux/config.h>
+
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/fcntl.h>
+#include <linux/init.h>
+#include <linux/poll.h>
+#include <linux/smp.h>
+#include <linux/major.h>
+#include <linux/smp_lock.h>
+#include <linux/fs.h>
+
+#include <asm/processor.h>
+#include <asm/msr.h>
+#include <asm/uaccess.h>
+#include <asm/system.h>
+
+#ifdef CONFIG_SMP
+
+struct cpuid_command {
+  int cpu;
+  u32 reg;
+  u32 *data;
+};
+
+static void cpuid_smp_cpuid(void *cmd_block)
+{
+  struct cpuid_command *cmd = (struct cpuid_command *) cmd_block;
+  
+  if ( cmd->cpu == smp_processor_id() )
+    cpuid(cmd->reg, &cmd->data[0], &cmd->data[1], &cmd->data[2], &cmd->data[3]);
+}
+
+static inline void do_cpuid(int cpu, u32 reg, u32 *data)
+{
+  struct cpuid_command cmd;
+  
+  if ( cpu == smp_processor_id() ) {
+    cpuid(reg, &data[0], &data[1], &data[2], &data[3]);
+  } else {
+    cmd.cpu  = cpu;
+    cmd.reg  = reg;
+    cmd.data = data;
+    
+    smp_call_function(cpuid_smp_cpuid, &cmd, 1, 1);
+  }
+}
+#else /* ! CONFIG_SMP */
+
+static inline void do_cpuid(int cpu, u32 reg, u32 *data)
+{
+  cpuid(reg, &data[0], &data[1], &data[2], &data[3]);
+}
+
+#endif /* ! CONFIG_SMP */
+
+static loff_t cpuid_seek(struct file *file, loff_t offset, int orig)
+{
+  loff_t ret;
+
+  lock_kernel();
+
+  switch (orig) {
+  case 0:
+    file->f_pos = offset;
+    ret = file->f_pos;
+    break;
+  case 1:
+    file->f_pos += offset;
+    ret = file->f_pos;
+    break;
+  default:
+    ret = -EINVAL;
+  }
+
+  unlock_kernel();
+  return ret;
+}
+
+static ssize_t cpuid_read(struct file * file, char * buf,
+                       size_t count, loff_t *ppos)
+{
+  u32 *tmp = (u32 *)buf;
+  u32 data[4];
+  size_t rv;
+  u32 reg = *ppos;
+  int cpu = minor(file->f_dentry->d_inode->i_rdev);
+  
+  if ( count % 16 )
+    return -EINVAL; /* Invalid chunk size */
+  
+  for ( rv = 0 ; count ; count -= 16 ) {
+    do_cpuid(cpu, reg, data);
+    if ( copy_to_user(tmp,&data,16) )
+      return -EFAULT;
+    tmp += 4;
+    *ppos = reg++;
+  }
+  
+  return ((char *)tmp) - buf;
+}
+
+static int cpuid_open(struct inode *inode, struct file *file)
+{
+  int cpu = minor(file->f_dentry->d_inode->i_rdev);
+  struct cpuinfo_x86 *c = &(cpu_data)[cpu];
+
+  if ( !(cpu_online_map & (1UL << cpu)) )
+    return -ENXIO;             /* No such CPU */
+  if ( c->cpuid_level < 0 )
+    return -EIO;               /* CPUID not supported */
+  
+  return 0;
+}
+
+/*
+ * File operations we support
+ */
+static struct file_operations cpuid_fops = {
+  owner:       THIS_MODULE,
+  llseek:      cpuid_seek,
+  read:                cpuid_read,
+  open:                cpuid_open,
+};
+
+int __init cpuid_init(void)
+{
+  if (register_chrdev(CPUID_MAJOR, "cpu/cpuid", &cpuid_fops)) {
+    printk(KERN_ERR "cpuid: unable to get major %d for cpuid\n",
+          CPUID_MAJOR);
+    return -EBUSY;
+  }
+
+  return 0;
+}
+
+void __exit cpuid_exit(void)
+{
+  unregister_chrdev(CPUID_MAJOR, "cpu/cpuid");
+}
+
+module_init(cpuid_init);
+module_exit(cpuid_exit);
+
+EXPORT_NO_SYMBOLS;
+
+MODULE_AUTHOR("H. Peter Anvin <hpa@zytor.com>");
+MODULE_DESCRIPTION("x86 generic CPUID driver");
+MODULE_LICENSE("GPL");
diff --git a/arch/x86_64/kernel/early_printk.c b/arch/x86_64/kernel/early_printk.c
new file mode 100644 (file)
index 0000000..13b1d31
--- /dev/null
@@ -0,0 +1,77 @@
+#include <asm/io.h>
+
+/* This is the "wrong" address to access the VGA buffer through; we should
+   use 0xffff8000000b8000ul, but that mapping is not available this early
+   in boot. */
+#define VGABASE                0xffffffff800b8000ul    
+
+#define MAX_YPOS       25
+#define MAX_XPOS       80
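+
+/* Each cell of the 80x25 text buffer is two bytes: the character in the low
+   byte, the attribute in the high byte (0x07 = grey on black), hence the
+   writew() of ((0x7 << 8) | c) at offset 2*(MAX_XPOS*y + x) below. */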
+
+static int current_ypos = 1, current_xpos = 0; /* We want to print before clearing BSS */
+
+void
+early_clear (void)
+{
+       int k, i;
+       for(k = 0; k < MAX_YPOS; k++)
+               for(i = 0; i < MAX_XPOS; i++)
+                       writew(0, VGABASE + 2*(MAX_XPOS*k + i));
+       current_ypos = 0;
+}
+
+void
+early_puts (const char *str)
+{
+       char c;
+       int  i, k, j;
+
+       while ((c = *str++) != '\0') {
+               if (current_ypos >= MAX_YPOS) {
+#if 1
+                       /* scroll 1 line up */
+                       for(k = 1, j = 0; k < MAX_YPOS; k++, j++) {
+                               for(i = 0; i < MAX_XPOS; i++) {
+                                       writew(readw(VGABASE + 2*(MAX_XPOS*k + i)),
+                                              VGABASE + 2*(MAX_XPOS*j + i));
+                               }
+                       }
+                       for(i = 0; i < MAX_XPOS; i++) {
+                               writew(0x720, VGABASE + 2*(MAX_XPOS*j + i));
+                       }
+                       current_ypos = MAX_YPOS-1;
+#else
+                       /* MUCH faster */
+                       early_clear();
+                       current_ypos = 0;
+#endif
+               }
+               if (c == '\n') {
+                       current_xpos = 0;
+                       current_ypos++;
+               } else if (c != '\r')  {
+                       writew(((0x7 << 8) | (unsigned short) c),
+                              VGABASE + 2*(MAX_XPOS*current_ypos + current_xpos++));
+                       if (current_xpos >= MAX_XPOS) {
+                               current_xpos = 0;
+                               current_ypos++;
+                       }
+               }
+       }
+}
+
+static char buf[1024];
+
+int early_printk(const char *fmt, ...)
+{
+       va_list args;
+       int i;
+
+       va_start(args, fmt);
+       i = vsprintf(buf, fmt, args); /* hopefully i < sizeof(buf)-4 */
+       va_end(args);
+
+       early_puts(buf);
+
+       return i;
+}
diff --git a/arch/x86_64/kernel/entry.S b/arch/x86_64/kernel/entry.S
new file mode 100644 (file)
index 0000000..e46bd97
--- /dev/null
@@ -0,0 +1,640 @@
+/*
+ *  linux/arch/x86_64/entry.S
+ *
+ *  Copyright (C) 1991, 1992  Linus Torvalds
+ *  Copyright (C) 2000, 2001, 2002  Andi Kleen SuSE Labs
+ *  Copyright (C) 2000  Pavel Machek <pavel@suse.cz>
+ * 
+ *  $Id: entry.S,v 1.66 2001/11/11 17:47:47 ak Exp $           
+ */
+
+/*
+ * entry.S contains the system-call and fault low-level handling routines.
+ *
+ * NOTE: This code handles signal-recognition, which happens every time
+ * after an interrupt and after each system call.
+ * 
+ * Normal syscalls and interrupts don't save a full stack frame; this is
+ * only done for syscall tracing, signals or fork/exec et al.
+ * 
+ * A note on terminology:       
+ * - top of stack: Architecture defined interrupt frame from SS to RIP 
+ * at the top of the kernel process stack.     
+ * - partial stack frame: partially saved registers up to R11.
+ * - full stack frame: Like partial stack frame, but all registers saved.
+ *     
+ * TODO:        
+ * - schedule it carefully for the final hardware.
+ */
+
+#define ASSEMBLY 1
+#include <linux/config.h>
+#include <linux/linkage.h>
+#include <asm/segment.h>
+#include <asm/smp.h>
+#include <asm/cache.h>
+#include <asm/errno.h>
+#include <asm/calling.h>
+#include <asm/offset.h>
+#include <asm/msr.h>
+#include <asm/unistd.h>
+#include <asm/thread_info.h>
+       
+#define RIP_SYMBOL_NAME(x) x(%rip)
+
+       .code64
+
+#define PDAREF(field) %gs:field                        
+
+#ifdef CONFIG_PREEMPT
+#define preempt_stop cli
+#else
+#define preempt_stop
+#define retint_kernel retint_restore_args
+#endif 
+       
+/*
+ * C code is not supposed to know about undefined top of stack. Every time 
+ * a C function with a pt_regs argument is called from the SYSCALL based
+ * fast path, FIXUP_TOP_OF_STACK is needed.
+ * RESTORE_TOP_OF_STACK syncs the syscall state after any possible ptregs
+ * manipulation.
+ */            
+               
+       /* %rsp:at FRAMEEND */ 
+       .macro FIXUP_TOP_OF_STACK tmp
+       movq    PDAREF(pda_oldrsp),\tmp
+       movq    \tmp,RSP(%rsp)
+       movq    $__USER_DS,SS(%rsp)
+       movq    $__USER_CS,CS(%rsp)
+       movq    RCX(%rsp),\tmp  /* get return address */
+       movq    \tmp,RIP(%rsp)
+       movq    R11(%rsp),\tmp  /* get eflags */
+       movq    \tmp,EFLAGS(%rsp)
+       .endm
+
+       .macro RESTORE_TOP_OF_STACK tmp,offset=0
+       movq   RSP-\offset(%rsp),\tmp
+       movq   \tmp,PDAREF(pda_oldrsp)
+       movq   RIP-\offset(%rsp),\tmp
+       movq   \tmp,RCX-\offset(%rsp)
+       movq   EFLAGS-\offset(%rsp),\tmp
+       movq   \tmp,R11-\offset(%rsp)
+       .endm
+
+       .macro FAKE_STACK_FRAME child_rip
+       /* push in order ss, rsp, eflags, cs, rip */
+       xorq %rax, %rax
+       pushq %rax /* ss */
+       pushq %rax /* rsp */
+       pushq %rax /* eflags */
+       pushq $__KERNEL_CS /* cs */
+       pushq \child_rip /* rip */
+       pushq   %rax /* orig rax */
+       .endm
+
+       .macro UNFAKE_STACK_FRAME
+       addq $8*6, %rsp
+       .endm
+
+       
+/*
+ * A newly forked process directly context switches into this.
+ */    
+ENTRY(ret_from_fork)
+       movq %rbx, %rdi
+       call schedule_tail
+       GET_THREAD_INFO(%rcx)
+       bt $TIF_SYSCALL_TRACE,threadinfo_flags(%rcx)
+       jc rff_trace
+rff_action:    
+       RESTORE_REST
+       cmpq $__KERNEL_CS,CS-ARGOFFSET(%rsp)    # from kernel_thread?
+       je   int_ret_from_sys_call
+       testl $_TIF_IA32,threadinfo_flags(%rcx)
+       jnz  int_ret_from_sys_call
+       RESTORE_TOP_OF_STACK %rdi,ARGOFFSET
+       jmp ret_from_sys_call
+rff_trace:
+       movq %rsp,%rdi
+       call syscall_trace
+       jmp rff_action
+
+/*
+ * System call entry. Up to 6 arguments in registers are supported.
+ *
+ * SYSCALL does not save anything on the stack and does not change the
+ * stack pointer.
+ */
+               
+/*
+ * Register setup:     
+ * rax  system call number
+ * rdi  arg0
+ * rcx  return address for syscall/sysret, C arg3 
+ * rsi  arg1
+ * rdx  arg2   
+ * r10  arg3   (--> moved to rcx for C, serves as TOS flag afterwards) 
+ * r8   arg4
+ * r9   arg5
+ * r11  eflags for syscall/sysret, temporary for C
+ * r12-r15,rbp,rbx saved by C code, not touched.               
+ * 
+ * Interrupts are off on entry.
+ * Only called from user space.
+ *
+ * XXX need to add a flag for thread_saved_pc/KSTK_*.                  
+ */                                    
+
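Illustration only (a sketch, not part of the patch): how this register layout
lines up with the C calling convention. sys_example and its parameter names
are hypothetical.

    /* After the "movq %r10,%rcx" in the stub below, the six syscall argument
     * registers rdi, rsi, rdx, r10, r8, r9 arrive as the six C parameters of
     * whichever handler sys_call_table points at. */
    long sys_example(unsigned long a0, unsigned long a1, unsigned long a2,
                     unsigned long a3, unsigned long a4, unsigned long a5)
    {
            return a0 + a1 + a2 + a3 + a4 + a5;     /* placeholder body */
    }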
+ENTRY(system_call)
+       swapgs
+       movq    %rsp,PDAREF(pda_oldrsp) 
+       movq    PDAREF(pda_kernelstack),%rsp
+       pushq %rax
+       sti                                     
+       SAVE_ARGS
+       GET_THREAD_INFO(%rcx)
+       bt    $TIF_SYSCALL_TRACE,threadinfo_flags(%rcx) 
+       jc    tracesys
+       cmpq $__NR_syscall_max,%rax
+       ja badsys
+       movq %r10,%rcx
+       call *sys_call_table(,%rax,8)  # XXX:    rip relative
+       movq %rax,RAX-ARGOFFSET(%rsp)
+/*
+ * Syscall return path ending with SYSRET (fast path)
+ * Has incomplete stack frame and undefined top of stack. 
+ */            
+ENTRY(ret_from_sys_call)       
+       GET_THREAD_INFO(%rcx)
+       cli
+       movl threadinfo_flags(%rcx),%edx
+       andl $_TIF_WORK_MASK,%edx       # tracesys has already been checked.
+       jnz  sysret_careful 
+sysret_restore_args:
+       RESTORE_ARGS
+       movq    PDAREF(pda_oldrsp),%rsp
+       swapgs
+       SYSRET64
+
+sysret_careful:
+       bt $TIF_NEED_RESCHED,%edx
+       jnc 1f
+       call schedule
+       jmp ret_from_sys_call
+1:     sti
+       SAVE_REST
+       FIXUP_TOP_OF_STACK %rax
+       xorq %rsi,%rsi          # oldset
+       movq %rsp,%rdi          # &ptregs       
+       call do_notify_resume
+       RESTORE_TOP_OF_STACK %rax
+       RESTORE_REST
+       jmp ret_from_sys_call
+       
+tracesys:                       
+       SAVE_REST
+       movq $-ENOSYS,RAX(%rsp)
+       FIXUP_TOP_OF_STACK %rdi
+       movq %rsp,%rdi
+       call syscall_trace
+       LOAD_ARGS ARGOFFSET  /* reload args from stack in case ptrace changed them */
+       RESTORE_REST
+       cmpq $__NR_syscall_max,%rax
+       ja  1f
+       movq %r10,%rcx  /* fixup for C */
+       movl $1,%r10d   /* set TOS flag */ 
+       call *sys_call_table(,%rax,8)
+       movq %rax,RAX-ARGOFFSET(%rsp)
+       SAVE_REST
+1:     movq %rsp,%rdi
+       call syscall_trace
+       RESTORE_TOP_OF_STACK %rbx
+       RESTORE_REST
+       jmp ret_from_sys_call
+               
+badsys:
+       movq $-ENOSYS,RAX-ARGOFFSET(%rsp)       
+       jmp ret_from_sys_call
+
+/* 
+ * Syscall return path ending with IRET.
+ * Has correct top of stack, but partial stack frame.
+ */    
+ENTRY(int_ret_from_sys_call)   
+       cmpq $__KERNEL_CS,CS-ARGOFFSET(%rsp)    # in kernel syscall?
+       je int_restore_args
+       movl $_TIF_ALLWORK_MASK,%esi
+int_with_reschedule:
+       GET_THREAD_INFO(%rcx)
+       cli
+       movl threadinfo_flags(%rcx),%edx
+       andl %esi,%edx
+       jnz   int_careful
+       swapgs
+int_restore_args:              
+       RESTORE_ARGS    
+       addq $8,%rsp    # Remove oldrax
+       iretq
+
+int_careful:
+       sti
+       bt $TIF_NEED_RESCHED,%edx
+       jnc  int_very_careful
+       call schedule
+       movl $_TIF_ALLWORK_MASK,%esi
+       jmp int_with_reschedule
+int_very_careful:
+       SAVE_REST
+       leaq syscall_trace(%rip),%rbp
+       leaq do_notify_resume(%rip),%rbx
+       bt $TIF_SYSCALL_TRACE,%edx
+       cmovcq %rbp,%rbx
+       xorq %rsi,%rsi          # oldset -> arg2 
+       movq %rsp,%rdi          # &ptregs -> arg1
+       call *%rbx
+       RESTORE_REST
+       movl $_TIF_WORK_MASK,%esi
+       jmp int_with_reschedule 
+               
+/* 
+ * Certain special system calls need to save a complete full stack frame.
+ */                                                            
+       
+       .macro PTREGSCALL label,func
+       .globl \label
+\label:
+       leaq    \func(%rip),%rax
+       jmp     ptregscall_common
+       .endm
+
+       PTREGSCALL stub_clone, sys_clone
+       PTREGSCALL stub_fork, sys_fork
+       PTREGSCALL stub_vfork, sys_vfork
+       PTREGSCALL stub_rt_sigsuspend, sys_rt_sigsuspend
+       PTREGSCALL stub_sigaltstack, sys_sigaltstack
+       PTREGSCALL stub_iopl, sys_iopl
+
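For reference, expanding the first invocation above by hand shows what each
generated stub looks like:

            .globl stub_clone
    stub_clone:
            leaq    sys_clone(%rip),%rax
            jmp     ptregscall_common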
+ENTRY(ptregscall_common)
+       popq %r11
+       SAVE_REST
+       movq %r11, %r15
+       FIXUP_TOP_OF_STACK %r11
+       call *%rax
+       RESTORE_TOP_OF_STACK %r11
+       movq %r15, %r11
+       RESTORE_REST
+       pushq %r11
+       ret
+       
+ENTRY(stub_execve)
+       popq %r11
+       SAVE_REST
+       movq %r11, %r15
+       FIXUP_TOP_OF_STACK %r11
+       call sys_execve
+       GET_THREAD_INFO(%rcx)
+       testl $_TIF_IA32,threadinfo_flags(%rcx)
+       jnz exec_32bit
+       RESTORE_TOP_OF_STACK %r11
+       movq %r15, %r11
+       RESTORE_REST
+       push %r11
+       ret
+
+exec_32bit:
+       movq %rax,RAX(%rsp)
+       RESTORE_REST
+       jmp int_ret_from_sys_call
+       
+/*
+ * sigreturn is special because it needs to restore all registers on return.
+ * This cannot be done with SYSRET, so use the IRET return path instead.
+ */                
+ENTRY(stub_rt_sigreturn)
+       addq $8, %rsp           
+       SAVE_REST
+       FIXUP_TOP_OF_STACK %r11
+       call sys_rt_sigreturn
+       movq %rax,RAX(%rsp) # fixme, this could be done at the higher layer
+       RESTORE_REST
+       jmp int_ret_from_sys_call
+
+/* 
+ * Interrupt entry/exit.
+ *
+ * Interrupt entry points save only callee-clobbered registers in the fast path.
+ *     
+ * Entry runs with interrupts off.     
+ */ 
+
+/* 0(%rsp): interrupt number */ 
+ENTRY(common_interrupt)
+       cmpq $__KERNEL_CS,16(%rsp)
+       je   1f
+       swapgs
+1:     cld
+       SAVE_ARGS
+#ifdef CONFIG_PREEMPT
+       GET_THREAD_INFO(%rdx)
+       incl threadinfo_preempt_count(%rdx)
+#endif         
+       leaq -ARGOFFSET(%rsp),%rdi      # arg1 for handler
+       addl $1,PDAREF(pda_irqcount)    # XXX: should be merged with irq.c irqcount
+       movq PDAREF(pda_irqstackptr),%rax
+       cmoveq %rax,%rsp                                                        
+       pushq %rdi                      # save old stack        
+       call do_IRQ
+       /* 0(%rsp): oldrsp-ARGOFFSET */
+       .globl ret_from_intr
+ret_from_intr:         
+       popq  %rdi
+       cli     
+       subl $1,PDAREF(pda_irqcount)
+       leaq ARGOFFSET(%rdi),%rsp
+exit_intr:             
+       GET_THREAD_INFO(%rcx)
+#ifdef CONFIG_PREEMPT  
+       decl threadinfo_preempt_count(%rcx)
+#endif
+       cmpq $__KERNEL_CS,CS-ARGOFFSET(%rsp)
+       je retint_kernel
+       
+       /* Interrupt came from user space */
+       /*
+        * Shared return path for exceptions and interrupts that came from user space.
+        * Has a correct top of stack, but only a partial stack frame.
+        * %rcx: thread info. Interrupts off.
+        */             
+retint_with_reschedule:
+       testl $_TIF_WORK_MASK,threadinfo_flags(%rcx)
+       jnz  retint_careful
+retint_swapgs:         
+       swapgs 
+retint_restore_args:                           
+       RESTORE_ARGS                                            
+       addq $8,%rsp
+       iretq
+
+retint_careful:
+       movl  threadinfo_flags(%rcx),%edx
+       bt    $TIF_NEED_RESCHED,%edx
+       jnc   retint_signal
+       sti
+       call  schedule
+retint_next_try:               
+       GET_THREAD_INFO(%rcx)
+       cli
+       jmp retint_with_reschedule
+retint_signal:
+       testl $(_TIF_SIGPENDING|_TIF_NOTIFY_RESUME),%edx
+       jz    retint_swapgs
+       sti
+       SAVE_REST
+       movq $-1,ORIG_RAX(%rsp)                         
+       xorq %rsi,%rsi          # oldset
+       movq %rsp,%rdi          # &pt_regs
+       call do_notify_resume
+       RESTORE_REST
+       jmp retint_next_try
+
+#ifdef CONFIG_PREEMPT
+       /* Returning to kernel space. Check if we need preemption */
+       /* rcx:  threadinfo. interrupts off. */
+       .p2align
+retint_kernel: 
+       cmpl $0,threadinfo_preempt_count(%rcx)
+       jnz  retint_restore_args
+       bt  $TIF_NEED_RESCHED,threadinfo_flags(%rcx)
+       jnc  retint_restore_args
+       movl PDAREF(pda___local_bh_count),%eax
+       addl PDAREF(pda___local_irq_count),%eax
+       jnz  retint_restore_args
+       incl threadinfo_preempt_count(%rcx)
+       sti
+       call preempt_schedule
+       cli
+       jmp exit_intr
+#endif 
+       
+/*
+ * Exception entry points.
+ */            
+       .macro zeroentry sym
+       pushq $0        /* push error code/oldrax */ 
+       pushq %rax      /* push real oldrax to the rdi slot */ 
+       leaq  RIP_SYMBOL_NAME(\sym),%rax
+       jmp error_entry
+       .endm   
+
+       .macro errorentry sym
+       pushq %rax
+       leaq  RIP_SYMBOL_NAME(\sym),%rax
+       jmp error_entry
+       .endm
+
+/*
+ * Exception entry point. This expects an error code/orig_rax on the stack
+ * and the exception handler in %rax.  
+ */                                            
+       ALIGN
+error_entry:
+       cmpq $__KERNEL_CS,24(%rsp)      
+       je  error_kernelspace   
+       swapgs
+error_kernelspace:                     
+       sti
+       /* rdi slot contains rax, oldrax contains error code */
+       pushq %rsi
+       movq  8(%rsp),%rsi      /* load rax */
+       pushq %rdx
+       pushq %rcx
+       pushq %rsi      /* store rax */ 
+       pushq %r8
+       pushq %r9
+       pushq %r10
+       pushq %r11
+       cld     
+       SAVE_REST
+       movq %rdi,RDI(%rsp)     
+       movq %rsp,%rdi
+       movq ORIG_RAX(%rsp),%rsi        /* get error code */ 
+       movq $-1,ORIG_RAX(%rsp)
+       call *%rax
+error_exit:            
+       RESTORE_REST
+       cli
+       GET_THREAD_INFO(%rcx)   
+       cmpq $__KERNEL_CS,CS-ARGOFFSET(%rsp)
+       je retint_kernel
+       jmp retint_with_reschedule
+
+/*
+ * Create a kernel thread.
+ *
+ * C extern interface:
+ *     extern long kernel_thread(int (*fn)(void *), void * arg, unsigned long flags)
+ *
+ * asm input arguments:
+ *     rdi: fn, rsi: arg, rdx: flags
+ */
+ENTRY(kernel_thread)
+       FAKE_STACK_FRAME $child_rip
+       SAVE_ALL
+
+       # rdi: flags, rsi: usp, rdx: will be &pt_regs
+       movq %rdx,%rdi
+       orq  $CLONE_VM, %rdi
+
+       movq $-1, %rsi
+
+       movq %rsp, %rdx
+
+       # clone now
+       call do_fork
+       # save retval on the stack so it's popped before `ret`
+       movq %rax, RAX(%rsp)
+
+       /*
+        * It isn't worth checking for a reschedule here, so within the
+        * x86_64 port you can rely on kernel_thread() not rescheduling
+        * the child before returning; this avoids the need for hacks,
+        * for example when forking off the per-CPU idle tasks.
+        * [Hopefully no generic code relies on the reschedule -AK]
+        */
+       RESTORE_ALL
+       UNFAKE_STACK_FRAME
+       ret
+       
+child_rip:
+       /*
+        * Here we are in the child and the registers are set as they were
+        * at kernel_thread() invocation in the parent.
+        */
+       movq %rdi, %rax
+       movq %rsi, %rdi
+       call *%rax
+       # exit
+       xorq %rdi, %rdi
+       call do_exit
+
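Hypothetical usage sketch (not part of the patch) of the C interface
documented above; my_worker, start_worker, and the chosen CLONE_* flags are
illustrative only.

    static int my_worker(void *arg)
    {
            /* runs in the child created by kernel_thread() */
            return 0;
    }

    static void start_worker(void)
    {
            long pid = kernel_thread(my_worker, NULL, CLONE_FS | CLONE_FILES);
            if (pid < 0)
                    printk("kernel_thread failed: %ld\n", pid);
    }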
+/*
+ * execve(). This function needs to use IRET, not SYSRET, to set up all state properly.
+ *
+ * C extern interface:
+ *      extern long execve(char *name, char **argv, char **envp)
+ *
+ * asm input arguments:
+ *     rdi: name, rsi: argv, rdx: envp
+ *
+ * We want to fall back into:
+ *     extern long sys_execve(char *name, char **argv,char **envp, struct pt_regs regs)
+ *
+ * do_sys_execve asm fallback arguments:
+ *     rdi: name, rsi: argv, rdx: envp, fake frame on the stack
+ */
+ENTRY(execve)
+       FAKE_STACK_FRAME $0
+       SAVE_ALL        
+       call sys_execve
+       movq %rax, RAX(%rsp)    
+       RESTORE_REST
+       testq %rax,%rax
+       je int_ret_from_sys_call
+       RESTORE_ARGS
+       UNFAKE_STACK_FRAME
+       ret
+
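Hypothetical usage sketch (not part of the patch) of the in-kernel execve()
helper above, in the style of the init bootstrap; the path and argument
arrays are placeholders.

    static void run_init(void)
    {
            static char *argv[] = { "/sbin/init", NULL };
            static char *envp[] = { "HOME=/", "TERM=linux", NULL };

            execve("/sbin/init", argv, envp);
            /* only reached if execve() failed */
            printk("Failed to exec /sbin/init\n");
    }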
+ENTRY(page_fault)
+       errorentry do_page_fault
+
+ENTRY(coprocessor_error)
+       zeroentry do_coprocessor_error
+
+ENTRY(simd_coprocessor_error)
+       zeroentry do_simd_coprocessor_error     
+
+
+ENTRY(device_not_available)
+       cmpq $0,(%rsp)
+       jl  1f
+       swapgs
+1:     pushq $-1       
+       SAVE_ALL
+       movq  %cr0,%rax
+       leaq  math_state_restore(%rip),%rcx
+       leaq  math_emulate(%rip),%rbx
+       testl $0x4,%eax
+       cmoveq %rcx,%rbx
+       preempt_stop
+       call  *%rbx
+       jmp  error_exit
+
+ENTRY(debug)
+       zeroentry do_debug
+
+       /* XXX checkme */ 
+ENTRY(nmi)
+       cmpq $0,(%rsp)
+       jl 1f
+       swapgs
+1:     pushq $-1
+       SAVE_ALL
+       movq %rsp,%rdi
+       call do_nmi
+       RESTORE_ALL
+       addq $8,%rsp
+       cmpq $0,(%rsp)
+       jl 2f
+       swapgs
+2:     iretq
+       
+ENTRY(int3)
+       zeroentry do_int3       
+
+ENTRY(overflow)
+       zeroentry do_overflow
+
+ENTRY(bounds)
+       zeroentry do_bounds
+
+ENTRY(invalid_op)
+       zeroentry do_invalid_op 
+
+ENTRY(coprocessor_segment_overrun)
+       zeroentry do_coprocessor_segment_overrun
+
+ENTRY(reserved)
+       zeroentry do_reserved
+
+ENTRY(double_fault)
+       errorentry do_double_fault      
+
+ENTRY(invalid_TSS)
+       errorentry do_invalid_TSS
+
+ENTRY(segment_not_present)
+       errorentry do_segment_not_present
+
+ENTRY(stack_segment)
+       errorentry do_stack_segment
+
+ENTRY(general_protection)
+       errorentry do_general_protection
+
+ENTRY(alignment_check)
+       errorentry do_alignment_check
+
+ENTRY(divide_error)
+       errorentry do_divide_error
+
+ENTRY(spurious_interrupt_bug)
+       zeroentry do_spurious_interrupt_bug
+
+ENTRY(__bad_intr)
+       pushq $-1
+       SAVE_ALL
+       call  bad_intr
+       RESTORE_ALL
+       addq $8,%rsp
+       iretq
diff --git a/arch/x86_64/kernel/head.S b/arch/x86_64/kernel/head.S
new file mode 100644 (file)
index 0000000..5084567
--- /dev/null
@@ -0,0 +1,342 @@
+/*
+ *  linux/arch/x86_64/kernel/head.S -- start in 32bit and switch to 64bit
+ *
+ *  Copyright (C) 2000 Andrea Arcangeli <andrea@suse.de> SuSE
+ *  Copyright (C) 2000 Pavel Machek <pavel@suse.cz>
+ *  Copyright (C) 2000 Karsten Keil <kkeil@suse.de>
+ *  Copyright (C) 2001 2002 Andi Kleen <ak@suse.de>
+ *
+ *  $Id: head.S,v 1.41 2001/07/05 23:43:45 ak Exp $
+ */
+.code64
+.text
+
+#include <linux/linkage.h>
+#include <linux/threads.h>
+#include <asm/desc.h>
+#include <asm/segment.h>
+#include <asm/page.h>
+#include <asm/offset.h>
+       
+/* We aren't able to switch in one step to the final KERNEL ADDRESS SPACE
+ * because we need identity-mapped pages during setup, so define
+ * __START_KERNEL to 0x100000 for this stage.
+ */
+
+
+startup_32:
+.code32
+       /*
+        * At this point the CPU runs in 32bit protected mode (CS.D = 1) with
+        * paging disabled. The point of this file is to switch to 64bit
+        * long mode with a kernel mapping and to jump into the kernel
+        * virtual addresses.
+        * There is no stack until we set one up.
+        */
+
+       /* First check whether extended CPUID functions are implemented */
+       movl    $0x80000000, %eax
+       cpuid
+       cmpl    $0x80000000, %eax
+       jbe     no_long_mode
+       /* Check if long mode is implemented */
+       mov     $0x80000001, %eax
+       cpuid
+       btl     $29, %edx
+       jnc     no_long_mode
+
+       /*
+        * Prepare for entering 64bit mode
+        */
+
+       /* Enable PAE mode and PGE */
+       xorl    %eax, %eax
+       btsl    $5, %eax
+       btsl    $7, %eax
+       movl    %eax, %cr4
+
+       /* Set up the early-boot 4-level pagetables */
+       movl    $0x101000, %eax
+       movl    %eax, %cr3
+
+       /* Setup EFER (Extended Feature Enable Register) */
+       movl    $0xc0000080, %ecx
+       rdmsr
+       /* Fool rdmsr and reset %eax to avoid dependencies */
+       xorl    %eax, %eax
+       /* Enable Long Mode */
+       btsl    $8, %eax
+       /* Enable System Call */
+       btsl    $0, %eax
+       /* Make changes effective */
+       wrmsr
+
+       xorl    %eax, %eax
+       /* Enable paging and in turn activate Long Mode */
+       btsl    $31, %eax
+       /* Enable protected mode */
+       btsl    $0, %eax
+       /* Enable MP */
+       btsl    $1, %eax
+       /* Enable ET */
+       btsl    $4, %eax
+       /* Enable NE */
+       btsl    $5, %eax
+       /* Enable WP */
+       btsl    $16, %eax
+       /* Enable AM */
+       btsl    $18, %eax
+       /* Make changes effective */
+       movl    %eax, %cr0
+       jmp     reach_compatibility_mode
+reach_compatibility_mode:
+       
+       /*
+        * At this point we're in long mode but in 32bit compatibility mode
+        * with EFER.LME = 1, CS.L = 0, CS.D = 1 (and in turn
+        * EFER.LMA = 1). Now we want to jump into 64bit mode; to do that we
+        * load the new gdt/idt that has __KERNEL_CS with CS.L = 1.
+        */
+
+       /* Load new GDT with the 64bit segment using 32bit descriptor */
+       /* To avoid 32bit relocations we use fixed addresses here */
+       movl    $0x100F00, %eax
+       lgdt    (%eax)
+       movl    $0x100F10, %eax
+       /* Finally jump in 64bit mode */
+       ljmp    *(%eax)
+
+.code64
+reach_long64:
+       /*
+        * We are now running at 0x0000000000100000 and, yes, finally
+        * in 64bit mode.
+        */
+       .globl  init_rsp
+
+       /* Setup the first kernel stack (this instruction is modified by smpboot) */
+       .byte 0x48, 0xb8        /* movq *init_rsp,%rax */ 
+init_rsp:
+       .quad init_thread_union+THREAD_SIZE
+       movq    %rax, %rsp
+
+       /* zero EFLAGS after setting rsp */
+       pushq $0
+       popfq
+
+       /*
+        * We must switch to a new descriptor in kernel space for the GDT
+        * because soon the kernel won't have access any more to the userspace
+        * addresses we're currently running at. We have to do that here
+        * because in 32bit mode we couldn't load a 64bit linear address.
+        */
+       lgdt    pGDT64
+
+       /* esi is a pointer to the real mode structure with interesting info;
+          pass it to C */
+       movl    %esi, %edi
+
+       movl $__KERNEL_DS,%eax
+       movl %eax,%ss
+       movl %eax,%ds   
+       movl %eax,%es
+                       
+       /* Finally jump to run C code and to be at a real kernel address.
+        * Since we are running in identity-mapped space we have to jump
+        * to the full 64bit address; this is only possible with an
+        * indirect jump.
+        */
+       movq    initial_code(%rip),%rax
+       jmp     *%rax
+
+
+       /* SMP bootup changes this */   
+       .globl  initial_code
+initial_code:
+       .quad   x86_64_start_kernel
+
+.code32
+ENTRY(no_long_mode)
+       /* This isn't an x86-64 CPU so hang */
+1:
+       jmp     1b
+
+.org 0xf00
+pGDT32:
+       .word   gdt32_end-gdt_table32
+       .quad   gdt_table32-__START_KERNEL+0x100000
+
+.org 0xf10     
+ljumpvector:
+       .long   reach_long64-__START_KERNEL+0x100000
+       .word   __KERNEL_CS
+
+ENTRY(stext)
+ENTRY(_stext)
+
+       /*
+        * This default setting generates an identity mapping at address 0x100000
+        * and a mapping for the kernel that precisely maps virtual address
+        * 0xffffffff80000000 to physical address 0x000000 (always using the
+        * 2Mbyte large pages provided by PAE mode).
+        */
+.org 0x1000
+ENTRY(level4_pgt)
+       .quad   0x0000000000102007              /* -> level3_ident_pgt */
+       .fill   255,8,0
+       /* __PAGE_OFFSET 0xffff800000000000 */
+       .quad   0x000000000010a007
+       .fill   254,8,0
+       /* (2^48-(2*1024*1024*1024))/(2^39) = 511 */
+       .quad   0x0000000000103007              /* -> level3_kernel_pgt */
+
+.org 0x2000
+/* The kernel does not "know" about the 4th level of page tables. */
+ENTRY(swapper_pg_dir)
+ENTRY(level3_ident_pgt)
+       .quad   0x0000000000104007
+       .fill   511,8,0
+
+.org 0x3000
+ENTRY(level3_kernel_pgt)
+       .fill   510,8,0
+       /* (2^48-(2*1024*1024*1024)-((2^39)*511))/(2^30) = 510 */
+       .quad   0x0000000000105007              /* -> level2_kernel_pgt */
+       .fill   1,8,0
+
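The slot numbers quoted in the comments above (511 for level4_pgt, 510 for
level3_kernel_pgt) follow from the 9-bits-per-level split of the virtual
address; a standalone user-space check, for illustration only:

    #include <stdio.h>

    int main(void)
    {
            unsigned long long va = 0xffffffff80000000ULL;  /* kernel mapping base */
            printf("level4 slot = %llu\n", (va >> 39) & 511);   /* prints 511 */
            printf("level3 slot = %llu\n", (va >> 30) & 511);   /* prints 510 */
            return 0;
    }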
+.org 0x4000
+ENTRY(level2_ident_pgt)
+       /* 2 Mbytes are enough; this is necessary only for head.S */
+       .quad   0x0000000000000283
+       /* .fill        511,8,0 */
+       /* Jan needs more than 2Mbytes, so set a 40Mbyte mapping instead */
+       .quad   0x0000000000200183
+       .quad   0x0000000000400183
+       .quad   0x0000000000600183
+       .quad   0x0000000000800183
+       .quad   0x0000000000A00183
+       .quad   0x0000000000C00183
+       .quad   0x0000000000E00183
+       .quad   0x0000000001000183
+       .quad   0x0000000001200183
+       .quad   0x0000000001400183
+       .quad   0x0000000001600183
+       .quad   0x0000000001800183
+       .quad   0x0000000001A00183
+       .quad   0x0000000001C00183
+       .quad   0x0000000001E00183
+       .quad   0x0000000002000183
+       .quad   0x0000000002200183
+       .quad   0x0000000002400183
+       .quad   0x0000000002600183
+       .fill   492,8,0
+       
+.org 0x5000
+ENTRY(level2_kernel_pgt)
+       /* (2^48-(2*1024*1024*1024)-((2^39)*511)-((2^30)*510)) = 0 */
+       .quad   0x0000000000000183
+       .quad   0x0000000000200183
+       .quad   0x0000000000400183
+       .quad   0x0000000000600183
+       .quad   0x0000000000800183
+       .quad   0x0000000000A00183
+       .quad   0x0000000000C00183
+       .quad   0x0000000000E00183
+       .quad   0x0000000001000183
+       .quad   0x0000000001200183
+       .quad   0x0000000001400183
+       .quad   0x0000000001600183
+       .quad   0x0000000001800183
+       .quad   0x0000000001A00183
+       .quad   0x0000000001C00183
+       .quad   0x0000000001E00183
+       .quad   0x0000000002000183
+       .quad   0x0000000002200183
+       .quad   0x0000000002400183
+       .quad   0x0000000002600183
+       /*
+        * We could go ahead without any downside (except programmer typing
+        * effort :), but 40Mbyte is just enough for the statically linked
+        * part of the kernel (and extending it is trivial).
+        */
+       .fill   492,8,0
+
+.org 0x6000
+ENTRY(empty_zero_page)
+
+.org 0x7000
+ENTRY(empty_bad_page)
+
+.org 0x8000
+ENTRY(empty_bad_pte_table)
+
+.org 0x9000
+ENTRY(empty_bad_pmd_table)
+
+.org 0xa000
+ENTRY(level3_physmem_pgt)
+       .quad   0x0000000000105007              /* -> level2_kernel_pgt (so that __va works even before pagetable_init) */
+
+
+
+.org 0xb000
+
+
+.data
+
+.globl SYMBOL_NAME(gdt)
+
+       .word 0
+       .align 16
+       .word 0
+pGDT64:
+       .word   gdt_end-gdt_table
+SYMBOL_NAME_LABEL(gdt)
+       .quad   gdt_table
+       
+
+.align 64 /* cacheline aligned */
+ENTRY(gdt_table32)
+       .quad   0x0000000000000000      /* This one is magic */
+       .quad   0x0000000000000000      /* unused */
+       .quad   0x00af9a000000ffff      /* __KERNEL_CS */
+gdt32_end:     
+       
+/* We need valid kernel segments for data and code in long mode too;
+ * IRET will check the segment types.  kkeil 2000/10/28
+ * Also, SYSRET mandates a special GDT layout.
+ */
+                               
+.align 64 /* cacheline aligned, keep this synchronized with asm/desc.h */
+ENTRY(gdt_table)
+       .quad   0x0000000000000000      /* This one is magic */
+       .quad   0x0000000000000000      /* unused */
+       .quad   0x00af9a000000ffff      /* __KERNEL_CS */
+       .quad   0x00cf92000000ffff      /* __KERNEL_DS */
+       .quad   0x00cffe000000ffff      /* __USER32_CS */
+       .quad   0x00cff2000000ffff      /* __USER_DS, __USER32_DS  */           
+       .quad   0x00affa000000ffff      /* __USER_CS */
+
+       .globl  tss_start
+tss_start:
+       .rept NR_CPUS           
+       .quad   0,0                                     /* TSS descriptors. filled in later */
+       .endr
+       .globl ldt_start
+ldt_start:     
+       .rept NR_CPUS
+       .quad   0,0                                     /* LDT descriptors. filled in later */ 
+       .endr   
+gdt_end:       
+       .globl gdt_end
+
+       .align  64
+ENTRY(idt_table)       
+       .rept   256
+       .quad   0
+       .quad   0
+       .endr
+       
+.section .text.lock
+ENTRY(stext_lock)
diff --git a/arch/x86_64/kernel/head64.c b/arch/x86_64/kernel/head64.c
new file mode 100644 (file)
index 0000000..237b94f
--- /dev/null
@@ -0,0 +1,85 @@
+/*
+ *  linux/arch/x86_64/kernel/head64.c -- prepare to run common code
+ *
+ *  Copyright (C) 2000 Andrea Arcangeli <andrea@suse.de> SuSE
+ *
+ *  $Id: head64.c,v 1.22 2001/07/06 14:28:20 ak Exp $
+ */
+
+#include <linux/init.h>
+#include <linux/linkage.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+
+#include <asm/processor.h>
+
+/* Don't add a printk in here: printk relies on the PDA, which is not
+   initialized yet. */
+static void __init clear_bss(void)
+{
+       extern char __bss_start[], __bss_end[];
+       memset(__bss_start, 0,
+              (unsigned long) __bss_end - (unsigned long) __bss_start);
+}
+
+extern char x86_boot_params[2048];
+
+#define NEW_CL_POINTER         0x228   /* Relative to real mode data */
+#define OLD_CL_MAGIC_ADDR      0x90020
+#define OLD_CL_MAGIC            0xA33F
+#define OLD_CL_BASE_ADDR        0x90000
+#define OLD_CL_OFFSET           0x90022
+
+extern char saved_command_line[];
+
+static void __init copy_bootdata(char *real_mode_data)
+{
+       int new_data;
+       char * command_line;
+
+       memcpy(x86_boot_params, real_mode_data, 2048); 
+       new_data = *(int *) (x86_boot_params + NEW_CL_POINTER);
+       if (!new_data) {
+               if (OLD_CL_MAGIC != * (u16 *) OLD_CL_MAGIC_ADDR) {
+                       printk("so old bootloader that it does not support commandline?!\n");
+                       return;
+               }
+               new_data = OLD_CL_BASE_ADDR + * (u16 *) OLD_CL_OFFSET;
+               printk("old bootloader convention, maybe loadlin?\n");
+       }
+       command_line = (char *) ((u64)(new_data));
+       memcpy(saved_command_line, command_line, 2048);
+       printk("Bootdata ok (command line is %s)\n", saved_command_line);       
+}
+
+static void __init setup_boot_cpu_data(void)
+{
+       int dummy, eax;
+
+       /* get vendor info */
+       cpuid(0, &boot_cpu_data.cpuid_level,
+             (int *)&boot_cpu_data.x86_vendor_id[0],
+             (int *)&boot_cpu_data.x86_vendor_id[8],
+             (int *)&boot_cpu_data.x86_vendor_id[4]);
+
+       /* get cpu type */
+       cpuid(1, &eax, &dummy, &dummy, &boot_cpu_data.x86_capability[0]);
+       boot_cpu_data.x86 = (eax >> 8) & 0xf;
+       boot_cpu_data.x86_model = (eax >> 4) & 0xf;
+       boot_cpu_data.x86_mask = eax & 0xf;
+}
+
+extern void start_kernel(void), pda_init(int); 
+
+void __init x86_64_start_kernel(char * real_mode_data)
+{
+       clear_bss();
+       pda_init(0);
+
+       copy_bootdata(real_mode_data);
+       setup_boot_cpu_data();
+
+
+       start_kernel();
+}
diff --git a/arch/x86_64/kernel/i387.c b/arch/x86_64/kernel/i387.c
new file mode 100644 (file)
index 0000000..c024613
--- /dev/null
@@ -0,0 +1,439 @@
+/*
+ *  linux/arch/x86_64/kernel/i387.c
+ *
+ *  Copyright (C) 1994 Linus Torvalds
+ *
+ *  Pentium III FXSR, SSE support
+ *  General FPU state handling cleanups
+ *     Gareth Hughes <gareth@valinux.com>, May 2000
+ */
+
+#include <linux/config.h>
+#include <linux/sched.h>
+#include <linux/spinlock.h>
+#include <asm/processor.h>
+#include <asm/i387.h>
+#include <asm/sigcontext.h>
+#include <asm/user.h>
+#include <asm/ptrace.h>
+#include <asm/uaccess.h>
+
+#define HAVE_HWFP 1
+
+/*
+ * The _current_ task is using the FPU for the first time,
+ * so initialize it, set the mxcsr to its default reset
+ * value if we support XMM instructions, and then
+ * remember that the current task has used the FPU.
+ */
+void init_fpu(void)
+{
+       __asm__("fninit");
+       if ( cpu_has_xmm )
+               load_mxcsr(0x1f80);
+               
+       current->used_math = 1;
+}
+
+/*
+ * FPU lazy state save handling.
+ */
+
+static inline void __save_init_fpu( struct task_struct *tsk )
+{
+       if ( cpu_has_fxsr ) {
+               asm volatile( "fxsave %0 ; fnclex"
+                             : "=m" (tsk->thread.i387.fxsave) );
+       } else {
+               asm volatile( "fnsave %0 ; fwait"
+                             : "=m" (tsk->thread.i387.fsave) );
+       }
+       clear_tsk_thread_flag(tsk, TIF_USEDFPU);
+}
+
+void save_init_fpu( struct task_struct *tsk )
+{
+       __save_init_fpu(tsk);
+       stts();
+}
+
+void kernel_fpu_begin(void)
+{
+       preempt_disable();
+       if (test_thread_flag(TIF_USEDFPU)) {
+               __save_init_fpu(current);
+               return;
+       }
+       clts();
+}
+
+void restore_fpu( struct task_struct *tsk )
+{
+       if ( cpu_has_fxsr ) {
+               asm volatile( "fxrstor %0"
+                             : : "m" (tsk->thread.i387.fxsave) );
+       } else {
+               asm volatile( "frstor %0"
+                             : : "m" (tsk->thread.i387.fsave) );
+       }
+}
+
+/*
+ * FPU tag word conversions.
+ */
+
+static inline unsigned short twd_i387_to_fxsr( unsigned short twd )
+{
+       unsigned int tmp; /* to avoid 16 bit prefixes in the code */
+       /* Transform each pair of bits into 01 (valid) or 00 (empty) */
+        tmp = ~twd;
+        tmp = (tmp | (tmp>>1)) & 0x5555; /* 0V0V0V0V0V0V0V0V */
+        /* and move the valid bits to the lower byte. */
+        tmp = (tmp | (tmp >> 1)) & 0x3333; /* 00VV00VV00VV00VV */
+        tmp = (tmp | (tmp >> 2)) & 0x0f0f; /* 0000VVVV0000VVVV */
+        tmp = (tmp | (tmp >> 4)) & 0x00ff; /* 00000000VVVVVVVV */
+        return tmp;
+}
+
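A quick worked example of the conversion above, for illustration: with st(0)
valid (tag 00) and all other registers empty (tag 11), the i387 tag word is
0xfffc; the routine collapses every 2-bit tag into a single "in use" bit, so
twd_i387_to_fxsr(0xfffc) yields 0x01, while an all-empty tag word 0xffff maps
to 0x00.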
+static inline u32 twd_fxsr_to_i387( struct i387_fxsave_struct *fxsave )
+{
+       struct _fpxreg *st = NULL;
+       u32 twd = (u32) fxsave->twd;
+       u32 tag;
+       u32 ret = 0xffff0000;
+       int i;
+
+#define FPREG_ADDR(f, n)       ((char *)&(f)->st_space + (n) * 16);
+
+       for ( i = 0 ; i < 8 ; i++ ) {
+               if ( twd & 0x1 ) {
+                       st = (struct _fpxreg *) FPREG_ADDR( fxsave, i );
+
+                       switch ( st->exponent & 0x7fff ) {
+                       case 0x7fff:
+                               tag = 2;                /* Special */
+                               break;
+                       case 0x0000:
+                               if ( !st->significand[0] &&
+                                    !st->significand[1] &&
+                                    !st->significand[2] &&
+                                    !st->significand[3] ) {
+                                       tag = 1;        /* Zero */
+                               } else {
+                                       tag = 2;        /* Special */
+                               }
+                               break;
+                       default:
+                               if ( st->significand[3] & 0x8000 ) {
+                                       tag = 0;        /* Valid */
+                               } else {
+                                       tag = 2;        /* Special */
+                               }
+                               break;
+                       }
+               } else {
+                       tag = 3;                        /* Empty */
+               }
+               ret |= (tag << (2 * i));
+               twd = twd >> 1;
+       }
+       return ret;
+}
+
+/*
+ * FPU state interaction.
+ */
+
+unsigned short get_fpu_cwd( struct task_struct *tsk )
+{
+       if ( cpu_has_fxsr ) {
+               return tsk->thread.i387.fxsave.cwd;
+       } else {
+               return (unsigned short)tsk->thread.i387.fsave.cwd;
+       }
+}
+
+unsigned short get_fpu_swd( struct task_struct *tsk )
+{
+       if ( cpu_has_fxsr ) {
+               return tsk->thread.i387.fxsave.swd;
+       } else {
+               return (unsigned short)tsk->thread.i387.fsave.swd;
+       }
+}
+
+unsigned short get_fpu_twd( struct task_struct *tsk )
+{
+       if ( cpu_has_fxsr ) {
+               return tsk->thread.i387.fxsave.twd;
+       } else {
+               return (unsigned short)tsk->thread.i387.fsave.twd;
+       }
+}
+
+unsigned short get_fpu_mxcsr( struct task_struct *tsk )
+{
+       if ( cpu_has_xmm ) {
+               return tsk->thread.i387.fxsave.mxcsr;
+       } else {
+               return 0x1f80;
+       }
+}
+
+void set_fpu_cwd( struct task_struct *tsk, unsigned short cwd )
+{
+       if ( cpu_has_fxsr ) {
+               tsk->thread.i387.fxsave.cwd = cwd;
+       } else {
+               tsk->thread.i387.fsave.cwd = ((u32)cwd | 0xffff0000);
+       }
+}
+
+void set_fpu_swd( struct task_struct *tsk, unsigned short swd )
+{
+       if ( cpu_has_fxsr ) {
+               tsk->thread.i387.fxsave.swd = swd;
+       } else {
+               tsk->thread.i387.fsave.swd = ((u32)swd | 0xffff0000);
+       }
+}
+
+void set_fpu_twd( struct task_struct *tsk, unsigned short twd )
+{
+       if ( cpu_has_fxsr ) {
+               tsk->thread.i387.fxsave.twd = twd_i387_to_fxsr(twd);
+       } else {
+               tsk->thread.i387.fsave.twd = ((u32)twd | 0xffff0000);
+       }
+}
+
+void set_fpu_mxcsr( struct task_struct *tsk, unsigned short mxcsr )
+{
+       if ( cpu_has_xmm ) {
+               tsk->thread.i387.fxsave.mxcsr = (mxcsr & 0xffbf);
+       }
+}
+
+/*
+ * FXSR floating point environment conversions.
+ */
+
+static inline int convert_fxsr_to_user( struct _fpstate *buf,
+                                       struct i387_fxsave_struct *fxsave )
+{
+       u32 env[7];
+       struct _fpreg *to;
+       struct _fpxreg *from;
+       int i;
+
+       env[0] = (u32)fxsave->cwd | 0xffff0000;
+       env[1] = (u32)fxsave->swd | 0xffff0000;
+       env[2] = twd_fxsr_to_i387(fxsave);
+       env[3] = fxsave->fip;
+       env[4] = fxsave->fcs | ((u32)fxsave->fop << 16);
+       env[5] = fxsave->foo;
+       env[6] = fxsave->fos;
+
+       if ( __copy_to_user( buf, env, 7 * sizeof(u32) ) )
+               return 1;
+
+       to = &buf->_st[0];
+       from = (struct _fpxreg *) &fxsave->st_space[0];
+       for ( i = 0 ; i < 8 ; i++, to++, from++ ) {
+               if ( __copy_to_user( to, from, sizeof(*to) ) )
+                       return 1;
+       }
+       return 0;
+}
+
+static inline int convert_fxsr_from_user( struct i387_fxsave_struct *fxsave,
+                                         struct _fpstate *buf )
+{
+       u32 env[7];
+       struct _fpxreg *to;
+       struct _fpreg *from;
+       int i;
+
+       if ( __copy_from_user( env, buf, 7 * sizeof(u32) ) )
+               return 1;
+
+       fxsave->cwd = (unsigned short)(env[0] & 0xffff);
+       fxsave->swd = (unsigned short)(env[1] & 0xffff);
+       fxsave->twd = twd_i387_to_fxsr((unsigned short)(env[2] & 0xffff));
+       fxsave->fip = env[3];
+       fxsave->fop = (unsigned short)((env[4] & 0xffff0000) >> 16);
+       fxsave->fcs = (env[4] & 0xffff);
+       fxsave->foo = env[5];
+       fxsave->fos = env[6];
+
+       to = (struct _fpxreg *) &fxsave->st_space[0];
+       from = &buf->_st[0];
+       for ( i = 0 ; i < 8 ; i++, to++, from++ ) {
+               if ( __copy_from_user( to, from, sizeof(*from) ) )
+                       return 1;
+       }
+       return 0;
+}
+
+/*
+ * Signal frame handlers.
+ */
+
+static inline int save_i387_fsave( struct _fpstate *buf )
+{
+       struct task_struct *tsk = current;
+
+       unlazy_fpu( tsk );
+       tsk->thread.i387.fsave.status = tsk->thread.i387.fsave.swd;
+       if ( __copy_to_user( buf, &tsk->thread.i387.fsave,
+                            sizeof(struct i387_fsave_struct) ) )
+               return -1;
+       return 1;
+}
+
+static inline int save_i387_fxsave( struct _fpstate *buf )
+{
+       struct task_struct *tsk = current;
+       int err = 0;
+
+       unlazy_fpu( tsk );
+
+       if ( convert_fxsr_to_user( buf, &tsk->thread.i387.fxsave ) )
+               return -1;
+
+       err |= __put_user( tsk->thread.i387.fxsave.swd, &buf->status );
+       err |= __put_user( X86_FXSR_MAGIC, &buf->magic );
+       if ( err )
+               return -1;
+
+       if ( __copy_to_user( &buf->_fxsr_env[0], &tsk->thread.i387.fxsave,
+                            sizeof(struct i387_fxsave_struct) ) )
+               return -1;
+       return 1;
+}
+
+int save_i387( struct _fpstate *buf )
+{
+       if ( !current->used_math )
+               return 0;
+
+       /* This will cause a "finit" to be triggered by the next
+        * attempted FPU operation by the 'current' process.
+        */
+       current->used_math = 0;
+
+       if ( HAVE_HWFP ) {
+               if ( cpu_has_fxsr ) {
+                       return save_i387_fxsave( buf );
+               } else {
+                       return save_i387_fsave( buf );
+               }
+       } 
+}
+
+static inline int restore_i387_fsave( struct _fpstate *buf )
+{
+       struct task_struct *tsk = current;
+       clear_fpu( tsk );
+       return __copy_from_user( &tsk->thread.i387.fsave, buf,
+                                sizeof(struct i387_fsave_struct) );
+}
+
+static inline int restore_i387_fxsave( struct _fpstate *buf )
+{
+       struct task_struct *tsk = current;
+       clear_fpu( tsk );
+       if ( __copy_from_user( &tsk->thread.i387.fxsave, &buf->_fxsr_env[0],
+                              sizeof(struct i387_fxsave_struct) ) )
+               return 1;
+       /* mxcsr bits 6 and 31-16 must be zero for security reasons */
+       tsk->thread.i387.fxsave.mxcsr &= 0xffbf;
+       return convert_fxsr_from_user( &tsk->thread.i387.fxsave, buf );
+}
+
+int restore_i387( struct _fpstate *buf )
+{
+       int err;
+
+       if ( HAVE_HWFP ) {
+               if ( cpu_has_fxsr ) {
+                       err =  restore_i387_fxsave( buf );
+               } else {
+                       err = restore_i387_fsave( buf );
+               }
+       } 
+       current->used_math = 1;
+       return err;
+}
+
+/*
+ * ptrace request handlers.
+ */
+
+int get_fpregs( struct user_i387_struct *buf, struct task_struct *tsk )
+{
+       if ( cpu_has_fxsr ) {
+               if (__copy_to_user( (void *)buf, &tsk->thread.i387.fxsave,
+                                   sizeof(struct user_i387_struct) ))
+                       return -EFAULT;
+               return 0;
+       } else {
+               return -EIO;
+       }
+}
+
+int set_fpregs( struct task_struct *tsk, struct user_i387_struct *buf )
+{
+       if ( cpu_has_fxsr ) {
+               __copy_from_user( &tsk->thread.i387.fxsave, (void *)buf,
+                                 sizeof(struct user_i387_struct) );
+               /* mxcsr bits 6 and 31-16 must be zero for security reasons */
+               tsk->thread.i387.fxsave.mxcsr &= 0xffbf;
+               return 0;
+       } else {
+               return -EIO;
+       }
+}
+
+/*
+ * FPU state for core dumps.
+ */
+
+static inline void copy_fpu_fsave( struct task_struct *tsk,
+                                  struct user_i387_struct *fpu )
+{
+       memcpy( fpu, &tsk->thread.i387.fsave,
+               sizeof(struct user_i387_struct) );
+}
+
+static inline void copy_fpu_fxsave( struct task_struct *tsk,
+                                  struct user_i387_struct *fpu )
+{
+       unsigned short *to;
+       unsigned short *from;
+       int i;
+
+       memcpy( fpu, &tsk->thread.i387.fxsave, 7 * sizeof(u32) );
+
+       to = (unsigned short *)&fpu->st_space[0];
+       from = (unsigned short *)&tsk->thread.i387.fxsave.st_space[0];
+       for ( i = 0 ; i < 8 ; i++, to += 5, from += 8 ) {
+               memcpy( to, from, 5 * sizeof(unsigned short) );
+       }
+}
+
+int dump_fpu( struct pt_regs *regs, struct user_i387_struct *fpu )
+{
+       int fpvalid;
+       struct task_struct *tsk = current;
+
+       fpvalid = tsk->used_math && cpu_has_fxsr;
+       if ( fpvalid ) {
+               unlazy_fpu( tsk );
+               memcpy( fpu, &tsk->thread.i387.fxsave,
+                       sizeof(struct user_i387_struct) );
+       }
+
+       return fpvalid;
+}
diff --git a/arch/x86_64/kernel/i8259.c b/arch/x86_64/kernel/i8259.c
new file mode 100644 (file)
index 0000000..1277aa5
--- /dev/null
@@ -0,0 +1,485 @@
+#include <linux/linkage.h>
+#include <linux/config.h>
+#include <linux/ptrace.h>
+#include <linux/errno.h>
+#include <linux/signal.h>
+#include <linux/sched.h>
+#include <linux/ioport.h>
+#include <linux/interrupt.h>
+#include <linux/timex.h>
+#include <linux/slab.h>
+#include <linux/random.h>
+#include <linux/smp_lock.h>
+#include <linux/init.h>
+#include <linux/kernel_stat.h>
+
+#include <asm/atomic.h>
+#include <asm/system.h>
+#include <asm/io.h>
+#include <asm/irq.h>
+#include <asm/hw_irq.h>
+#include <asm/bitops.h>
+#include <asm/pgtable.h>
+#include <asm/delay.h>
+#include <asm/desc.h>
+#include <asm/apic.h>
+
+#include <linux/irq.h>
+
+/* When we have things working, we can switch to always using the
+   IOAPIC. --pavel */
+
+/*
+ * Common place to define all x86 IRQ vectors
+ *
+ * This builds up the IRQ handler stubs using some ugly macros in irq.h
+ *
+ * These macros create the low-level assembly IRQ routines that save
+ * register context and call do_IRQ(). do_IRQ() then does all the
+ * operations that are needed to keep the AT (or SMP IOAPIC)
+ * interrupt-controller happy.
+ */
+
+BUILD_COMMON_IRQ()
+
+#define BI(x,y) \
+       BUILD_IRQ(x##y)
+
+#define BUILD_16_IRQS(x) \
+       BI(x,0) BI(x,1) BI(x,2) BI(x,3) \
+       BI(x,4) BI(x,5) BI(x,6) BI(x,7) \
+       BI(x,8) BI(x,9) BI(x,a) BI(x,b) \
+       BI(x,c) BI(x,d) BI(x,e) BI(x,f)
+
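Expanding the helpers above by hand (illustration only): BI(0x1,a) pastes its
arguments into BUILD_IRQ(0x1a), so BUILD_16_IRQS(0x1) emits the sixteen entry
stubs for IRQ numbers 0x10-0x1f; the interrupt[] table further down then
refers to them as IRQ0x10_interrupt .. IRQ0x1f_interrupt via IRQLIST_16(0x1).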
+/*
+ * ISA PIC or low IO-APIC triggered (INTA-cycle or APIC) interrupts:
+ * (these are usually mapped to vectors 0x20-0x2f)
+ */
+BUILD_16_IRQS(0x0)
+
+#ifdef CONFIG_X86_IO_APIC
+/*
+ * The IO-APIC gives us many more interrupt sources. Most of these 
+ * are unused but an SMP system is supposed to have enough memory ...
+ * sometimes (mostly wrt. hw bugs) we get corrupted vectors all
+ * across the spectrum, so we really want to be prepared to get all
+ * of these. Plus, more powerful systems might have more than 64
+ * IO-APIC registers.
+ *
+ * (these are usually mapped into the 0x30-0xff vector range)
+ */
+                  BUILD_16_IRQS(0x1) BUILD_16_IRQS(0x2) BUILD_16_IRQS(0x3)
+BUILD_16_IRQS(0x4) BUILD_16_IRQS(0x5) BUILD_16_IRQS(0x6) BUILD_16_IRQS(0x7)
+BUILD_16_IRQS(0x8) BUILD_16_IRQS(0x9) BUILD_16_IRQS(0xa) BUILD_16_IRQS(0xb)
+BUILD_16_IRQS(0xc) BUILD_16_IRQS(0xd)
+#endif
+
+#undef BUILD_16_IRQS
+#undef BI
+
+
+/*
+ * The following vectors are part of the Linux architecture, there
+ * is no hardware IRQ pin equivalent for them, they are triggered
+ * through the ICC by us (IPIs)
+ */
+#ifdef CONFIG_SMP
+BUILD_SMP_INTERRUPT(task_migration_interrupt,TASK_MIGRATION_VECTOR);
+BUILD_SMP_INTERRUPT(reschedule_interrupt,RESCHEDULE_VECTOR);
+BUILD_SMP_INTERRUPT(invalidate_interrupt,INVALIDATE_TLB_VECTOR);
+BUILD_SMP_INTERRUPT(call_function_interrupt,CALL_FUNCTION_VECTOR); 
+#endif
+
+/*
+ * Every Pentium local APIC has two 'local interrupts', each with a
+ * soft-definable vector: one is a timer interrupt, the other is the
+ * error counter overflow interrupt. Linux uses the local APIC timer
+ * interrupt to get a much simpler SMP time architecture:
+ */
+#ifdef CONFIG_X86_LOCAL_APIC
+BUILD_SMP_INTERRUPT(apic_timer_interrupt, LOCAL_TIMER_VECTOR);
+BUILD_SMP_INTERRUPT(error_interrupt,ERROR_APIC_VECTOR);
+BUILD_SMP_INTERRUPT(spurious_interrupt,SPURIOUS_APIC_VECTOR);
+#endif
+
+#define IRQ(x,y) \
+       IRQ##x##y##_interrupt
+
+#define IRQLIST_16(x) \
+       IRQ(x,0), IRQ(x,1), IRQ(x,2), IRQ(x,3), \
+       IRQ(x,4), IRQ(x,5), IRQ(x,6), IRQ(x,7), \
+       IRQ(x,8), IRQ(x,9), IRQ(x,a), IRQ(x,b), \
+       IRQ(x,c), IRQ(x,d), IRQ(x,e), IRQ(x,f)
+
+void (*interrupt[NR_IRQS])(void) = {
+       IRQLIST_16(0x0),
+
+#ifdef CONFIG_X86_IO_APIC
+                        IRQLIST_16(0x1), IRQLIST_16(0x2), IRQLIST_16(0x3),
+       IRQLIST_16(0x4), IRQLIST_16(0x5), IRQLIST_16(0x6), IRQLIST_16(0x7),
+       IRQLIST_16(0x8), IRQLIST_16(0x9), IRQLIST_16(0xa), IRQLIST_16(0xb),
+       IRQLIST_16(0xc), IRQLIST_16(0xd)
+#endif
+};
+
+#undef IRQ
+#undef IRQLIST_16
+
+/*
+ * This is the 'legacy' 8259A Programmable Interrupt Controller,
+ * present in the majority of PC/AT boxes,
+ * plus some generic x86-specific things, if generic specifics make
+ * any sense at all.
+ * This file should become arch/i386/kernel/irq.c when the old irq.c
+ * moves to arch-independent land.
+ */
+
+spinlock_t i8259A_lock = SPIN_LOCK_UNLOCKED;
+
+static void end_8259A_irq (unsigned int irq)
+{
+       if (!(irq_desc[irq].status & (IRQ_DISABLED|IRQ_INPROGRESS)))
+               enable_8259A_irq(irq);
+}
+
+#define shutdown_8259A_irq     disable_8259A_irq
+
+void mask_and_ack_8259A(unsigned int);
+
+static unsigned int startup_8259A_irq(unsigned int irq)
+{ 
+       enable_8259A_irq(irq);
+       return 0; /* never anything pending */
+}
+
+static struct hw_interrupt_type i8259A_irq_type = {
+       "XT-PIC",
+       startup_8259A_irq,
+       shutdown_8259A_irq,
+       enable_8259A_irq,
+       disable_8259A_irq,
+       mask_and_ack_8259A,
+       end_8259A_irq,
+       NULL
+};
+
+/*
+ * 8259A PIC functions to handle ISA devices:
+ */
+
+/*
+ * This contains the irq mask for both 8259A irq controllers.
+ */
+static unsigned int cached_irq_mask = 0xffff;
+
+#define __byte(x,y)    (((unsigned char *)&(y))[x])
+#define cached_21      (__byte(0,cached_irq_mask))
+#define cached_A1      (__byte(1,cached_irq_mask))
+
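Illustration only (a sketch, not part of the patch): how the cached mask
splits across the two PICs, relying on the little-endian byte order that the
__byte() macro above assumes; the function name is hypothetical.

    static void example_unmask_irq10(void)
    {
            unsigned int mask = 0xffff;        /* everything masked */
            mask &= ~(1U << 10);               /* unmask IRQ 10: mask is now 0xfbff */
            /* low byte  (cached_21, master, IRQs 0-7):  0xff */
            /* high byte (cached_A1, slave,  IRQs 8-15): 0xfb */
    }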
+/*
+ * Not all IRQs can be routed through the IO-APIC; e.g. on certain (older)
+ * boards the timer interrupt is not really connected to any IO-APIC pin,
+ * it's fed to the master 8259A's IR0 line only.
+ *
+ * Any '1' bit in this mask means the IRQ is routed through the IO-APIC.
+ * This 'mixed mode' IRQ handling costs nothing because it's only used
+ * at IRQ setup time.
+ */
+unsigned long io_apic_irqs;
+
+void disable_8259A_irq(unsigned int irq)
+{
+       unsigned int mask = 1 << irq;
+       unsigned long flags;
+
+       spin_lock_irqsave(&i8259A_lock, flags);
+       cached_irq_mask |= mask;
+       if (irq & 8)
+               outb(cached_A1,0xA1);
+       else
+               outb(cached_21,0x21);
+       spin_unlock_irqrestore(&i8259A_lock, flags);
+}
+
+void enable_8259A_irq(unsigned int irq)
+{
+       unsigned int mask = ~(1 << irq);
+       unsigned long flags;
+
+       spin_lock_irqsave(&i8259A_lock, flags);
+       cached_irq_mask &= mask;
+       if (irq & 8)
+               outb(cached_A1,0xA1);
+       else
+               outb(cached_21,0x21);
+       spin_unlock_irqrestore(&i8259A_lock, flags);
+}
+
+int i8259A_irq_pending(unsigned int irq)
+{
+       unsigned int mask = 1<<irq;
+       unsigned long flags;
+       int ret;
+
+       spin_lock_irqsave(&i8259A_lock, flags);
+       if (irq < 8)
+               ret = inb(0x20) & mask;
+       else
+               ret = inb(0xA0) & (mask >> 8);
+       spin_unlock_irqrestore(&i8259A_lock, flags);
+
+       return ret;
+}
+
+void make_8259A_irq(unsigned int irq)
+{
+       disable_irq_nosync(irq);
+       io_apic_irqs &= ~(1<<irq);
+       irq_desc[irq].handler = &i8259A_irq_type;
+       enable_irq(irq);
+}
+
+/*
+ * This function is expected to be called rarely, since switching between
+ * 8259A registers is slow.
+ * It must be called with the irq controller spinlock held.
+ */
+static inline int i8259A_irq_real(unsigned int irq)
+{
+       int value;
+       int irqmask = 1<<irq;
+
+       if (irq < 8) {
+               outb(0x0B,0x20);                /* ISR register */
+               value = inb(0x20) & irqmask;
+               outb(0x0A,0x20);                /* back to the IRR register */
+               return value;
+       }
+       outb(0x0B,0xA0);                /* ISR register */
+       value = inb(0xA0) & (irqmask >> 8);
+       outb(0x0A,0xA0);                /* back to the IRR register */
+       return value;
+}
+
+/*
+ * Careful! The 8259A is a fragile beast: it pretty
+ * much _has_ to be done exactly like this (mask it
+ * first, _then_ send the EOI), and the order of EOI
+ * to the two 8259s is important!
+ */
+void mask_and_ack_8259A(unsigned int irq)
+{
+       unsigned int irqmask = 1 << irq;
+       unsigned long flags;
+
+       spin_lock_irqsave(&i8259A_lock, flags);
+       /*
+        * Lightweight spurious IRQ detection. We do not want
+        * to overdo spurious IRQ handling - it's usually a sign
+        * of hardware problems, so we only do the checks we can
+        * do without slowing down good hardware unnecessarily.
+        *
+        * Note that IRQ7 and IRQ15 (the two spurious IRQs
+        * usually resulting from the 8259A-1|2 PICs) occur
+        * even if the IRQ is masked in the 8259A. Thus we
+        * can check spurious 8259A IRQs without doing the
+        * quite slow i8259A_irq_real() call for every IRQ.
+        * This does not cover 100% of spurious interrupts,
+        * but should be enough to warn the user that there
+        * is something bad going on ...
+        */
+       if (cached_irq_mask & irqmask)
+               goto spurious_8259A_irq;
+       cached_irq_mask |= irqmask;
+
+handle_real_irq:
+       if (irq & 8) {
+               inb(0xA1);              /* DUMMY - (do we need this?) */
+               outb(cached_A1,0xA1);
+               outb(0x60+(irq&7),0xA0);/* 'Specific EOI' to slave */
+               outb(0x62,0x20);        /* 'Specific EOI' to master-IRQ2 */
+       } else {
+               inb(0x21);              /* DUMMY - (do we need this?) */
+               outb(cached_21,0x21);
+               outb(0x60+irq,0x20);    /* 'Specific EOI' to master */
+       }
+       spin_unlock_irqrestore(&i8259A_lock, flags);
+       return;
+
+spurious_8259A_irq:
+       /*
+        * this is the slow path - should happen rarely.
+        */
+       if (i8259A_irq_real(irq))
+               /*
+                * oops, the IRQ _is_ in service according to the
+                * 8259A - not spurious, go handle it.
+                */
+               goto handle_real_irq;
+
+       {
+               static int spurious_irq_mask;
+               /*
+                * At this point we can be sure the IRQ is spurious;
+                * let's ACK and report it. [once per IRQ]
+                */
+               if (!(spurious_irq_mask & irqmask)) {
+                       printk("spurious 8259A interrupt: IRQ%d.\n", irq);
+                       spurious_irq_mask |= irqmask;
+               }
+               atomic_inc(&irq_err_count);
+               /*
+                * Theoretically we do not have to handle this IRQ,
+                * but in Linux this does not cause problems and is
+                * simpler for us.
+                */
+               goto handle_real_irq;
+       }
+}
+
+void __init init_8259A(int auto_eoi)
+{
+       unsigned long flags;
+
+       spin_lock_irqsave(&i8259A_lock, flags);
+
+       outb(0xff, 0x21);       /* mask all of 8259A-1 */
+       outb(0xff, 0xA1);       /* mask all of 8259A-2 */
+
+       /*
+        * outb_p - this has to work on a wide range of PC hardware.
+        */
+       outb_p(0x11, 0x20);     /* ICW1: select 8259A-1 init */
+       outb_p(0x20 + 0, 0x21); /* ICW2: 8259A-1 IR0-7 mapped to 0x20-0x27 */
+       outb_p(0x04, 0x21);     /* 8259A-1 (the master) has a slave on IR2 */
+       if (auto_eoi)
+               outb_p(0x03, 0x21);     /* master does Auto EOI */
+       else
+               outb_p(0x01, 0x21);     /* master expects normal EOI */
+
+       outb_p(0x11, 0xA0);     /* ICW1: select 8259A-2 init */
+       outb_p(0x20 + 8, 0xA1); /* ICW2: 8259A-2 IR0-7 mapped to 0x28-0x2f */
+       outb_p(0x02, 0xA1);     /* 8259A-2 is a slave on master's IR2 */
+       outb_p(0x01, 0xA1);     /* (slave's support for AEOI in flat mode
+                                   is to be investigated) */
+
+       if (auto_eoi)
+               /*
+                * in AEOI mode we just have to mask the interrupt
+                * when acking.
+                */
+               i8259A_irq_type.ack = disable_8259A_irq;
+       else
+               i8259A_irq_type.ack = mask_and_ack_8259A;
+
+       udelay(100);            /* wait for 8259A to initialize */
+
+       outb(cached_21, 0x21);  /* restore master IRQ mask */
+       outb(cached_A1, 0xA1);  /* restore slave IRQ mask */
+
+       spin_unlock_irqrestore(&i8259A_lock, flags);
+}
+
+/*
+ * IRQ2 is cascade interrupt to second interrupt controller
+ */
+
+#ifndef CONFIG_VISWS
+static struct irqaction irq2 = { no_action, 0, 0, "cascade", NULL, NULL};
+#endif
+
+
+void __init init_ISA_irqs (void)
+{
+       int i;
+
+#ifdef CONFIG_X86_LOCAL_APIC
+       init_bsp_APIC();
+#endif
+       init_8259A(0);
+
+       for (i = 0; i < NR_IRQS; i++) {
+               irq_desc[i].status = IRQ_DISABLED;
+               irq_desc[i].action = 0;
+               irq_desc[i].depth = 1;
+
+               if (i < 16) {
+                       /*
+                        * 16 old-style INTA-cycle interrupts:
+                        */
+                       irq_desc[i].handler = &i8259A_irq_type;
+               } else {
+                       /*
+                        * 'high' PCI IRQs filled in on demand
+                        */
+                       irq_desc[i].handler = &no_irq_type;
+               }
+       }
+}
+
+void __init init_IRQ(void)
+{
+       int i;
+
+#ifndef CONFIG_X86_VISWS_APIC
+       init_ISA_irqs();
+#else
+       init_VISWS_APIC_irqs();
+#endif
+       /*
+        * Cover the whole vector space; no vector can escape
+        * us. (Some of these will be overridden and become
+        * 'special' SMP interrupts.)
+        */
+       for (i = 0; i < NR_IRQS; i++) {
+               int vector = FIRST_EXTERNAL_VECTOR + i;
+               if (vector != IA32_SYSCALL_VECTOR)
+                       set_intr_gate(vector, interrupt[i]);
+       }
+
+#ifdef CONFIG_SMP
+       /*
+        * IRQ0 must be given a fixed assignment and initialized,
+        * because it's used before the IO-APIC is set up.
+        */
+       set_intr_gate(FIRST_DEVICE_VECTOR, interrupt[0]);
+
+       /*
+        * The reschedule interrupt is a CPU-to-CPU reschedule-helper
+        * IPI, driven by wakeup.
+        */
+       set_intr_gate(RESCHEDULE_VECTOR, reschedule_interrupt);
+
+       /* IPI for task migration */
+       set_intr_gate(TASK_MIGRATION_VECTOR, task_migration_interrupt);
+
+       /* IPI for invalidation */
+       set_intr_gate(INVALIDATE_TLB_VECTOR, invalidate_interrupt);
+
+       /* IPI for generic function call */
+       set_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt);
+#endif 
+
+#ifdef CONFIG_X86_LOCAL_APIC
+       /* self generated IPI for local APIC timer */
+       set_intr_gate(LOCAL_TIMER_VECTOR, apic_timer_interrupt);
+
+       /* IPI vectors for APIC spurious and error interrupts */
+       set_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt);
+       set_intr_gate(ERROR_APIC_VECTOR, error_interrupt);
+#endif
+
+       /*
+        * Set the clock to HZ Hz; we already have a valid
+        * vector now:
+        */
+       outb_p(0x34,0x43);              /* binary, mode 2, LSB/MSB, ch 0 */
+       outb_p(LATCH & 0xff , 0x40);    /* LSB */
+       outb(LATCH >> 8 , 0x40);        /* MSB */
+
+#ifndef CONFIG_VISWS
+       setup_irq(2, &irq2);
+#endif
+}
diff --git a/arch/x86_64/kernel/init_task.c b/arch/x86_64/kernel/init_task.c
new file mode 100644 (file)
index 0000000..46efd5a
--- /dev/null
@@ -0,0 +1,42 @@
+#include <linux/mm.h>
+#include <linux/sched.h>
+#include <linux/init.h>
+#include <linux/init_task.h>
+#include <linux/fs.h>
+
+#include <asm/uaccess.h>
+#include <asm/pgtable.h>
+#include <asm/desc.h>
+#include <asm/thread_info.h>
+
+static struct fs_struct init_fs = INIT_FS;
+static struct files_struct init_files = INIT_FILES;
+static struct signal_struct init_signals = INIT_SIGNALS;
+struct mm_struct init_mm = INIT_MM(init_mm);
+
+/*
+ * Initial thread structure.
+ *
+ * We need to make sure that this is 8192-byte aligned due to the
+ * way process stacks are handled. This is done by having a special
+ * "init_task" linker map entry..
+ */
+union thread_union init_thread_union 
+       __attribute__((__section__(".data.init_task"))) =
+               { INIT_THREAD_INFO(init_task) };
+
+/*
+ * Initial task structure.
+ *
+ * All other task structs will be allocated on slabs in fork.c
+ */
+struct task_struct init_task = INIT_TASK(init_task);
+
+/*
+ * per-CPU TSS segments. Threads are completely 'soft' on Linux,
+ * no more per-task TSS's. The TSS size is kept cacheline-aligned
+ * so they are allowed to end up in the .data.cacheline_aligned
+ * section. Since TSS's are completely CPU-local, we want them
+ * on exact cacheline boundaries, to eliminate cacheline ping-pong.
+ */ 
+struct tss_struct init_tss[NR_CPUS] __cacheline_aligned = { [0 ... NR_CPUS-1] = INIT_TSS };
diff --git a/arch/x86_64/kernel/io_apic.c b/arch/x86_64/kernel/io_apic.c
new file mode 100644 (file)
index 0000000..e1ff5ae
--- /dev/null
@@ -0,0 +1,1617 @@
+/*
+ *     Intel IO-APIC support for multi-Pentium hosts.
+ *
+ *     Copyright (C) 1997, 1998, 1999, 2000 Ingo Molnar, Hajnalka Szabo
+ *
+ *     Many thanks to Stig Venaas for trying out countless experimental
+ *     patches and reporting/debugging problems patiently!
+ *
+ *     (c) 1999, Multiple IO-APIC support, developed by
+ *     Ken-ichi Yaku <yaku@css1.kbnes.nec.co.jp> and
+ *      Hidemi Kishimoto <kisimoto@css1.kbnes.nec.co.jp>,
+ *     further tested and cleaned up by Zach Brown <zab@redhat.com>
+ *     and Ingo Molnar <mingo@redhat.com>
+ *
+ *     Fixes
+ *     Maciej W. Rozycki       :       Bits for genuine 82489DX APICs;
+ *                                     thanks to Eric Gilmore
+ *                                     and Rolf G. Tews
+ *                                     for testing these extensively
+ */
+
+#include <linux/mm.h>
+#include <linux/irq.h>
+#include <linux/interrupt.h>
+#include <linux/init.h>
+#include <linux/delay.h>
+#include <linux/sched.h>
+#include <linux/config.h>
+#include <linux/smp_lock.h>
+#include <linux/mc146818rtc.h>
+
+#include <asm/io.h>
+#include <asm/smp.h>
+#include <asm/desc.h>
+
+#undef APIC_LOCKUP_DEBUG
+
+#define APIC_LOCKUP_DEBUG
+
+static spinlock_t ioapic_lock = SPIN_LOCK_UNLOCKED;
+
+/*
+ * # of IRQ routing registers
+ */
+int nr_ioapic_registers[MAX_IO_APICS];
+
+/*
+ * Rough estimation of how many shared IRQs there are, can
+ * be changed anytime.
+ */
+#define MAX_PLUS_SHARED_IRQS NR_IRQS
+#define PIN_MAP_SIZE (MAX_PLUS_SHARED_IRQS + NR_IRQS)
+
+/*
+ * This is performance-critical, we want to do it O(1)
+ *
+ * the indexing order of this array favors 1:1 mappings
+ * between pins and IRQs.
+ */
+
+static struct irq_pin_list {
+       int apic, pin, next;
+} irq_2_pin[PIN_MAP_SIZE];
+
+/*
+ * The common case is 1:1 IRQ<->pin mappings. Sometimes there are
+ * shared ISA-space IRQs, so we have to support them. We are super
+ * fast in the common case, and fast for shared ISA-space IRQs.
+ */
+static void add_pin_to_irq(unsigned int irq, int apic, int pin)
+{
+       static int first_free_entry = NR_IRQS;
+       struct irq_pin_list *entry = irq_2_pin + irq;
+
+       while (entry->next)
+               entry = irq_2_pin + entry->next;
+
+       if (entry->pin != -1) {
+               entry->next = first_free_entry;
+               entry = irq_2_pin + entry->next;
+               if (++first_free_entry >= PIN_MAP_SIZE)
+                       panic("io_apic.c: whoops");
+       }
+       entry->apic = apic;
+       entry->pin = pin;
+}
+
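+/*
+ * __DO_ACTION walks the irq_2_pin chain of 'irq', applies ACTION to the
+ * redirection-entry dword selected by R (register 0x10 + R + 2*pin) on
+ * every pin, and runs FINAL once after the last pin.
+ */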
+#define __DO_ACTION(R, ACTION, FINAL)                                  \
+                                                                       \
+{                                                                      \
+       int pin;                                                        \
+       struct irq_pin_list *entry = irq_2_pin + irq;                   \
+                                                                       \
+       for (;;) {                                                      \
+               unsigned int reg;                                       \
+               pin = entry->pin;                                       \
+               if (pin == -1)                                          \
+                       break;                                          \
+               reg = io_apic_read(entry->apic, 0x10 + R + pin*2);      \
+               reg ACTION;                                             \
+               io_apic_modify(entry->apic, reg);                       \
+               if (!entry->next)                                       \
+                       break;                                          \
+               entry = irq_2_pin + entry->next;                        \
+       }                                                               \
+       FINAL;                                                          \
+}
+
+#define DO_ACTION(name,R,ACTION, FINAL)                                        \
+                                                                       \
+       static void name##_IO_APIC_irq (unsigned int irq)               \
+       __DO_ACTION(R, ACTION, FINAL)
+
+DO_ACTION( __mask,             0, |= 0x00010000, io_apic_sync(entry->apic) )
+                                               /* mask = 1 */
+DO_ACTION( __unmask,           0, &= 0xfffeffff, )
+                                               /* mask = 0 */
+DO_ACTION( __mask_and_edge,    0, = (reg & 0xffff7fff) | 0x00010000, )
+                                               /* mask = 1, trigger = 0 */
+DO_ACTION( __unmask_and_level, 0, = (reg & 0xfffeffff) | 0x00008000, )
+                                               /* mask = 0, trigger = 1 */
+
+static void mask_IO_APIC_irq (unsigned int irq)
+{
+       unsigned long flags;
+
+       spin_lock_irqsave(&ioapic_lock, flags);
+       __mask_IO_APIC_irq(irq);
+       spin_unlock_irqrestore(&ioapic_lock, flags);
+}
+
+static void unmask_IO_APIC_irq (unsigned int irq)
+{
+       unsigned long flags;
+
+       spin_lock_irqsave(&ioapic_lock, flags);
+       __unmask_IO_APIC_irq(irq);
+       spin_unlock_irqrestore(&ioapic_lock, flags);
+}
+
+void clear_IO_APIC_pin(unsigned int apic, unsigned int pin)
+{
+       struct IO_APIC_route_entry entry;
+       unsigned long flags;
+
+       /*
+        * Disable it in the IO-APIC irq-routing table:
+        */
+       memset(&entry, 0, sizeof(entry));
+       entry.mask = 1;
+       spin_lock_irqsave(&ioapic_lock, flags);
+       io_apic_write(apic, 0x10 + 2 * pin, *(((int *)&entry) + 0));
+       io_apic_write(apic, 0x11 + 2 * pin, *(((int *)&entry) + 1));
+       spin_unlock_irqrestore(&ioapic_lock, flags);
+}
+
+static void clear_IO_APIC (void)
+{
+       int apic, pin;
+
+       for (apic = 0; apic < nr_ioapics; apic++)
+               for (pin = 0; pin < nr_ioapic_registers[apic]; pin++)
+                       clear_IO_APIC_pin(apic, pin);
+}
+
+/*
+ * support for broken MP BIOSs, enables hand-redirection of PIRQ0-7 to
+ * specific CPU-side IRQs.
+ */
+
+#define MAX_PIRQS 8
+int pirq_entries [MAX_PIRQS];
+int pirqs_enabled;
+int skip_ioapic_setup;
+
+static int __init ioapic_setup(char *str)
+{
+       skip_ioapic_setup = 1;
+       return 1;
+}
+
+__setup("noapic", ioapic_setup);
+
+static int __init ioapic_pirq_setup(char *str)
+{
+       int i, max;
+       int ints[MAX_PIRQS+1];
+
+       get_options(str, ARRAY_SIZE(ints), ints);
+
+       for (i = 0; i < MAX_PIRQS; i++)
+               pirq_entries[i] = -1;
+
+       pirqs_enabled = 1;
+       printk(KERN_INFO "PIRQ redirection, working around broken MP-BIOS.\n");
+       max = MAX_PIRQS;
+       if (ints[0] < MAX_PIRQS)
+               max = ints[0];
+
+       for (i = 0; i < max; i++) {
+               printk(KERN_DEBUG "... PIRQ%d -> IRQ %d\n", i, ints[i+1]);
+               /*
+                * PIRQs are mapped upside down, usually.
+                */
+               pirq_entries[MAX_PIRQS-i-1] = ints[i+1];
+       }
+       return 1;
+}
+
+__setup("pirq=", ioapic_pirq_setup);
+
+/*
+ * Find the IRQ entry number of a certain pin.
+ */
+static int __init find_irq_entry(int apic, int pin, int type)
+{
+       int i;
+
+       for (i = 0; i < mp_irq_entries; i++)
+               if (mp_irqs[i].mpc_irqtype == type &&
+                   (mp_irqs[i].mpc_dstapic == mp_ioapics[apic].mpc_apicid ||
+                    mp_irqs[i].mpc_dstapic == MP_APIC_ALL) &&
+                   mp_irqs[i].mpc_dstirq == pin)
+                       return i;
+
+       return -1;
+}
+
+/*
+ * Find the pin to which IRQ[irq] (ISA) is connected
+ */
+static int __init find_isa_irq_pin(int irq, int type)
+{
+       int i;
+
+       for (i = 0; i < mp_irq_entries; i++) {
+               int lbus = mp_irqs[i].mpc_srcbus;
+
+               if ((mp_bus_id_to_type[lbus] == MP_BUS_ISA ||
+                    mp_bus_id_to_type[lbus] == MP_BUS_EISA ||
+                    mp_bus_id_to_type[lbus] == MP_BUS_MCA) &&
+                   (mp_irqs[i].mpc_irqtype == type) &&
+                   (mp_irqs[i].mpc_srcbusirq == irq))
+
+                       return mp_irqs[i].mpc_dstirq;
+       }
+       return -1;
+}
+
+/*
+ * Find a specific PCI IRQ entry.
+ * Not an __init, possibly needed by modules
+ */
+static int pin_2_irq(int idx, int apic, int pin);
+
+int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin)
+{
+       int apic, i, best_guess = -1;
+
+       Dprintk("querying PCI -> IRQ mapping bus:%d, slot:%d, pin:%d.\n",
+               bus, slot, pin);
+       if (mp_bus_id_to_pci_bus[bus] == -1) {
+               printk(KERN_WARNING "PCI BIOS passed nonexistent PCI bus %d!\n", bus);
+               return -1;
+       }
+       for (i = 0; i < mp_irq_entries; i++) {
+               int lbus = mp_irqs[i].mpc_srcbus;
+
+               for (apic = 0; apic < nr_ioapics; apic++)
+                       if (mp_ioapics[apic].mpc_apicid == mp_irqs[i].mpc_dstapic ||
+                           mp_irqs[i].mpc_dstapic == MP_APIC_ALL)
+                               break;
+
+               if ((mp_bus_id_to_type[lbus] == MP_BUS_PCI) &&
+                   !mp_irqs[i].mpc_irqtype &&
+                   (bus == lbus) &&
+                   (slot == ((mp_irqs[i].mpc_srcbusirq >> 2) & 0x1f))) {
+                       int irq = pin_2_irq(i,apic,mp_irqs[i].mpc_dstirq);
+
+                       if (!(apic || IO_APIC_IRQ(irq)))
+                               continue;
+
+                       if (pin == (mp_irqs[i].mpc_srcbusirq & 3))
+                               return irq;
+                       /*
+                        * Use the first all-but-pin matching entry as a
+                        * best-guess fuzzy result for broken mptables.
+                        */
+                       if (best_guess < 0)
+                               best_guess = irq;
+               }
+       }
+       return best_guess;
+}
+
+/*
+ * EISA Edge/Level control register, ELCR
+ */
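+/* One bit per IRQ: port 0x4d0 covers IRQs 0-7, port 0x4d1 covers IRQs 8-15, 1 = level triggered. */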
+static int __init EISA_ELCR(unsigned int irq)
+{
+       if (irq < 16) {
+               unsigned int port = 0x4d0 + (irq >> 3);
+               return (inb(port) >> (irq & 7)) & 1;
+       }
+       printk(KERN_INFO "Broken MPtable reports ISA irq %d\n", irq);
+       return 0;
+}
+
+/* EISA interrupts are always polarity zero and can be edge or level
+ * trigger depending on the ELCR value.  If an interrupt is listed as
+ * EISA conforming in the MP table, that means its trigger type must
+ * be read in from the ELCR */
+
+#define default_EISA_trigger(idx)      (EISA_ELCR(mp_irqs[idx].mpc_srcbusirq))
+#define default_EISA_polarity(idx)     (0)
+
+/* ISA interrupts are always polarity zero edge triggered,
+ * when listed as conforming in the MP table. */
+
+#define default_ISA_trigger(idx)       (0)
+#define default_ISA_polarity(idx)      (0)
+
+/* PCI interrupts are always polarity one level triggered,
+ * when listed as conforming in the MP table. */
+
+#define default_PCI_trigger(idx)       (1)
+#define default_PCI_polarity(idx)      (1)
+
+/* MCA interrupts are always polarity zero level triggered,
+ * when listed as conforming in the MP table. */
+
+#define default_MCA_trigger(idx)       (1)
+#define default_MCA_polarity(idx)      (0)
+
+static int __init MPBIOS_polarity(int idx)
+{
+       int bus = mp_irqs[idx].mpc_srcbus;
+       int polarity;
+
+       /*
+        * Determine IRQ line polarity (high active or low active):
+        */
+       switch (mp_irqs[idx].mpc_irqflag & 3)
+       {
+               case 0: /* conforms, ie. bus-type dependent polarity */
+               {
+                       switch (mp_bus_id_to_type[bus])
+                       {
+                               case MP_BUS_ISA: /* ISA pin */
+                               {
+                                       polarity = default_ISA_polarity(idx);
+                                       break;
+                               }
+                               case MP_BUS_EISA: /* EISA pin */
+                               {
+                                       polarity = default_EISA_polarity(idx);
+                                       break;
+                               }
+                               case MP_BUS_PCI: /* PCI pin */
+                               {
+                                       polarity = default_PCI_polarity(idx);
+                                       break;
+                               }
+                               case MP_BUS_MCA: /* MCA pin */
+                               {
+                                       polarity = default_MCA_polarity(idx);
+                                       break;
+                               }
+                               default:
+                               {
+                                       printk(KERN_WARNING "broken BIOS!!\n");
+                                       polarity = 1;
+                                       break;
+                               }
+                       }
+                       break;
+               }
+               case 1: /* high active */
+               {
+                       polarity = 0;
+                       break;
+               }
+               case 2: /* reserved */
+               {
+                       printk(KERN_WARNING "broken BIOS!!\n");
+                       polarity = 1;
+                       break;
+               }
+               case 3: /* low active */
+               {
+                       polarity = 1;
+                       break;
+               }
+               default: /* invalid */
+               {
+                       printk(KERN_WARNING "broken BIOS!!\n");
+                       polarity = 1;
+                       break;
+               }
+       }
+       return polarity;
+}
+
+static int __init MPBIOS_trigger(int idx)
+{
+       int bus = mp_irqs[idx].mpc_srcbus;
+       int trigger;
+
+       /*
+        * Determine IRQ trigger mode (edge or level sensitive):
+        */
+       switch ((mp_irqs[idx].mpc_irqflag>>2) & 3)
+       {
+               case 0: /* conforms, ie. bus-type dependent */
+               {
+                       switch (mp_bus_id_to_type[bus])
+                       {
+                               case MP_BUS_ISA: /* ISA pin */
+                               {
+                                       trigger = default_ISA_trigger(idx);
+                                       break;
+                               }
+                               case MP_BUS_EISA: /* EISA pin */
+                               {
+                                       trigger = default_EISA_trigger(idx);
+                                       break;
+                               }
+                               case MP_BUS_PCI: /* PCI pin */
+                               {
+                                       trigger = default_PCI_trigger(idx);
+                                       break;
+                               }
+                               case MP_BUS_MCA: /* MCA pin */
+                               {
+                                       trigger = default_MCA_trigger(idx);
+                                       break;
+                               }
+                               default:
+                               {
+                                       printk(KERN_WARNING "broken BIOS!!\n");
+                                       trigger = 1;
+                                       break;
+                               }
+                       }
+                       break;
+               }
+               case 1: /* edge */
+               {
+                       trigger = 0;
+                       break;
+               }
+               case 2: /* reserved */
+               {
+                       printk(KERN_WARNING "broken BIOS!!\n");
+                       trigger = 1;
+                       break;
+               }
+               case 3: /* level */
+               {
+                       trigger = 1;
+                       break;
+               }
+               default: /* invalid */
+               {
+                       printk(KERN_WARNING "broken BIOS!!\n");
+                       trigger = 0;
+                       break;
+               }
+       }
+       return trigger;
+}
+
+static inline int irq_polarity(int idx)
+{
+       return MPBIOS_polarity(idx);
+}
+
+static inline int irq_trigger(int idx)
+{
+       return MPBIOS_trigger(idx);
+}
+
+static int pin_2_irq(int idx, int apic, int pin)
+{
+       int irq, i;
+       int bus = mp_irqs[idx].mpc_srcbus;
+
+       /*
+        * Debugging check, we are in big trouble if this message pops up!
+        */
+       if (mp_irqs[idx].mpc_dstirq != pin)
+               printk(KERN_ERR "broken BIOS or MPTABLE parser, ayiee!!\n");
+
+       switch (mp_bus_id_to_type[bus])
+       {
+               case MP_BUS_ISA: /* ISA pin */
+               case MP_BUS_EISA:
+               case MP_BUS_MCA:
+               {
+                       irq = mp_irqs[idx].mpc_srcbusirq;
+                       break;
+               }
+               case MP_BUS_PCI: /* PCI pin */
+               {
+                       /*
+                        * PCI IRQs are mapped in order
+                        */
+                       i = irq = 0;
+                       while (i < apic)
+                               irq += nr_ioapic_registers[i++];
+                       irq += pin;
+                       break;
+               }
+               default:
+               {
+                       printk(KERN_ERR "unknown bus type %d.\n",bus); 
+                       irq = 0;
+                       break;
+               }
+       }
+
+       /*
+        * PCI IRQ command line redirection. Yes, limits are hardcoded.
+        */
+       if ((pin >= 16) && (pin <= 23)) {
+               if (pirq_entries[pin-16] != -1) {
+                       if (!pirq_entries[pin-16]) {
+                               printk(KERN_DEBUG "disabling PIRQ%d\n", pin-16);
+                       } else {
+                               irq = pirq_entries[pin-16];
+                               printk(KERN_DEBUG "using PIRQ%d -> IRQ %d\n",
+                                               pin-16, irq);
+                       }
+               }
+       }
+       return irq;
+}
+
+static inline int IO_APIC_irq_trigger(int irq)
+{
+       int apic, idx, pin;
+
+       for (apic = 0; apic < nr_ioapics; apic++) {
+               for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
+                       idx = find_irq_entry(apic,pin,mp_INT);
+                       if ((idx != -1) && (irq == pin_2_irq(idx,apic,pin)))
+                               return irq_trigger(idx);
+               }
+       }
+       /*
+        * nonexistent IRQs are edge default
+        */
+       return 0;
+}
+
+int irq_vector[NR_IRQS] = { FIRST_DEVICE_VECTOR , 0 };
+
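+/*
+ * Vectors are handed out in steps of 8 so they get spread over the
+ * priority classes (the class is vector >> 4) instead of filling one
+ * class completely.  The ia32 syscall vector is skipped; when a pass
+ * runs past FIRST_SYSTEM_VECTOR we restart at FIRST_DEVICE_VECTOR with
+ * a growing offset and hand out the vectors skipped earlier.
+ */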
+static int __init assign_irq_vector(int irq)
+{
+       static int current_vector = FIRST_DEVICE_VECTOR, offset = 0;
+       if (IO_APIC_VECTOR(irq) > 0)
+               return IO_APIC_VECTOR(irq);
+next:
+       current_vector += 8;
+       if (current_vector == IA32_SYSCALL_VECTOR)
+               goto next;
+
+       if (current_vector > FIRST_SYSTEM_VECTOR) {
+               offset++;
+               current_vector = FIRST_DEVICE_VECTOR + offset;
+       }
+
+       if (current_vector == FIRST_SYSTEM_VECTOR)
+               panic("ran out of interrupt sources!");
+
+       IO_APIC_VECTOR(irq) = current_vector;
+       return current_vector;
+}
+
+extern void (*interrupt[NR_IRQS])(void);
+static struct hw_interrupt_type ioapic_level_irq_type;
+static struct hw_interrupt_type ioapic_edge_irq_type;
+
+void __init setup_IO_APIC_irqs(void)
+{
+       struct IO_APIC_route_entry entry;
+       int apic, pin, idx, irq, first_notcon = 1, vector;
+       unsigned long flags;
+
+       printk(KERN_DEBUG "init IO_APIC IRQs\n");
+
+       for (apic = 0; apic < nr_ioapics; apic++) {
+       for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
+
+               /*
+                * add it to the IO-APIC irq-routing table:
+                */
+               memset(&entry,0,sizeof(entry));
+
+               entry.delivery_mode = dest_LowestPrio;
+               entry.dest_mode = INT_DELIVERY_MODE;
+               entry.mask = 0;                         /* enable IRQ */
+               entry.dest.logical.logical_dest = TARGET_CPUS;
+
+               idx = find_irq_entry(apic,pin,mp_INT);
+               if (idx == -1) {
+                       if (first_notcon) {
+                               printk(KERN_DEBUG " IO-APIC (apicid-pin) %d-%d", mp_ioapics[apic].mpc_apicid, pin);
+                               first_notcon = 0;
+                       } else
+                               printk(", %d-%d", mp_ioapics[apic].mpc_apicid, pin);
+                       continue;
+               }
+
+               entry.trigger = irq_trigger(idx);
+               entry.polarity = irq_polarity(idx);
+
+               if (irq_trigger(idx)) {
+                       entry.trigger = 1;
+                       entry.mask = 1;
+                       entry.dest.logical.logical_dest = TARGET_CPUS;
+               }
+
+               irq = pin_2_irq(idx, apic, pin);
+               add_pin_to_irq(irq, apic, pin);
+
+               if (!apic && !IO_APIC_IRQ(irq))
+                       continue;
+
+               if (IO_APIC_IRQ(irq)) {
+                       vector = assign_irq_vector(irq);
+                       entry.vector = vector;
+
+                       if (IO_APIC_irq_trigger(irq))
+                               irq_desc[irq].handler = &ioapic_level_irq_type;
+                       else
+                               irq_desc[irq].handler = &ioapic_edge_irq_type;
+
+                       set_intr_gate(vector, interrupt[irq]);
+               
+                       if (!apic && (irq < 16))
+                               disable_8259A_irq(irq);
+               }
+               spin_lock_irqsave(&ioapic_lock, flags);
+               io_apic_write(apic, 0x11+2*pin, *(((int *)&entry)+1));
+               io_apic_write(apic, 0x10+2*pin, *(((int *)&entry)+0));
+               spin_unlock_irqrestore(&ioapic_lock, flags);
+       }
+       }
+
+       if (!first_notcon)
+               printk(" not connected.\n");
+}
+
+/*
+ * Set up the 8259A-master output pin as broadcast to all
+ * CPUs.
+ */
+void __init setup_ExtINT_IRQ0_pin(unsigned int pin, int vector)
+{
+       struct IO_APIC_route_entry entry;
+       unsigned long flags;
+
+       memset(&entry,0,sizeof(entry));
+
+       disable_8259A_irq(0);
+
+       /* mask LVT0 */
+       apic_write_around(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT);
+
+       /*
+        * We use logical delivery to get the timer IRQ
+        * to the first CPU.
+        */
+       entry.dest_mode = INT_DELIVERY_MODE;
+       entry.mask = 0;                                 /* unmask IRQ now */
+       entry.dest.logical.logical_dest = TARGET_CPUS;
+       entry.delivery_mode = dest_LowestPrio;
+       entry.polarity = 0;
+       entry.trigger = 0;
+       entry.vector = vector;
+
+       /*
+        * The timer IRQ doesn't have to know that behind the
+        * scenes we have an 8259A-master in AEOI mode ...
+        */
+       irq_desc[0].handler = &ioapic_edge_irq_type;
+
+       /*
+        * Add it to the IO-APIC irq-routing table:
+        */
+       spin_lock_irqsave(&ioapic_lock, flags);
+       io_apic_write(0, 0x11+2*pin, *(((int *)&entry)+1));
+       io_apic_write(0, 0x10+2*pin, *(((int *)&entry)+0));
+       spin_unlock_irqrestore(&ioapic_lock, flags);
+
+       enable_8259A_irq(0);
+}
+
+void __init UNEXPECTED_IO_APIC(void)
+{
+       printk(KERN_WARNING " WARNING: unexpected IO-APIC, please mail\n");
+       printk(KERN_WARNING "          to linux-smp@vger.kernel.org\n");
+}
+
+void __init print_IO_APIC(void)
+{
+       int apic, i;
+       struct IO_APIC_reg_00 reg_00;
+       struct IO_APIC_reg_01 reg_01;
+       struct IO_APIC_reg_02 reg_02;
+       unsigned long flags;
+
+       printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries);
+       for (i = 0; i < nr_ioapics; i++)
+               printk(KERN_DEBUG "number of IO-APIC #%d registers: %d.\n",
+                      mp_ioapics[i].mpc_apicid, nr_ioapic_registers[i]);
+
+       /*
+        * We are a bit conservative about what we expect.  We have to
+        * know about every hardware change ASAP.
+        */
+       printk(KERN_INFO "testing the IO APIC.......................\n");
+
+       for (apic = 0; apic < nr_ioapics; apic++) {
+
+       spin_lock_irqsave(&ioapic_lock, flags);
+       *(int *)&reg_00 = io_apic_read(apic, 0);
+       *(int *)&reg_01 = io_apic_read(apic, 1);
+       if (reg_01.version >= 0x10)
+               *(int *)&reg_02 = io_apic_read(apic, 2);
+       spin_unlock_irqrestore(&ioapic_lock, flags);
+
+       printk("\n");
+       printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].mpc_apicid);
+       printk(KERN_DEBUG ".... register #00: %08X\n", *(int *)&reg_00);
+       printk(KERN_DEBUG ".......    : physical APIC id: %02X\n", reg_00.ID);
+       if (reg_00.__reserved_1 || reg_00.__reserved_2)
+               UNEXPECTED_IO_APIC();
+
+       printk(KERN_DEBUG ".... register #01: %08X\n", *(int *)&reg_01);
+       printk(KERN_DEBUG ".......     : max redirection entries: %04X\n", reg_01.entries);
+       if (    (reg_01.entries != 0x0f) && /* older (Neptune) boards */
+               (reg_01.entries != 0x17) && /* typical ISA+PCI boards */
+               (reg_01.entries != 0x1b) && /* Compaq Proliant boards */
+               (reg_01.entries != 0x1f) && /* dual Xeon boards */
+               (reg_01.entries != 0x22) && /* bigger Xeon boards */
+               (reg_01.entries != 0x2E) &&
+               (reg_01.entries != 0x3F)
+       )
+               UNEXPECTED_IO_APIC();
+
+       printk(KERN_DEBUG ".......     : PRQ implemented: %X\n", reg_01.PRQ);
+       printk(KERN_DEBUG ".......     : IO APIC version: %04X\n", reg_01.version);
+       if (    (reg_01.version != 0x01) && /* 82489DX IO-APICs */
+               (reg_01.version != 0x02) && /* 82801BA IO-APICs (ICH2) */
+               (reg_01.version != 0x10) && /* oldest IO-APICs */
+               (reg_01.version != 0x11) && /* Pentium/Pro IO-APICs */
+               (reg_01.version != 0x13) && /* Xeon IO-APICs */
+               (reg_01.version != 0x20)    /* Intel P64H (82806 AA) */
+       )
+               UNEXPECTED_IO_APIC();
+       if (reg_01.__reserved_1 || reg_01.__reserved_2)
+               UNEXPECTED_IO_APIC();
+
+       if (reg_01.version >= 0x10) {
+               printk(KERN_DEBUG ".... register #02: %08X\n", *(int *)&reg_02);
+               printk(KERN_DEBUG ".......     : arbitration: %02X\n", reg_02.arbitration);
+               if (reg_02.__reserved_1 || reg_02.__reserved_2)
+                       UNEXPECTED_IO_APIC();
+       }
+
+       printk(KERN_DEBUG ".... IRQ redirection table:\n");
+
+       printk(KERN_DEBUG " NR Log Phy Mask Trig IRR Pol"
+                         " Stat Dest Deli Vect:   \n");
+
+       for (i = 0; i <= reg_01.entries; i++) {
+               struct IO_APIC_route_entry entry;
+
+               spin_lock_irqsave(&ioapic_lock, flags);
+               *(((int *)&entry)+0) = io_apic_read(apic, 0x10+i*2);
+               *(((int *)&entry)+1) = io_apic_read(apic, 0x11+i*2);
+               spin_unlock_irqrestore(&ioapic_lock, flags);
+
+               printk(KERN_DEBUG " %02x %03X %02X  ",
+                       i,
+                       entry.dest.logical.logical_dest,
+                       entry.dest.physical.physical_dest
+               );
+
+               printk("%1d    %1d    %1d   %1d   %1d    %1d    %1d    %02X\n",
+                       entry.mask,
+                       entry.trigger,
+                       entry.irr,
+                       entry.polarity,
+                       entry.delivery_status,
+                       entry.dest_mode,
+                       entry.delivery_mode,
+                       entry.vector
+               );
+       }
+       }
+       printk(KERN_DEBUG "IRQ to pin mappings:\n");
+       for (i = 0; i < NR_IRQS; i++) {
+               struct irq_pin_list *entry = irq_2_pin + i;
+               if (entry->pin < 0)
+                       continue;
+               printk(KERN_DEBUG "IRQ%d ", i);
+               for (;;) {
+                       printk("-> %d:%d", entry->apic, entry->pin);
+                       if (!entry->next)
+                               break;
+                       entry = irq_2_pin + entry->next;
+               }
+               printk("\n");
+       }
+
+       printk(KERN_INFO ".................................... done.\n");
+
+       return;
+}
+
+static void print_APIC_bitfield (int base)
+{
+       unsigned int v;
+       int i, j;
+
+       printk(KERN_DEBUG "0123456789abcdef0123456789abcdef\n" KERN_DEBUG);
+       for (i = 0; i < 8; i++) {
+               v = apic_read(base + i*0x10);
+               for (j = 0; j < 32; j++) {
+                       if (v & (1<<j))
+                               printk("1");
+                       else
+                               printk("0");
+               }
+               printk("\n");
+       }
+}
+
+void /*__init*/ print_local_APIC(void * dummy)
+{
+       unsigned int v, ver, maxlvt;
+
+       printk("\n" KERN_DEBUG "printing local APIC contents on CPU#%d/%d:\n",
+               smp_processor_id(), hard_smp_processor_id());
+       v = apic_read(APIC_ID);
+       printk(KERN_INFO "... APIC ID:      %08x (%01x)\n", v, GET_APIC_ID(v));
+       v = apic_read(APIC_LVR);
+       printk(KERN_INFO "... APIC VERSION: %08x\n", v);
+       ver = GET_APIC_VERSION(v);
+       maxlvt = get_maxlvt();
+
+       v = apic_read(APIC_TASKPRI);
+       printk(KERN_DEBUG "... APIC TASKPRI: %08x (%02x)\n", v, v & APIC_TPRI_MASK);
+
+       if (APIC_INTEGRATED(ver)) {                     /* !82489DX */
+               v = apic_read(APIC_ARBPRI);
+               printk(KERN_DEBUG "... APIC ARBPRI: %08x (%02x)\n", v,
+                       v & APIC_ARBPRI_MASK);
+               v = apic_read(APIC_PROCPRI);
+               printk(KERN_DEBUG "... APIC PROCPRI: %08x\n", v);
+       }
+
+       v = apic_read(APIC_EOI);
+       printk(KERN_DEBUG "... APIC EOI: %08x\n", v);
+       v = apic_read(APIC_RRR);
+       printk(KERN_DEBUG "... APIC RRR: %08x\n", v);
+       v = apic_read(APIC_LDR);
+       printk(KERN_DEBUG "... APIC LDR: %08x\n", v);
+       v = apic_read(APIC_DFR);
+       printk(KERN_DEBUG "... APIC DFR: %08x\n", v);
+       v = apic_read(APIC_SPIV);
+       printk(KERN_DEBUG "... APIC SPIV: %08x\n", v);
+
+       printk(KERN_DEBUG "... APIC ISR field:\n");
+       print_APIC_bitfield(APIC_ISR);
+       printk(KERN_DEBUG "... APIC TMR field:\n");
+       print_APIC_bitfield(APIC_TMR);
+       printk(KERN_DEBUG "... APIC IRR field:\n");
+       print_APIC_bitfield(APIC_IRR);
+
+       if (APIC_INTEGRATED(ver)) {             /* !82489DX */
+               if (maxlvt > 3)         /* Due to the Pentium erratum 3AP. */
+                       apic_write(APIC_ESR, 0);
+               v = apic_read(APIC_ESR);
+               printk(KERN_DEBUG "... APIC ESR: %08x\n", v);
+       }
+
+       v = apic_read(APIC_ICR);
+       printk(KERN_DEBUG "... APIC ICR: %08x\n", v);
+       v = apic_read(APIC_ICR2);
+       printk(KERN_DEBUG "... APIC ICR2: %08x\n", v);
+
+       v = apic_read(APIC_LVTT);
+       printk(KERN_DEBUG "... APIC LVTT: %08x\n", v);
+
+       if (maxlvt > 3) {                       /* PC is LVT#4. */
+               v = apic_read(APIC_LVTPC);
+               printk(KERN_DEBUG "... APIC LVTPC: %08x\n", v);
+       }
+       v = apic_read(APIC_LVT0);
+       printk(KERN_DEBUG "... APIC LVT0: %08x\n", v);
+       v = apic_read(APIC_LVT1);
+       printk(KERN_DEBUG "... APIC LVT1: %08x\n", v);
+
+       if (maxlvt > 2) {                       /* ERR is LVT#3. */
+               v = apic_read(APIC_LVTERR);
+               printk(KERN_DEBUG "... APIC LVTERR: %08x\n", v);
+       }
+
+       v = apic_read(APIC_TMICT);
+       printk(KERN_DEBUG "... APIC TMICT: %08x\n", v);
+       v = apic_read(APIC_TMCCT);
+       printk(KERN_DEBUG "... APIC TMCCT: %08x\n", v);
+       v = apic_read(APIC_TDCR);
+       printk(KERN_DEBUG "... APIC TDCR: %08x\n", v);
+       printk("\n");
+}
+
+void print_all_local_APICs (void)
+{
+       smp_call_function(print_local_APIC, NULL, 1, 1);
+       print_local_APIC(NULL);
+}
+
+void /*__init*/ print_PIC(void)
+{
+       extern spinlock_t i8259A_lock;
+       unsigned int v;
+       unsigned long flags;
+
+       printk(KERN_DEBUG "\nprinting PIC contents\n");
+
+       spin_lock_irqsave(&i8259A_lock, flags);
+
+       v = inb(0xa1) << 8 | inb(0x21);
+       printk(KERN_DEBUG "... PIC  IMR: %04x\n", v);
+
+       v = inb(0xa0) << 8 | inb(0x20);
+       printk(KERN_DEBUG "... PIC  IRR: %04x\n", v);
+
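+       /* OCW3: select the in-service register for reading, then switch back to the IRR */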
+       outb(0x0b,0xa0);
+       outb(0x0b,0x20);
+       v = inb(0xa0) << 8 | inb(0x20);
+       outb(0x0a,0xa0);
+       outb(0x0a,0x20);
+
+       spin_unlock_irqrestore(&i8259A_lock, flags);
+
+       printk(KERN_DEBUG "... PIC  ISR: %04x\n", v);
+
+       v = inb(0x4d1) << 8 | inb(0x4d0);
+       printk(KERN_DEBUG "... PIC ELCR: %04x\n", v);
+}
+
+static void __init enable_IO_APIC(void)
+{
+       struct IO_APIC_reg_01 reg_01;
+       int i;
+       unsigned long flags;
+
+       for (i = 0; i < PIN_MAP_SIZE; i++) {
+               irq_2_pin[i].pin = -1;
+               irq_2_pin[i].next = 0;
+       }
+       if (!pirqs_enabled)
+               for (i = 0; i < MAX_PIRQS; i++)
+                       pirq_entries[i] = -1;
+
+       /*
+        * The number of IO-APIC IRQ registers (== #pins):
+        */
+       for (i = 0; i < nr_ioapics; i++) {
+               spin_lock_irqsave(&ioapic_lock, flags);
+               *(int *)&reg_01 = io_apic_read(i, 1);
+               spin_unlock_irqrestore(&ioapic_lock, flags);
+               nr_ioapic_registers[i] = reg_01.entries+1;
+       }
+
+       /*
+        * Do not trust the IO-APIC being empty at bootup
+        */
+       clear_IO_APIC();
+}
+
+/*
+ * Not an __init, needed by the reboot code
+ */
+void disable_IO_APIC(void)
+{
+       /*
+        * Clear the IO-APIC before rebooting:
+        */
+       clear_IO_APIC();
+
+       disconnect_bsp_APIC();
+}
+
+/*
+ * function to set the IO-APIC physical IDs based on the
+ * values stored in the MPC table.
+ *
+ * by Matt Domsch <Matt_Domsch@dell.com>  Tue Dec 21 12:25:05 CST 1999
+ */
+
+static void __init setup_ioapic_ids_from_mpc (void)
+{
+       struct IO_APIC_reg_00 reg_00;
+       unsigned long phys_id_present_map = phys_cpu_present_map;
+       int apic;
+       int i;
+       unsigned char old_id;
+       unsigned long flags;
+
+       /*
+        * Set the IOAPIC ID to the value stored in the MPC table.
+        */
+       for (apic = 0; apic < nr_ioapics; apic++) {
+
+               /* Read the register 0 value */
+               spin_lock_irqsave(&ioapic_lock, flags);
+               *(int *)&reg_00 = io_apic_read(apic, 0);
+               spin_unlock_irqrestore(&ioapic_lock, flags);
+               
+               old_id = mp_ioapics[apic].mpc_apicid;
+
+               if (mp_ioapics[apic].mpc_apicid >= 0xf) {
+                       printk(KERN_ERR "BIOS bug, IO-APIC#%d ID is %d in the MPC table!...\n",
+                               apic, mp_ioapics[apic].mpc_apicid);
+                       printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n",
+                               reg_00.ID);
+                       mp_ioapics[apic].mpc_apicid = reg_00.ID;
+               }
+
+               /*
+                * Sanity check, is the ID really free? Every APIC in a
+                * system must have a unique ID or we get lots of nice
+                * 'stuck on smp_invalidate_needed IPI wait' messages.
+                */
+               if (phys_id_present_map & (1 << mp_ioapics[apic].mpc_apicid)) {
+                       printk(KERN_ERR "BIOS bug, IO-APIC#%d ID %d is already used!...\n",
+                               apic, mp_ioapics[apic].mpc_apicid);
+                       for (i = 0; i < 0xf; i++)
+                               if (!(phys_id_present_map & (1 << i)))
+                                       break;
+                       if (i >= 0xf)
+                               panic("Max APIC ID exceeded!\n");
+                       printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n",
+                               i);
+                       phys_id_present_map |= 1 << i;
+                       mp_ioapics[apic].mpc_apicid = i;
+               } else {
+                       printk("Setting %d in the phys_id_present_map\n", mp_ioapics[apic].mpc_apicid);
+                       phys_id_present_map |= 1 << mp_ioapics[apic].mpc_apicid;
+               }
+
+
+               /*
+                * We need to adjust the IRQ routing table
+                * if the ID changed.
+                */
+               if (old_id != mp_ioapics[apic].mpc_apicid)
+                       for (i = 0; i < mp_irq_entries; i++)
+                               if (mp_irqs[i].mpc_dstapic == old_id)
+                                       mp_irqs[i].mpc_dstapic
+                                               = mp_ioapics[apic].mpc_apicid;
+
+               /*
+                * Read the right value from the MPC table and
+                * write it into the ID register.
+                */
+               printk(KERN_INFO "...changing IO-APIC physical APIC ID to %d ...",
+                               mp_ioapics[apic].mpc_apicid);
+
+               reg_00.ID = mp_ioapics[apic].mpc_apicid;
+               spin_lock_irqsave(&ioapic_lock, flags);
+               io_apic_write(apic, 0, *(int *)&reg_00);
+               spin_unlock_irqrestore(&ioapic_lock, flags);
+
+               /*
+                * Sanity check
+                */
+               spin_lock_irqsave(&ioapic_lock, flags);
+               *(int *)&reg_00 = io_apic_read(apic, 0);
+               spin_unlock_irqrestore(&ioapic_lock, flags);
+               if (reg_00.ID != mp_ioapics[apic].mpc_apicid)
+                       panic("could not set ID!\n");
+               else
+                       printk(" ok.\n");
+       }
+}
+
+/*
+ * There is a nasty bug in some older SMP boards, their mptable lies
+ * about the timer IRQ. We do the following to work around the situation:
+ *
+ *     - timer IRQ defaults to IO-APIC IRQ
+ *     - if this function detects that timer IRQs are defunct, then we fall
+ *       back to ISA timer IRQs
+ */
+static int __init timer_irq_works(void)
+{
+       unsigned int t1 = jiffies;
+
+       sti();
+       /* Let ten ticks pass... */
+       mdelay((10 * 1000) / HZ);
+
+       /*
+        * Expect a few ticks at least, to be sure some possible
+        * glue logic does not lock up after one or two first
+        * ticks in a non-ExtINT mode.  Also the local APIC
+        * might have cached one ExtINT interrupt.  Finally, at
+        * least one tick may be lost due to delays.
+        */
+       if (jiffies - t1 > 4)
+               return 1;
+
+       return 0;
+}
+
+/*
+ * In the SMP+IOAPIC case it might happen that there are an unspecified
+ * number of pending IRQ events unhandled. These cases are very rare,
+ * so we 'resend' these IRQs via IPIs, to the same CPU. It's much
+ * better to do it this way because then we do not have to be aware of
+ * 'pending' interrupts in the IRQ path, except at this point.
+ */
+/*
+ * Edge triggered needs to resend any interrupt
+ * that was delayed but this is now handled in the device
+ * independent code.
+ */
+#define enable_edge_ioapic_irq unmask_IO_APIC_irq
+
+static void disable_edge_ioapic_irq (unsigned int irq) { /* nothing */ }
+
+/*
+ * Starting up an edge-triggered IO-APIC interrupt is
+ * nasty - we need to make sure that we get the edge.
+ * If it is already asserted for some reason, we need to
+ * return 1 to indicate that it was pending.
+ *
+ * This is not complete - we should be able to fake
+ * an edge even if it isn't on the 8259A...
+ */
+
+static unsigned int startup_edge_ioapic_irq(unsigned int irq)
+{
+       int was_pending = 0;
+       unsigned long flags;
+
+       spin_lock_irqsave(&ioapic_lock, flags);
+       if (irq < 16) {
+               disable_8259A_irq(irq);
+               if (i8259A_irq_pending(irq))
+                       was_pending = 1;
+       }
+       __unmask_IO_APIC_irq(irq);
+       spin_unlock_irqrestore(&ioapic_lock, flags);
+
+       return was_pending;
+}
+
+#define shutdown_edge_ioapic_irq       disable_edge_ioapic_irq
+
+/*
+ * Once we have recorded IRQ_PENDING already, we can mask the
+ * interrupt for real. This prevents IRQ storms from unhandled
+ * devices.
+ */
+static void ack_edge_ioapic_irq(unsigned int irq)
+{
+       if ((irq_desc[irq].status & (IRQ_PENDING | IRQ_DISABLED))
+                                       == (IRQ_PENDING | IRQ_DISABLED))
+               mask_IO_APIC_irq(irq);
+       ack_APIC_irq();
+}
+
+static void end_edge_ioapic_irq (unsigned int i) { /* nothing */ }
+
+
+/*
+ * Level triggered interrupts can just be masked,
+ * and shutting down and starting up the interrupt
+ * is the same as enabling and disabling them -- except
+ * with a startup need to return a "was pending" value.
+ *
+ * Level triggered interrupts are special because we
+ * do not touch any IO-APIC register while handling
+ * them. We ack the APIC in the end-IRQ handler, not
+ * in the start-IRQ-handler. Protection against reentrance
+ * from the same interrupt is still provided, both by the
+ * generic IRQ layer and by the fact that an unacked local
+ * APIC does not accept IRQs.
+ */
+static unsigned int startup_level_ioapic_irq (unsigned int irq)
+{
+       unmask_IO_APIC_irq(irq);
+
+       return 0; /* don't check for pending */
+}
+
+#define shutdown_level_ioapic_irq      mask_IO_APIC_irq
+#define enable_level_ioapic_irq                unmask_IO_APIC_irq
+#define disable_level_ioapic_irq       mask_IO_APIC_irq
+
+static void end_level_ioapic_irq (unsigned int irq)
+{
+       unsigned long v;
+       int i;
+
+/*
+ * It appears there is an erratum which affects at least version 0x11
+ * of I/O APIC (that's the 82093AA and cores integrated into various
+ * chipsets).  Under certain conditions a level-triggered interrupt is
+ * erroneously delivered as edge-triggered one but the respective IRR
+ * bit gets set nevertheless.  As a result the I/O unit expects an EOI
+ * message but it will never arrive and further interrupts are blocked
+ * from the source.  The exact reason is so far unknown, but the
+ * phenomenon was observed when two consecutive interrupt requests
+ * from a given source get delivered to the same CPU and the source is
+ * temporarily disabled in between.
+ *
+ * A workaround is to simulate an EOI message manually.  We achieve it
+ * by setting the trigger mode to edge and then to level when the edge
+ * trigger mode gets detected in the TMR of a local APIC for a
+ * level-triggered interrupt.  We mask the source for the time of the
+ * operation to prevent an edge-triggered interrupt escaping meanwhile.
+ * The idea is from Manfred Spraul.  --macro
+ */
+       i = IO_APIC_VECTOR(irq);
+       v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1));
+
+       ack_APIC_irq();
+
+       if (!(v & (1 << (i & 0x1f)))) {
+#ifdef APIC_LOCKUP_DEBUG
+               struct irq_pin_list *entry;
+#endif
+
+#ifdef APIC_MISMATCH_DEBUG
+               atomic_inc(&irq_mis_count);
+#endif
+               spin_lock(&ioapic_lock);
+               __mask_and_edge_IO_APIC_irq(irq);
+#ifdef APIC_LOCKUP_DEBUG
+               for (entry = irq_2_pin + irq;;) {
+                       unsigned int reg;
+
+                       if (entry->pin == -1)
+                               break;
+                       reg = io_apic_read(entry->apic, 0x10 + entry->pin * 2);
+                       if (reg & 0x00004000)
+                               printk(KERN_CRIT "Aieee!!!  Remote IRR"
+                                       " still set after unlock!\n");
+                       if (!entry->next)
+                               break;
+                       entry = irq_2_pin + entry->next;
+               }
+#endif
+               __unmask_and_level_IO_APIC_irq(irq);
+               spin_unlock(&ioapic_lock);
+       }
+}
+
+static void mask_and_ack_level_ioapic_irq (unsigned int irq) { /* nothing */ }
+
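+/*
+ * Rewrite the logical destination of every redirection entry for this
+ * IRQ: the destination field is the top byte of the high dword
+ * (register offset 1 in __DO_ACTION), hence the shift by 24.
+ */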
+static void set_ioapic_affinity (unsigned int irq, unsigned long mask)
+{
+       unsigned long flags;
+       /*
+        * Only the first 8 bits are valid.
+        */
+       mask = mask << 24;
+
+       spin_lock_irqsave(&ioapic_lock, flags);
+       __DO_ACTION(1, = mask, )
+       spin_unlock_irqrestore(&ioapic_lock, flags);
+}
+
+/*
+ * Level and edge triggered IO-APIC interrupts need different handling,
+ * so we use two separate IRQ descriptors. Edge triggered IRQs can be
+ * handled with the level-triggered descriptor, but that one has slightly
+ * more overhead. Level-triggered interrupts cannot be handled with the
+ * edge-triggered handler, without risking IRQ storms and other ugly
+ * races.
+ */
+
+static struct hw_interrupt_type ioapic_edge_irq_type = {
+       "IO-APIC-edge",
+       startup_edge_ioapic_irq,
+       shutdown_edge_ioapic_irq,
+       enable_edge_ioapic_irq,
+       disable_edge_ioapic_irq,
+       ack_edge_ioapic_irq,
+       end_edge_ioapic_irq,
+       set_ioapic_affinity,
+};
+
+static struct hw_interrupt_type ioapic_level_irq_type = {
+       "IO-APIC-level",
+       startup_level_ioapic_irq,
+       shutdown_level_ioapic_irq,
+       enable_level_ioapic_irq,
+       disable_level_ioapic_irq,
+       mask_and_ack_level_ioapic_irq,
+       end_level_ioapic_irq,
+       set_ioapic_affinity,
+};
+
+static inline void init_IO_APIC_traps(void)
+{
+       int irq;
+
+       /*
+        * NOTE! The local APIC isn't very good at handling
+        * multiple interrupts at the same interrupt level.
+        * As the interrupt level is determined by taking the
+        * vector number and shifting that right by 4, we
+        * want to spread these out a bit so that they don't
+        * all fall in the same interrupt level.
+        *
+        * Also, we've got to be careful not to trash gate
+        * 0x80, because int 0x80 is hm, kind of importantish. ;)
+        */
+       for (irq = 0; irq < NR_IRQS ; irq++) {
+               if (IO_APIC_IRQ(irq) && !IO_APIC_VECTOR(irq)) {
+                       /*
+                        * Hmm.. We don't have an entry for this,
+                        * so default to an old-fashioned 8259
+                        * interrupt if we can..
+                        */
+                       if (irq < 16)
+                               make_8259A_irq(irq);
+                       else
+                               /* Strange. Oh, well.. */
+                               irq_desc[irq].handler = &no_irq_type;
+               }
+       }
+}
+
+static void enable_lapic_irq (unsigned int irq)
+{
+       unsigned long v;
+
+       v = apic_read(APIC_LVT0);
+       apic_write_around(APIC_LVT0, v & ~APIC_LVT_MASKED);
+}
+
+static void disable_lapic_irq (unsigned int irq)
+{
+       unsigned long v;
+
+       v = apic_read(APIC_LVT0);
+       apic_write_around(APIC_LVT0, v | APIC_LVT_MASKED);
+}
+
+static void ack_lapic_irq (unsigned int irq)
+{
+       ack_APIC_irq();
+}
+
+static void end_lapic_irq (unsigned int i) { /* nothing */ }
+
+static struct hw_interrupt_type lapic_irq_type = {
+       "local-APIC-edge",
+       NULL, /* startup_irq() not used for IRQ0 */
+       NULL, /* shutdown_irq() not used for IRQ0 */
+       enable_lapic_irq,
+       disable_lapic_irq,
+       ack_lapic_irq,
+       end_lapic_irq
+};
+
+static void enable_NMI_through_LVT0 (void * dummy)
+{
+       unsigned int v, ver;
+
+       ver = apic_read(APIC_LVR);
+       ver = GET_APIC_VERSION(ver);
+       v = APIC_DM_NMI;                        /* unmask and set to NMI */
+       if (!APIC_INTEGRATED(ver))              /* 82489DX */
+               v |= APIC_LVT_LEVEL_TRIGGER;
+       apic_write_around(APIC_LVT0, v);
+}
+
+static void setup_nmi (void)
+{
+       /*
+        * Dirty trick to enable the NMI watchdog ...
+        * We put the 8259A master into AEOI mode and
+        * unmask on all local APICs LVT0 as NMI.
+        *
+        * The idea to use the 8259A in AEOI mode ('8259A Virtual Wire')
+        * is from Maciej W. Rozycki - so we do not have to EOI from
+        * the NMI handler or the timer interrupt.
+        */ 
+       printk(KERN_INFO "activating NMI Watchdog ...");
+
+       smp_call_function(enable_NMI_through_LVT0, NULL, 1, 1);
+       enable_NMI_through_LVT0(NULL);
+
+       printk(" done.\n");
+}
+
+/*
+ * This looks a bit hackish but it's about the only way of sending
+ * a few INTA cycles to 8259As and any associated glue logic.  ICR does
+ * not support the ExtINT mode, unfortunately.  We need to send these
+ * cycles as some i82489DX-based boards have glue logic that keeps the
+ * 8259A interrupt line asserted until INTA.  --macro
+ */
+static inline void unlock_ExtINT_logic(void)
+{
+       int pin, i;
+       struct IO_APIC_route_entry entry0, entry1;
+       unsigned char save_control, save_freq_select;
+       unsigned long flags;
+
+       pin = find_isa_irq_pin(8, mp_INT);
+       if (pin == -1)
+               return;
+
+       spin_lock_irqsave(&ioapic_lock, flags);
+       *(((int *)&entry0) + 1) = io_apic_read(0, 0x11 + 2 * pin);
+       *(((int *)&entry0) + 0) = io_apic_read(0, 0x10 + 2 * pin);
+       spin_unlock_irqrestore(&ioapic_lock, flags);
+       clear_IO_APIC_pin(0, pin);
+
+       memset(&entry1, 0, sizeof(entry1));
+
+       entry1.dest_mode = 0;                   /* physical delivery */
+       entry1.mask = 0;                        /* unmask IRQ now */
+       entry1.dest.physical.physical_dest = hard_smp_processor_id();
+       entry1.delivery_mode = dest_ExtINT;
+       entry1.polarity = entry0.polarity;
+       entry1.trigger = 0;
+       entry1.vector = 0;
+
+       spin_lock_irqsave(&ioapic_lock, flags);
+       io_apic_write(0, 0x11 + 2 * pin, *(((int *)&entry1) + 1));
+       io_apic_write(0, 0x10 + 2 * pin, *(((int *)&entry1) + 0));
+       spin_unlock_irqrestore(&ioapic_lock, flags);
+
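+       /* Program the RTC for a 1024 Hz periodic interrupt so that IRQ 8 fires and produces INTA cycles. */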
+       save_control = CMOS_READ(RTC_CONTROL);
+       save_freq_select = CMOS_READ(RTC_FREQ_SELECT);
+       CMOS_WRITE((save_freq_select & ~RTC_RATE_SELECT) | 0x6,
+                  RTC_FREQ_SELECT);
+       CMOS_WRITE(save_control | RTC_PIE, RTC_CONTROL);
+
+       i = 100;
+       while (i-- > 0) {
+               mdelay(10);
+               if ((CMOS_READ(RTC_INTR_FLAGS) & RTC_PF) == RTC_PF)
+                       i -= 10;
+       }
+
+       CMOS_WRITE(save_control, RTC_CONTROL);
+       CMOS_WRITE(save_freq_select, RTC_FREQ_SELECT);
+       clear_IO_APIC_pin(0, pin);
+
+       spin_lock_irqsave(&ioapic_lock, flags);
+       io_apic_write(0, 0x11 + 2 * pin, *(((int *)&entry0) + 1));
+       io_apic_write(0, 0x10 + 2 * pin, *(((int *)&entry0) + 0));
+       spin_unlock_irqrestore(&ioapic_lock, flags);
+}
+
+/*
+ * This code may look a bit paranoid, but it's supposed to cooperate with
+ * a wide range of boards and BIOS bugs.  Fortunately only the timer IRQ
+ * is so screwy.  Thanks to Brian Perkins for testing/hacking this beast
+ * fanatically on his truly buggy board.
+ */
+static inline void check_timer(void)
+{
+       extern int timer_ack;
+       int pin1, pin2;
+       int vector;
+
+       /*
+        * get/set the timer IRQ vector:
+        */
+       disable_8259A_irq(0);
+       vector = assign_irq_vector(0);
+       set_intr_gate(vector, interrupt[0]);
+
+       /*
+        * Subtle, code in do_timer_interrupt() expects an AEOI
+        * mode for the 8259A whenever interrupts are routed
+        * through I/O APICs.  Also IRQ0 has to be enabled in
+        * the 8259A which implies the virtual wire has to be
+        * disabled in the local APIC.
+        */
+       apic_write_around(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT);
+       init_8259A(1);
+       timer_ack = 1;
+       enable_8259A_irq(0);
+
+       pin1 = find_isa_irq_pin(0, mp_INT);
+       pin2 = find_isa_irq_pin(0, mp_ExtINT);
+
+       printk(KERN_INFO "..TIMER: vector=0x%02X pin1=%d pin2=%d\n", vector, pin1, pin2);
+
+       if (pin1 != -1) {
+               /*
+                * Ok, does IRQ0 through the IOAPIC work?
+                */
+               unmask_IO_APIC_irq(0);
+               if (timer_irq_works()) {
+                       if (nmi_watchdog == NMI_IO_APIC) {
+                               disable_8259A_irq(0);
+                               setup_nmi();
+                               enable_8259A_irq(0);
+                               check_nmi_watchdog();
+                       }
+                       return;
+               }
+               clear_IO_APIC_pin(0, pin1);
+               printk(KERN_ERR "..MP-BIOS bug: 8254 timer not connected to IO-APIC\n");
+       }
+
+       printk(KERN_INFO "...trying to set up timer (IRQ0) through the 8259A ... ");
+       if (pin2 != -1) {
+               printk("\n..... (found pin %d) ...", pin2);
+               /*
+                * legacy devices should be connected to IO APIC #0
+                */
+               setup_ExtINT_IRQ0_pin(pin2, vector);
+               if (timer_irq_works()) {
+                       printk("works.\n");
+                       if (nmi_watchdog == NMI_IO_APIC) {
+                               setup_nmi();
+                               check_nmi_watchdog();
+                       }
+                       return;
+               }
+               /*
+                * Cleanup, just in case ...
+                */
+               clear_IO_APIC_pin(0, pin2);
+       }
+       printk(" failed.\n");
+
+       if (nmi_watchdog) {
+               printk(KERN_WARNING "timer doesn't work through the IO-APIC - disabling NMI Watchdog!\n");
+               nmi_watchdog = 0;
+       }
+
+       printk(KERN_INFO "...trying to set up timer as Virtual Wire IRQ...");
+
+       disable_8259A_irq(0);
+       irq_desc[0].handler = &lapic_irq_type;
+       apic_write_around(APIC_LVT0, APIC_DM_FIXED | vector);   /* Fixed mode */
+       enable_8259A_irq(0);
+
+       if (timer_irq_works()) {
+               printk(" works.\n");
+               return;
+       }
+       apic_write_around(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | vector);
+       printk(" failed.\n");
+
+       printk(KERN_INFO "...trying to set up timer as ExtINT IRQ...");
+
+       init_8259A(0);
+       make_8259A_irq(0);
+       apic_write_around(APIC_LVT0, APIC_DM_EXTINT);
+
+       unlock_ExtINT_logic();
+
+       if (timer_irq_works()) {
+               printk(" works.\n");
+               return;
+       }
+       printk(" failed :(.\n");
+       panic("IO-APIC + timer doesn't work! pester mingo@redhat.com");
+}
+
+/*
+ *
+ * IRQs that are handled by the old PIC in all cases:
+ * - IRQ2 is the cascade IRQ, and cannot be an IO-APIC IRQ.
+ *   Linux doesn't really care, as it's not actually used
+ *   for any interrupt handling anyway.
+ * - There used to be IRQ13 here as well, but MPS-compliant
+ *   systems must not use it for FPU coupling and we want to
+ *   use exception 16 anyway.  And there are systems that
+ *   connect it to an I/O APIC for other uses.
+ *   Thus we don't mark it special any longer.
+ *
+ * Additionally, something is definitely wrong with irq9
+ * on PIIX4 boards.
+ */
+#define PIC_IRQS       (1<<2)
+
+void __init setup_IO_APIC(void)
+{
+       enable_IO_APIC();
+
+       io_apic_irqs = ~PIC_IRQS;
+       printk("ENABLING IO-APIC IRQs\n");
+
+       /*
+        * Set up the IO-APIC IRQ routing table by parsing the MP-BIOS
+        * mptable:
+        */
+       setup_ioapic_ids_from_mpc();
+       sync_Arb_IDs();
+       setup_IO_APIC_irqs();
+       init_IO_APIC_traps();
+       check_timer();
+       print_IO_APIC();
+}
diff --git a/arch/x86_64/kernel/ioport.c b/arch/x86_64/kernel/ioport.c
new file mode 100644 (file)
index 0000000..a0ab1a1
--- /dev/null
@@ -0,0 +1,114 @@
+/*
+ *     linux/arch/x86_64/kernel/ioport.c
+ *
+ * This contains the io-permission bitmap code - written by obz, with changes
+ * by Linus.
+ */
+
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/ioport.h>
+#include <linux/mm.h>
+#include <linux/smp.h>
+#include <linux/smp_lock.h>
+#include <linux/stddef.h>
+
+/* Set EXTENT bits starting at BASE in BITMAP to value TURN_ON. */
+static void set_bitmap(unsigned long *bitmap, short base, short extent, int new_value)
+{
+       unsigned long mask;
+       unsigned long *bitmap_base = bitmap + (base >> 6);
+       unsigned short low_index = base & 0x3f;
+       int length = low_index + extent;
+
+       if (low_index != 0) {
+               mask = (~0UL << low_index);
+               if (length < 64)
+                       mask &= ~(~0UL << length);
+               if (new_value)
+                       *bitmap_base++ |= mask;
+               else
+                       *bitmap_base++ &= ~mask;
+               length -= 64;
+       }
+
+       mask = (new_value ? ~0UL : 0UL);
+       while (length >= 64) {
+               *bitmap_base++ = mask;
+               length -= 64;
+       }
+
+       if (length > 0) {
+               mask = ~(~0UL << length);
+               if (new_value)
+                       *bitmap_base++ |= mask;
+               else
+                       *bitmap_base++ &= ~mask;
+       }
+}
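
As a worked example of the word arithmetic above (assuming the 64-bit form shown): for base = 0x3f8 and extent = 8, the word index is 0x3f8 >> 6 = 15 and low_index = 0x38, so the first branch alone sets (or clears) bits 56..63 of word 15; length then drops to 0, so the middle loop and the tail are skipped.
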
+
+/*
+ * this changes the io permissions bitmap in the current task.
+ */
+asmlinkage int sys_ioperm(unsigned long from, unsigned long num, int turn_on)
+{
+       struct thread_struct * t = &current->thread;
+       struct tss_struct * tss = init_tss + smp_processor_id();
+
+       if ((from + num <= from) || (from + num > IO_BITMAP_SIZE*32))
+               return -EINVAL;
+       if (turn_on && !capable(CAP_SYS_RAWIO))
+               return -EPERM;
+       /*
+        * If it's the first ioperm() call in this thread's lifetime, set the
+        * IO bitmap up. ioperm() is much less timing critical than clone(),
+        * this is why we delay this operation until now:
+        */
+       if (!t->ioperm) {
+               /*
+                * just in case ...
+                */
+               memset(t->io_bitmap,0xff,(IO_BITMAP_SIZE+1)*4);
+               t->ioperm = 1;
+               /*
+                * this activates it in the TSS
+                */
+               tss->io_map_base = IO_BITMAP_OFFSET;
+       }
+
+       /*
+        * do it in the per-thread copy and in the TSS ...
+        */
+       set_bitmap((unsigned long *) t->io_bitmap, from, num, !turn_on);
+       set_bitmap((unsigned long *) tss->io_bitmap, from, num, !turn_on);
+
+       return 0;
+}
+
+/*
+ * sys_iopl has to be used when you want to access the IO ports
+ * beyond the 0x3ff range: to get the full 65536 ports bitmapped
+ * you'd need 8kB of bitmaps/process, which is a bit excessive.
+ *
+ * Here we just change the eflags value on the stack: we allow
+ * only the super-user to do it. This depends on the stack-layout
+ * on system-call entry - see also fork() and the signal handling
+ * code.
+ */
+
+asmlinkage long sys_iopl(unsigned int level, struct pt_regs regs)
+{
+       unsigned int old = (regs.eflags >> 12) & 3;
+
+       if (level > 3)
+               return -EINVAL;
+       /* Trying to gain more privileges? */
+       if (level > old) {
+               if (!capable(CAP_SYS_RAWIO))
+                       return -EPERM;
+       }
+       regs.eflags = (regs.eflags & 0xffffffffffffcfff) | (level << 12);
+       return 0;
+}
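
For orientation, a minimal user-space sketch of how these two syscalls are normally exercised (illustrative only, not part of this patch; the glibc wrappers in <sys/io.h> map onto sys_ioperm()/sys_iopl(), and port 0x378 is just an example):

	/* needs CAP_SYS_RAWIO (typically root) */
	#include <stdio.h>
	#include <sys/io.h>

	int main(void)
	{
		/* allow this process to touch the 8 ports at 0x378 */
		if (ioperm(0x378, 8, 1) < 0) {
			perror("ioperm");
			return 1;
		}
		outb(0x55, 0x378);	/* permitted via the TSS io bitmap */
		ioperm(0x378, 8, 0);	/* drop the permission again */

		/* ports above 0x3ff need the privilege-level route instead */
		if (iopl(3) < 0)
			perror("iopl");
		return 0;
	}
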
diff --git a/arch/x86_64/kernel/irq.c b/arch/x86_64/kernel/irq.c
new file mode 100644 (file)
index 0000000..1297438
--- /dev/null
@@ -0,0 +1,1198 @@
+/*
+ *     linux/arch/x86_64/kernel/irq.c
+ *
+ *     Copyright (C) 1992, 1998 Linus Torvalds, Ingo Molnar
+ *
+ * This file contains the code used by various IRQ handling routines:
+ * asking for different IRQ's should be done through these routines
+ * instead of just grabbing them. Thus setups with different IRQ numbers
+ * shouldn't result in any weird surprises, and installing new handlers
+ * should be easier.
+ */
+
+/*
+ * (mostly architecture independent, will move to kernel/irq.c in 2.5.)
+ *
+ * IRQs are in fact implemented a bit like signal handlers for the kernel.
+ * Naturally it's not a 1:1 relation, but there are similarities.
+ */
+
+#include <linux/config.h>
+#include <linux/ptrace.h>
+#include <linux/errno.h>
+#include <linux/signal.h>
+#include <linux/sched.h>
+#include <linux/ioport.h>
+#include <linux/interrupt.h>
+#include <linux/timex.h>
+#include <linux/slab.h>
+#include <linux/random.h>
+#include <linux/smp_lock.h>
+#include <linux/init.h>
+#include <linux/kernel_stat.h>
+#include <linux/irq.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+
+#include <asm/atomic.h>
+#include <asm/io.h>
+#include <asm/smp.h>
+#include <asm/system.h>
+#include <asm/bitops.h>
+#include <asm/uaccess.h>
+#include <asm/pgalloc.h>
+#include <asm/delay.h>
+#include <asm/desc.h>
+#include <asm/irq.h>
+
+
+
+/*
+ * Linux has a controller-independent x86 interrupt architecture.
+ * Every controller has a 'controller-template' that is used
+ * by the main code to do the right thing. Each driver-visible
+ * interrupt source is transparently wired to the appropriate
+ * controller. Thus drivers need not be aware of the
+ * interrupt-controller.
+ *
+ * Various interrupt controllers we handle: 8259 PIC, SMP IO-APIC,
+ * PIIX4's internal 8259 PIC and SGI's Visual Workstation Cobalt (IO-)APIC.
+ * (IO-APICs assumed to be messaging to Pentium local-APICs)
+ *
+ * The code is designed to be easily extended with new/different
+ * interrupt controllers, without having to do assembly magic.
+ */
+
+/*
+ * Controller mappings for all interrupt sources:
+ */
+irq_desc_t irq_desc[NR_IRQS] __cacheline_aligned =
+       { [0 ... NR_IRQS-1] = { 0, &no_irq_type, NULL, 0, SPIN_LOCK_UNLOCKED}};
+
+static void register_irq_proc (unsigned int irq);
+
+/*
+ * Special irq handlers.
+ */
+
+void no_action(int cpl, void *dev_id, struct pt_regs *regs) { }
+
+/*
+ * Generic no controller code
+ */
+
+static void enable_none(unsigned int irq) { }
+static unsigned int startup_none(unsigned int irq) { return 0; }
+static void disable_none(unsigned int irq) { }
+static void ack_none(unsigned int irq)
+{
+/*
+ * 'what should we do if we get a hw irq event on an illegal vector'.
+ * Each architecture has to answer this itself; it doesn't deserve
+ * a generic callback, I think.
+ */
+#if CONFIG_X86
+       printk("unexpected IRQ trap at vector %02x\n", irq);
+#ifdef CONFIG_X86_LOCAL_APIC
+       /*
+        * Currently unexpected vectors happen only on SMP and APIC.
+        * We _must_ ack these because every local APIC has only N
+        * irq slots per priority level, and a 'hanging, unacked' IRQ
+        * holds up an irq slot - in excessive cases (when multiple
+        * unexpected vectors occur) that might lock up the APIC
+        * completely.
+        */
+       ack_APIC_irq();
+#endif
+#endif
+}
+
+/* startup is the same as "enable", shutdown is the same as "disable" */
+#define shutdown_none  disable_none
+#define end_none       enable_none
+
+struct hw_interrupt_type no_irq_type = {
+       "none",
+       startup_none,
+       shutdown_none,
+       enable_none,
+       disable_none,
+       ack_none,
+       end_none
+};
+
+atomic_t irq_err_count;
+#ifdef CONFIG_X86_IO_APIC
+#ifdef APIC_MISMATCH_DEBUG
+atomic_t irq_mis_count;
+#endif
+#endif
+
+/*
+ * Generic, controller-independent functions:
+ */
+
+int show_interrupts(struct seq_file *p, void *v)
+{
+       int i, j;
+       struct irqaction * action;
+
+       seq_printf(p, "           ");
+       for (j=0; j<smp_num_cpus; j++)
+               seq_printf(p, "CPU%d       ",j);
+       seq_putc(p, '\n');
+
+       for (i = 0 ; i < NR_IRQS ; i++) {
+               action = irq_desc[i].action;
+               if (!action) 
+                       continue;
+               seq_printf(p, "%3d: ",i);
+#ifndef CONFIG_SMP
+               seq_printf(p, "%10u ", kstat_irqs(i));
+#else
+               for (j = 0; j < smp_num_cpus; j++)
+                       seq_printf(p, "%10u ",
+                               kstat.irqs[cpu_logical_map(j)][i]);
+#endif
+               seq_printf(p, " %14s", irq_desc[i].handler->typename);
+               seq_printf(p, "  %s", action->name);
+
+               for (action=action->next; action; action = action->next)
+                       seq_printf(p, ", %s", action->name);
+               seq_putc(p, '\n');
+       }
+       seq_printf(p, "NMI: ");
+       for (j = 0; j < smp_num_cpus; j++)
+               seq_printf(p, "%10u ", nmi_count(cpu_logical_map(j)));
+       seq_putc(p, '\n');
+#if CONFIG_X86_LOCAL_APIC
+       seq_printf(p, "LOC: ");
+       for (j = 0; j < smp_num_cpus; j++)
+               seq_printf(p, "%10u ", apic_timer_irqs[cpu_logical_map(j)]);
+       seq_putc(p, '\n');
+#endif
+       seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count));
+#ifdef CONFIG_X86_IO_APIC
+#ifdef APIC_MISMATCH_DEBUG
+       seq_printf(p, "MIS: %10u\n", atomic_read(&irq_mis_count));
+#endif
+#endif
+       return 0;
+}
+
+/*
+ * Global interrupt locks for SMP. Allow interrupts to come in on any
+ * CPU, yet make cli/sti act globally to protect critical regions..
+ */
+
+#ifdef CONFIG_SMP
+unsigned char global_irq_holder = NO_PROC_ID;
+unsigned volatile long global_irq_lock; /* pedantic: long for set_bit --RR */
+
+extern void show_stack(unsigned long* esp);
+
+
+/* XXX: this unfortunately doesn't support irqstacks currently, should check the other PDAs */
+static void show(char * str)
+{
+       int i;
+       int cpu = smp_processor_id();
+
+       printk("\n%s, CPU %d:\n", str, cpu);
+       printk("irq:  %d [",irqs_running());
+       for(i=0;i < smp_num_cpus;i++)
+               printk(" %d",local_irq_count(i));
+       printk(" ]\nbh:   %d [",spin_is_locked(&global_bh_lock) ? 1 : 0);
+       for(i=0;i < smp_num_cpus;i++)
+               printk(" %d",local_bh_count(i));
+
+       printk(" ]\nStack dumps:");
+       for(i = 0; i < smp_num_cpus; i++) {
+               unsigned long esp;
+               if (i == cpu)
+                       continue;
+               printk("\nCPU %d:",i);
+               esp = init_tss[i].rsp0;
+               if (!esp) {
+                       /* tss->rsp0 is set to NULL in cpu_init(),
+                        * it's initialized when the cpu returns to user
+                        * space. -- manfreds
+                        */
+                       printk(" <unknown> ");
+                       continue;
+               }
+               esp &= ~(THREAD_SIZE-1);
+               esp += sizeof(struct thread_info);
+               show_stack((void*)esp);
+       }
+       printk("\nCPU %d:",cpu);
+       show_stack(NULL);
+       printk("\n");
+}
+       
+#define MAXCOUNT 100000000
+
+/*
+ * I had a lockup scenario where a tight loop doing
+ * spin_unlock()/spin_lock() on CPU#1 was racing with
+ * spin_lock() on CPU#0. CPU#0 should have noticed spin_unlock(), but
+ * apparently the spin_unlock() information did not make it
+ * through to CPU#0 ... nasty, is this by design, do we have to limit
+ * 'memory update oscillation frequency' artificially like here?
+ *
+ * Such 'high frequency update' races can be avoided by careful design, but
+ * some of our major constructs like spinlocks use similar techniques,
+ * it would be nice to clarify this issue. Set this define to 0 if you
+ * want to check whether your system freezes.  I suspect the delay done
+ * by SYNC_OTHER_CORES() correlates with 'snooping latency', but
+ * I thought that such things are guaranteed by design, since we use
+ * the 'LOCK' prefix.
+ */
+#define SUSPECTED_CPU_OR_CHIPSET_BUG_WORKAROUND 0
+
+#if SUSPECTED_CPU_OR_CHIPSET_BUG_WORKAROUND
+# define SYNC_OTHER_CORES(x) udelay(x+1)
+#else
+/*
+ * We have to allow irqs to arrive between __sti and __cli
+ */
+# define SYNC_OTHER_CORES(x) __asm__ __volatile__ ("nop")
+#endif
+
+static inline void wait_on_irq(int cpu)
+{
+       int count = MAXCOUNT;
+
+       for (;;) {
+
+               /*
+                * Wait until all interrupts are gone. Wait
+                * for bottom half handlers unless we're
+                * already executing in one..
+                */
+               if (!irqs_running())
+                       if (local_bh_count(cpu) || !spin_is_locked(&global_bh_lock))
+                               break;
+
+               /* Duh, we have to loop. Release the lock to avoid deadlocks */
+               clear_bit(0,&global_irq_lock);
+
+               for (;;) {
+                       if (!--count) {
+                               show("wait_on_irq");
+                               count = ~0;
+                       }
+                       __sti();
+                       SYNC_OTHER_CORES(cpu);
+                       __cli();
+                       if (irqs_running())
+                               continue;
+                       if (global_irq_lock)
+                               continue;
+                       if (!local_bh_count(cpu) && spin_is_locked(&global_bh_lock))
+                               continue;
+                       if (!test_and_set_bit(0,&global_irq_lock))
+                               break;
+               }
+       }
+}
+
+/*
+ * This is called when we want to synchronize with
+ * interrupts. We may for example tell a device to
+ * stop sending interrupts: but to make sure there
+ * are no interrupts that are executing on another
+ * CPU we need to call this function.
+ */
+void synchronize_irq(void)
+{
+       if (irqs_running()) {
+               /* Stupid approach */
+               cli();
+               sti();
+       }
+}
+
+static inline void get_irqlock(int cpu)
+{
+       if (test_and_set_bit(0,&global_irq_lock)) {
+               /* do we already hold the lock? */
+               if ((unsigned char) cpu == global_irq_holder)
+                       return;
+               /* Uhhuh.. Somebody else got it. Wait.. */
+               do {
+                       do {
+                               rep_nop();
+                       } while (test_bit(0,&global_irq_lock));
+               } while (test_and_set_bit(0,&global_irq_lock));         
+       }
+       /* 
+        * We also need to make sure that nobody else is running
+        * in an interrupt context. 
+        */
+       wait_on_irq(cpu);
+
+       /*
+        * Ok, finally..
+        */
+       global_irq_holder = cpu;
+}
+
+#define EFLAGS_IF_SHIFT 9
+
+/*
+ * A global "cli()" while in an interrupt context
+ * turns into just a local cli(). Interrupts
+ * should use spinlocks for the (very unlikely)
+ * case that they ever want to protect against
+ * each other.
+ *
+ * If we already have local interrupts disabled,
+ * this will not turn a local disable into a
+ * global one (problems with spinlocks: this makes
+ * save_flags+cli+sti usable inside a spinlock).
+ */
+void __global_cli(void)
+{
+       unsigned int flags;
+
+       __save_flags(flags);
+       if (flags & (1 << EFLAGS_IF_SHIFT)) {
+               int cpu = smp_processor_id();
+               __cli();
+               if (!local_irq_count(cpu))
+                       get_irqlock(cpu);
+       }
+}
+
+void __global_sti(void)
+{
+       int cpu = smp_processor_id();
+
+       if (!local_irq_count(cpu))
+               release_irqlock(cpu);
+       __sti();
+}
+
+/*
+ * SMP flags value to restore to:
+ * 0 - global cli
+ * 1 - global sti
+ * 2 - local cli
+ * 3 - local sti
+ */
+unsigned long __global_save_flags(void)
+{
+       int retval;
+       int local_enabled;
+       unsigned long flags;
+       int cpu = smp_processor_id();
+
+       __save_flags(flags);
+       local_enabled = (flags >> EFLAGS_IF_SHIFT) & 1;
+       /* default to local */
+       retval = 2 + local_enabled;
+
+       /* check for global flags if we're not in an interrupt */
+       if (!local_irq_count(cpu)) {
+               if (local_enabled)
+                       retval = 1;
+               if (global_irq_holder == cpu)
+                       retval = 0;
+       }
+       return retval;
+}
+
+void __global_restore_flags(unsigned long flags)
+{
+       switch (flags) {
+       case 0:
+               __global_cli();
+               break;
+       case 1:
+               __global_sti();
+               break;
+       case 2:
+               __cli();
+               break;
+       case 3:
+               __sti();
+               break;
+       default:
+               printk("global_restore_flags: %08lx (%08lx)\n",
+                       flags, (&flags)[-1]);
+       }
+}
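
As a reference point (a sketch, not part of the patch): on SMP builds the generic save_flags()/cli()/restore_flags() macros expand to these __global_* helpers, as in the i386 scheme this port follows, so a classic 2.4-style critical section looks like:

	unsigned long flags;

	save_flags(flags);	/* __global_save_flags(): one of the 4 states above */
	cli();			/* __global_cli(): take the global irq lock */
	/* ... touch data shared with interrupt handlers on any CPU ... */
	restore_flags(flags);	/* put things back exactly as they were */
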
+
+#endif
+
+/*
+ * This should really return information about whether
+ * we should do bottom half handling etc. Right now we
+ * end up _always_ checking the bottom half, which is a
+ * waste of time and is not what some drivers would
+ * prefer.
+ */
+int handle_IRQ_event(unsigned int irq, struct pt_regs * regs, struct irqaction * action)
+{
+       int status;
+
+       irq_enter(0, irq);
+
+       status = 1;     /* Force the "do bottom halves" bit */
+
+       if (!(action->flags & SA_INTERRUPT))
+               __sti();
+
+       do {
+               status |= action->flags;
+               action->handler(irq, action->dev_id, regs);
+               action = action->next;
+       } while (action);
+       if (status & SA_SAMPLE_RANDOM)
+               add_interrupt_randomness(irq);
+       __cli();
+
+       irq_exit(0, irq);
+
+       return status;
+}
+
+/*
+ * Generic enable/disable code: this just calls
+ * down into the PIC-specific version for the actual
+ * hardware disable after having gotten the irq
+ * controller lock. 
+ */
+/**
+ *     disable_irq_nosync - disable an irq without waiting
+ *     @irq: Interrupt to disable
+ *
+ *     Disable the selected interrupt line.  Disables and Enables are
+ *     nested.
+ *     Unlike disable_irq(), this function does not ensure existing
+ *     instances of the IRQ handler have completed before returning.
+ *
+ *     This function may be called from IRQ context.
+ */
+inline void disable_irq_nosync(unsigned int irq)
+{
+       irq_desc_t *desc = irq_desc + irq;
+       unsigned long flags;
+
+       spin_lock_irqsave(&desc->lock, flags);
+       if (!desc->depth++) {
+               desc->status |= IRQ_DISABLED;
+               desc->handler->disable(irq);
+       }
+       spin_unlock_irqrestore(&desc->lock, flags);
+}
+
+/**
+ *     disable_irq - disable an irq and wait for completion
+ *     @irq: Interrupt to disable
+ *
+ *     Disable the selected interrupt line.  Enables and Disables are
+ *     nested.
+ *     This function waits for any pending IRQ handlers for this interrupt
+ *     to complete before returning. If you use this function while
+ *     holding a resource the IRQ handler may need you will deadlock.
+ *
+ *     This function may be called - with care - from IRQ context.
+ */
+void disable_irq(unsigned int irq)
+{
+       disable_irq_nosync(irq);
+
+       if (!local_irq_count(smp_processor_id())) {
+               do {
+                       barrier();
+                       cpu_relax();
+               } while (irq_desc[irq].status & IRQ_INPROGRESS);
+       }
+}
+
+/**
+ *     enable_irq - enable handling of an irq
+ *     @irq: Interrupt to enable
+ *
+ *     Undoes the effect of one call to disable_irq().  If this
+ *     matches the last disable, processing of interrupts on this
+ *     IRQ line is re-enabled.
+ *
+ *     This function may be called from IRQ context.
+ */
+void enable_irq(unsigned int irq)
+{
+       irq_desc_t *desc = irq_desc + irq;
+       unsigned long flags;
+
+       spin_lock_irqsave(&desc->lock, flags);
+       switch (desc->depth) {
+       case 1: {
+               unsigned int status = desc->status & ~IRQ_DISABLED;
+               desc->status = status;
+               if ((status & (IRQ_PENDING | IRQ_REPLAY)) == IRQ_PENDING) {
+                       desc->status = status | IRQ_REPLAY;
+                       hw_resend_irq(desc->handler,irq);
+               }
+               desc->handler->enable(irq);
+               /* fall-through */
+       }
+       default:
+               desc->depth--;
+               break;
+       case 0:
+               printk("enable_irq(%u) unbalanced from %p\n", irq,
+                      __builtin_return_address(0));
+       }
+       spin_unlock_irqrestore(&desc->lock, flags);
+}
+
+/*
+ * do_IRQ handles all normal device IRQ's (the special
+ * SMP cross-CPU interrupts have their own specific
+ * handlers).
+ */
+asmlinkage unsigned int do_IRQ(struct pt_regs *regs)
+{      
+       /* 
+        * We ack quickly, we don't want the irq controller
+        * thinking we're snobs just because some other CPU has
+        * disabled global interrupts (we have already done the
+        * INT_ACK cycles, it's too late to try to pretend to the
+        * controller that we aren't taking the interrupt).
+        *
+        * 0 return value means that this irq is already being
+        * handled by some other CPU. (or is disabled)
+        */
+       int irq = regs->orig_rax & 0xff; /* high bits used in ret_from_ code  */
+       int cpu = smp_processor_id();
+       irq_desc_t *desc = irq_desc + irq;
+       struct irqaction * action;
+       unsigned int status;
+
+       kstat.irqs[cpu][irq]++;
+       spin_lock(&desc->lock);
+       desc->handler->ack(irq);
+       /*
+        * REPLAY is when Linux resends an IRQ that was dropped earlier.
+        * WAITING is used by probe to mark irqs that are being tested.
+        */
+       status = desc->status & ~(IRQ_REPLAY | IRQ_WAITING);
+       status |= IRQ_PENDING; /* we _want_ to handle it */
+
+       /*
+        * If the IRQ is disabled for whatever reason, we cannot
+        * use the action we have.
+        */
+       action = NULL;
+       if (!(status & (IRQ_DISABLED | IRQ_INPROGRESS))) {
+               action = desc->action;
+               status &= ~IRQ_PENDING; /* we commit to handling */
+               status |= IRQ_INPROGRESS; /* we are handling it */
+       }
+       desc->status = status;
+
+       /*
+        * If there is no IRQ handler or it was disabled, exit early.
+        * Since we set PENDING, if another processor is handling
+        * a different instance of this same irq, the other processor
+        * will take care of it.
+        */
+       if (!action)
+               goto out;
+
+       /*
+        * Edge triggered interrupts need to remember
+        * pending events.
+        * This applies to any hw interrupts that allow a second
+        * instance of the same irq to arrive while we are in do_IRQ
+        * or in the handler. But the code here only handles the _second_
+        * instance of the irq, not the third or fourth. So it is mostly
+        * useful for irq hardware that does not mask cleanly in an
+        * SMP environment.
+        */
+       for (;;) {
+               spin_unlock(&desc->lock);
+               handle_IRQ_event(irq, regs, action);
+               spin_lock(&desc->lock);
+               
+               if (!(desc->status & IRQ_PENDING))
+                       break;
+               desc->status &= ~IRQ_PENDING;
+       }
+       desc->status &= ~IRQ_INPROGRESS;
+out:
+       /*
+        * The ->end() handler has to deal with interrupts which got
+        * disabled while the handler was running.
+        */
+       desc->handler->end(irq);
+       spin_unlock(&desc->lock);
+
+       if (softirq_pending(cpu))
+               do_softirq();
+       return 1;
+}
+
+/**
+ *     request_irq - allocate an interrupt line
+ *     @irq: Interrupt line to allocate
+ *     @handler: Function to be called when the IRQ occurs
+ *     @irqflags: Interrupt type flags
+ *     @devname: An ascii name for the claiming device
+ *     @dev_id: A cookie passed back to the handler function
+ *
+ *     This call allocates interrupt resources and enables the
+ *     interrupt line and IRQ handling. From the point this
+ *     call is made your handler function may be invoked. Since
+ *     your handler function must clear any interrupt the board 
+ *     raises, you must take care both to initialise your hardware
+ *     and to set up the interrupt handler in the right order.
+ *
+ *     Dev_id must be globally unique. Normally the address of the
+ *     device data structure is used as the cookie. Since the handler
+ *     receives this value it makes sense to use it.
+ *
+ *     If your interrupt is shared you must pass a non NULL dev_id
+ *     as this is required when freeing the interrupt.
+ *
+ *     Flags:
+ *
+ *     SA_SHIRQ                Interrupt is shared
+ *
+ *     SA_INTERRUPT            Disable local interrupts while processing
+ *
+ *     SA_SAMPLE_RANDOM        The interrupt can be used for entropy
+ *
+ */
+int request_irq(unsigned int irq, 
+               void (*handler)(int, void *, struct pt_regs *),
+               unsigned long irqflags, 
+               const char * devname,
+               void *dev_id)
+{
+       int retval;
+       struct irqaction * action;
+
+#if 1
+       /*
+        * Sanity-check: shared interrupts should REALLY pass in
+        * a real dev-ID, otherwise we'll have trouble later trying
+        * to figure out which interrupt is which (messes up the
+        * interrupt freeing logic etc).
+        */
+       if (irqflags & SA_SHIRQ) {
+               if (!dev_id)
+                       printk("Bad boy: %s (at 0x%x) called us without a dev_id!\n", devname, (&irq)[-1]);
+       }
+#endif
+
+       if (irq >= NR_IRQS)
+               return -EINVAL;
+       if (!handler)
+               return -EINVAL;
+
+       action = (struct irqaction *)
+                       kmalloc(sizeof(struct irqaction), GFP_KERNEL);
+       if (!action)
+               return -ENOMEM;
+
+       action->handler = handler;
+       action->flags = irqflags;
+       action->mask = 0;
+       action->name = devname;
+       action->next = NULL;
+       action->dev_id = dev_id;
+
+       retval = setup_irq(irq, action);
+       if (retval)
+               kfree(action);
+       return retval;
+}
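
A usage sketch (hypothetical driver names, not part of the patch) showing how request_irq() and free_irq() are normally paired; note that in this kernel generation handlers return void and take (irq, dev_id, regs):

	static void mydev_interrupt(int irq, void *dev_id, struct pt_regs *regs)
	{
		struct mydev *dev = dev_id;	/* the cookie given to request_irq() */
		/* acknowledge the board and do the minimum of work here */
	}

	static int mydev_attach(struct mydev *dev, unsigned int irq)
	{
		int err = request_irq(irq, mydev_interrupt, SA_SHIRQ,
				      "mydev", dev);
		if (err)
			return err;	/* -EINVAL, -EBUSY or -ENOMEM */
		/* ... and on detach: free_irq(irq, dev); */
		return 0;
	}
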
+
+/**
+ *     free_irq - free an interrupt
+ *     @irq: Interrupt line to free
+ *     @dev_id: Device identity to free
+ *
+ *     Remove an interrupt handler. The handler is removed and if the
+ *     interrupt line is no longer in use by any driver it is disabled.
+ *     On a shared IRQ the caller must ensure the interrupt is disabled
+ *     on the card it drives before calling this function. The function
+ *     does not return until any executing interrupts for this IRQ
+ *     have completed.
+ *
+ *     This function may be called from interrupt context. 
+ *
+ *     Bugs: Attempting to free an irq in a handler for the same irq hangs
+ *           the machine.
+ */
+void free_irq(unsigned int irq, void *dev_id)
+{
+       irq_desc_t *desc;
+       struct irqaction **p;
+       unsigned long flags;
+
+       if (irq >= NR_IRQS)
+               return;
+
+       desc = irq_desc + irq;
+       spin_lock_irqsave(&desc->lock,flags);
+       p = &desc->action;
+       for (;;) {
+               struct irqaction * action = *p;
+               if (action) {
+                       struct irqaction **pp = p;
+                       p = &action->next;
+                       if (action->dev_id != dev_id)
+                               continue;
+
+                       /* Found it - now remove it from the list of entries */
+                       *pp = action->next;
+                       if (!desc->action) {
+                               desc->status |= IRQ_DISABLED;
+                               desc->handler->shutdown(irq);
+                       }
+                       spin_unlock_irqrestore(&desc->lock,flags);
+
+#ifdef CONFIG_SMP
+                       /* Wait to make sure it's not being used on another CPU */
+                       while (desc->status & IRQ_INPROGRESS) {
+                               barrier();
+                               cpu_relax();
+                       }
+#endif
+                       kfree(action);
+                       return;
+               }
+               printk("Trying to free free IRQ%d\n",irq);
+               spin_unlock_irqrestore(&desc->lock,flags);
+               return;
+       }
+}
+
+/*
+ * IRQ autodetection code..
+ *
+ * This depends on the fact that any interrupt that
+ * comes in on to an unassigned handler will get stuck
+ * with "IRQ_WAITING" cleared and the interrupt
+ * disabled.
+ */
+
+static DECLARE_MUTEX(probe_sem);
+
+/**
+ *     probe_irq_on    - begin an interrupt autodetect
+ *
+ *     Commence probing for an interrupt. The interrupts are scanned
+ *     and a mask of potential interrupt lines is returned.
+ *
+ */
+unsigned long probe_irq_on(void)
+{
+       unsigned int i;
+       irq_desc_t *desc;
+       unsigned long val;
+       unsigned long delay;
+
+       down(&probe_sem);
+       /* 
+        * something may have generated an irq long ago and we want to
+        * flush such a longstanding irq before considering it as spurious. 
+        */
+       for (i = NR_IRQS-1; i > 0; i--)  {
+               desc = irq_desc + i;
+
+               spin_lock_irq(&desc->lock);
+               if (!irq_desc[i].action) 
+                       irq_desc[i].handler->startup(i);
+               spin_unlock_irq(&desc->lock);
+       }
+
+       /* Wait for longstanding interrupts to trigger. */
+       for (delay = jiffies + HZ/50; time_after(delay, jiffies); )
+               /* about 20ms delay */ synchronize_irq();
+
+       /*
+        * enable any unassigned irqs
+        * (we must startup again here because if a longstanding irq
+        * happened in the previous stage, it may have masked itself)
+        */
+       for (i = NR_IRQS-1; i > 0; i--) {
+               desc = irq_desc + i;
+
+               spin_lock_irq(&desc->lock);
+               if (!desc->action) {
+                       desc->status |= IRQ_AUTODETECT | IRQ_WAITING;
+                       if (desc->handler->startup(i))
+                               desc->status |= IRQ_PENDING;
+               }
+               spin_unlock_irq(&desc->lock);
+       }
+
+       /*
+        * Wait for spurious interrupts to trigger
+        */
+       for (delay = jiffies + HZ/10; time_after(delay, jiffies); )
+               /* about 100ms delay */ synchronize_irq();
+
+       /*
+        * Now filter out any obviously spurious interrupts
+        */
+       val = 0;
+       for (i = 0; i < NR_IRQS; i++) {
+               irq_desc_t *desc = irq_desc + i;
+               unsigned int status;
+
+               spin_lock_irq(&desc->lock);
+               status = desc->status;
+
+               if (status & IRQ_AUTODETECT) {
+                       /* It triggered already - consider it spurious. */
+                       if (!(status & IRQ_WAITING)) {
+                               desc->status = status & ~IRQ_AUTODETECT;
+                               desc->handler->shutdown(i);
+                       } else
+                               if (i < 32)
+                                       val |= 1 << i;
+               }
+               spin_unlock_irq(&desc->lock);
+       }
+
+       return val;
+}
+
+/*
+ * Return a mask of triggered interrupts (this
+ * can handle only legacy ISA interrupts).
+ */
+/**
+ *     probe_irq_mask - scan a bitmap of interrupt lines
+ *     @val:   mask of interrupts to consider
+ *
+ *     Scan the ISA bus interrupt lines and return a bitmap of
+ *     active interrupts. The interrupt probe logic state is then
+ *     returned to its previous value.
+ *
+ *     Note: we need to scan all the irq's even though we will
+ *     only return ISA irq numbers - just so that we reset them
+ *     all to a known state.
+ */
+unsigned int probe_irq_mask(unsigned long val)
+{
+       int i;
+       unsigned int mask;
+
+       mask = 0;
+       for (i = 0; i < NR_IRQS; i++) {
+               irq_desc_t *desc = irq_desc + i;
+               unsigned int status;
+
+               spin_lock_irq(&desc->lock);
+               status = desc->status;
+
+               if (status & IRQ_AUTODETECT) {
+                       if (i < 16 && !(status & IRQ_WAITING))
+                               mask |= 1 << i;
+
+                       desc->status = status & ~IRQ_AUTODETECT;
+                       desc->handler->shutdown(i);
+               }
+               spin_unlock_irq(&desc->lock);
+       }
+       up(&probe_sem);
+
+       return mask & val;
+}
+
+/*
+ * Return the one interrupt that triggered (this can
+ * handle any interrupt source).
+ */
+
+/**
+ *     probe_irq_off   - end an interrupt autodetect
+ *     @val: mask of potential interrupts (unused)
+ *
+ *     Scans the unused interrupt lines and returns the line which
+ *     appears to have triggered the interrupt. If no interrupt was
+ *     found then zero is returned. If more than one interrupt is
+ *     found then minus the first candidate is returned to indicate
+ *     there is doubt.
+ *
+ *     The interrupt probe logic state is returned to its previous
+ *     value.
+ *
+ *     BUGS: When used in a module (which arguably shouldn't happen)
+ *     nothing prevents two IRQ probe callers from overlapping. The
+ *     results of this are non-optimal.
+ */
+int probe_irq_off(unsigned long val)
+{
+       int i, irq_found, nr_irqs;
+
+       nr_irqs = 0;
+       irq_found = 0;
+       for (i = 0; i < NR_IRQS; i++) {
+               irq_desc_t *desc = irq_desc + i;
+               unsigned int status;
+
+               spin_lock_irq(&desc->lock);
+               status = desc->status;
+
+               if (status & IRQ_AUTODETECT) {
+                       if (!(status & IRQ_WAITING)) {
+                               if (!nr_irqs)
+                                       irq_found = i;
+                               nr_irqs++;
+                       }
+                       desc->status = status & ~IRQ_AUTODETECT;
+                       desc->handler->shutdown(i);
+               }
+               spin_unlock_irq(&desc->lock);
+       }
+       up(&probe_sem);
+
+       if (nr_irqs > 1)
+               irq_found = -irq_found;
+       return irq_found;
+}
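
For illustration (a fragment with a hypothetical device, not part of the patch), the classic autoprobe sequence built on these helpers:

	unsigned long mask;
	int irq;

	mask = probe_irq_on();		/* arm detection on all unused lines */
	/* ... program the device so that it raises its interrupt ... */
	udelay(100);			/* give the interrupt time to arrive */
	irq = probe_irq_off(mask);	/* >0: the IRQ, 0: none, <0: ambiguous */
	if (irq <= 0)
		printk("mydev: IRQ autoprobe failed (%d)\n", irq);
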
+
+/* this was setup_x86_irq but it seems pretty generic */
+int setup_irq(unsigned int irq, struct irqaction * new)
+{
+       int shared = 0;
+       unsigned long flags;
+       struct irqaction *old, **p;
+       irq_desc_t *desc = irq_desc + irq;
+
+       /*
+        * Some drivers like serial.c use request_irq() heavily,
+        * so we have to be careful not to interfere with a
+        * running system.
+        */
+       if (new->flags & SA_SAMPLE_RANDOM) {
+               /*
+                * This function might sleep, so we want to call it first,
+                * outside of the atomic block.
+                * Yes, this might clear the entropy pool if the wrong
+                * driver is attempted to be loaded without actually
+                * installing a new handler, but that is hardly a problem:
+                * only the sysadmin is able to do this.
+                */
+               rand_initialize_irq(irq);
+       }
+
+       /*
+        * The following block of code has to be executed atomically
+        */
+       spin_lock_irqsave(&desc->lock,flags);
+       p = &desc->action;
+       if ((old = *p) != NULL) {
+               /* Can't share interrupts unless both agree to */
+               if (!(old->flags & new->flags & SA_SHIRQ)) {
+                       spin_unlock_irqrestore(&desc->lock,flags);
+                       return -EBUSY;
+               }
+
+               /* add new interrupt at end of irq queue */
+               do {
+                       p = &old->next;
+                       old = *p;
+               } while (old);
+               shared = 1;
+       }
+
+       *p = new;
+
+       if (!shared) {
+               desc->depth = 0;
+               desc->status &= ~(IRQ_DISABLED | IRQ_AUTODETECT | IRQ_WAITING);
+               desc->handler->startup(irq);
+       }
+       spin_unlock_irqrestore(&desc->lock,flags);
+
+       register_irq_proc(irq);
+       return 0;
+}
+
+static struct proc_dir_entry * root_irq_dir;
+static struct proc_dir_entry * irq_dir [NR_IRQS];
+
+#define HEX_DIGITS 8
+
+static unsigned int parse_hex_value (const char *buffer,
+               unsigned long count, unsigned long *ret)
+{
+       unsigned char hexnum [HEX_DIGITS];
+       unsigned long value;
+       int i;
+
+       if (!count)
+               return -EINVAL;
+       if (count > HEX_DIGITS)
+               count = HEX_DIGITS;
+       if (copy_from_user(hexnum, buffer, count))
+               return -EFAULT;
+
+       /*
+        * Parse the first 8 characters as a hex string, any non-hex char
+        * is end-of-string. '00e1', 'e1', '00E1', 'E1' are all the same.
+        */
+       value = 0;
+
+       for (i = 0; i < count; i++) {
+               unsigned int c = hexnum[i];
+
+               switch (c) {
+                       case '0' ... '9': c -= '0'; break;
+                       case 'a' ... 'f': c -= 'a'-10; break;
+                       case 'A' ... 'F': c -= 'A'-10; break;
+               default:
+                       goto out;
+               }
+               value = (value << 4) | c;
+       }
+out:
+       *ret = value;
+       return 0;
+}
+
+#if CONFIG_SMP
+
+static struct proc_dir_entry * smp_affinity_entry [NR_IRQS];
+
+static unsigned long irq_affinity [NR_IRQS] = { [0 ... NR_IRQS-1] = ~0UL };
+static int irq_affinity_read_proc (char *page, char **start, off_t off,
+                       int count, int *eof, void *data)
+{
+       if (count < HEX_DIGITS+1)
+               return -EINVAL;
+       return sprintf (page, "%08lx\n", irq_affinity[(long)data]);
+}
+
+static int irq_affinity_write_proc (struct file *file, const char *buffer,
+                                       unsigned long count, void *data)
+{
+       int irq = (long) data, full_count = count, err;
+       unsigned long new_value;
+
+       if (!irq_desc[irq].handler->set_affinity)
+               return -EIO;
+
+       err = parse_hex_value(buffer, count, &new_value);
+       if (err)
+               return err;
+
+       /*
+        * Do not allow disabling IRQs completely - it's a too easy
+        * Do not allow disabling IRQs completely - it's too easy a
+        * one online CPU still has to be targeted.
+        */
+       if (!(new_value & cpu_online_map))
+               return -EINVAL;
+
+       irq_affinity[irq] = new_value;
+       irq_desc[irq].handler->set_affinity(irq, new_value);
+
+       return full_count;
+}
+
+#endif
+
+static int prof_cpu_mask_read_proc (char *page, char **start, off_t off,
+                       int count, int *eof, void *data)
+{
+       unsigned long *mask = (unsigned long *) data;
+       if (count < HEX_DIGITS+1)
+               return -EINVAL;
+       return sprintf (page, "%08lx\n", *mask);
+}
+
+static int prof_cpu_mask_write_proc (struct file *file, const char *buffer,
+                                       unsigned long count, void *data)
+{
+       unsigned long *mask = (unsigned long *) data, full_count = count, err;
+       unsigned long new_value;
+
+       err = parse_hex_value(buffer, count, &new_value);
+       if (err)
+               return err;
+
+       *mask = new_value;
+       return full_count;
+}
+
+#define MAX_NAMELEN 10
+
+static void register_irq_proc (unsigned int irq)
+{
+       char name [MAX_NAMELEN];
+
+       if (!root_irq_dir || (irq_desc[irq].handler == &no_irq_type) ||
+                       irq_dir[irq])
+               return;
+
+       memset(name, 0, MAX_NAMELEN);
+       sprintf(name, "%d", irq);
+
+       /* create /proc/irq/1234 */
+       irq_dir[irq] = proc_mkdir(name, root_irq_dir);
+
+#if CONFIG_SMP
+       {
+               struct proc_dir_entry *entry;
+
+               /* create /proc/irq/1234/smp_affinity */
+               entry = create_proc_entry("smp_affinity", 0600, irq_dir[irq]);
+
+               if (entry) {
+                       entry->nlink = 1;
+                       entry->data = (void *)(long)irq;
+                       entry->read_proc = irq_affinity_read_proc;
+                       entry->write_proc = irq_affinity_write_proc;
+               }
+
+               smp_affinity_entry[irq] = entry;
+       }
+#endif
+}
+
+unsigned long prof_cpu_mask = -1;
+
+void init_irq_proc (void)
+{
+       struct proc_dir_entry *entry;
+       int i;
+
+       /* create /proc/irq */
+       root_irq_dir = proc_mkdir("irq", 0);
+
+       /* create /proc/irq/prof_cpu_mask */
+       entry = create_proc_entry("prof_cpu_mask", 0600, root_irq_dir);
+
+       if (!entry)
+           return;
+
+       entry->nlink = 1;
+       entry->data = (void *)&prof_cpu_mask;
+       entry->read_proc = prof_cpu_mask_read_proc;
+       entry->write_proc = prof_cpu_mask_write_proc;
+
+       /*
+        * Create entries for all existing IRQs.
+        */
+       for (i = 0; i < NR_IRQS; i++)
+               register_irq_proc(i);
+}
+
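
A small user-space sketch of consuming the /proc entries created above (illustrative only; IRQ 16 and the mask are hypothetical, and the value is the hex string handled by parse_hex_value()):

	#include <stdio.h>

	int main(void)
	{
		/* bind IRQ 16 to CPUs 0 and 1: hex mask 0x3 */
		FILE *f = fopen("/proc/irq/16/smp_affinity", "w");
		if (!f) {
			perror("fopen");
			return 1;
		}
		fprintf(f, "3\n");
		return fclose(f) ? 1 : 0;
	}
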
diff --git a/arch/x86_64/kernel/ldt.c b/arch/x86_64/kernel/ldt.c
new file mode 100644 (file)
index 0000000..91c2b1f
--- /dev/null
@@ -0,0 +1,177 @@
+/*
+ * linux/arch/x86_64/kernel/ldt.c
+ *
+ * Copyright (C) 1992 Krishna Balasubramanian and Linus Torvalds
+ * Copyright (C) 1999 Ingo Molnar <mingo@redhat.com>
+ */
+
+/* 
+ * FIXME: forbid code segment setting for 64bit mode. doesn't work with SYSCALL 
+ */ 
+
+#include <linux/errno.h>
+#include <linux/sched.h>
+#include <linux/string.h>
+#include <linux/mm.h>
+#include <linux/smp.h>
+#include <linux/smp_lock.h>
+#include <linux/vmalloc.h>
+
+#include <asm/uaccess.h>
+#include <asm/system.h>
+#include <asm/ldt.h>
+#include <asm/desc.h>
+
+/*
+ * read_ldt() is not really atomic - this is not a problem since
+ * synchronization of reads and writes done to the LDT has to be
+ * assured by user-space anyway. Writes are atomic, to protect
+ * the security checks done on new descriptors.
+ */
+static int read_ldt(void * ptr, unsigned long bytecount)
+{
+       int err;
+       unsigned long size;
+       struct mm_struct * mm = current->mm;
+
+       err = 0;
+       if (!mm->context.segments)
+               goto out;
+
+       size = LDT_ENTRIES*LDT_ENTRY_SIZE;
+       if (size > bytecount)
+               size = bytecount;
+
+       err = size;
+       if (copy_to_user(ptr, mm->context.segments, size))
+               err = -EFAULT;
+out:
+       return err;
+}
+
+static int read_default_ldt(void * ptr, unsigned long bytecount)
+{
+       int err;
+       unsigned long size;
+       void *address;
+
+       err = 0;
+       address = &default_ldt[0];
+       size = sizeof(struct desc_struct);
+       if (size > bytecount)
+               size = bytecount;
+
+       err = size;
+       if (copy_to_user(ptr, address, size))
+               err = -EFAULT;
+
+       return err;
+}
+
+static int write_ldt(void * ptr, unsigned long bytecount, int oldmode)
+{
+       struct mm_struct * mm = current->mm;
+       __u32 entry_1, entry_2, *lp;
+       int error;
+       struct modify_ldt_ldt_s ldt_info;
+
+       error = -EINVAL;
+       if (bytecount != sizeof(ldt_info))
+               goto out;
+       error = -EFAULT;        
+       if (copy_from_user(&ldt_info, ptr, sizeof(ldt_info)))
+               goto out;
+
+       error = -EINVAL;
+       if (ldt_info.entry_number >= LDT_ENTRIES)
+               goto out;
+       if (ldt_info.contents == 3) {
+               if (oldmode)
+                       goto out;
+               if (ldt_info.seg_not_present == 0)
+                       goto out;
+       }
+
+       current->thread.fsindex = 0; 
+       current->thread.gsindex = 0; 
+
+       /*
+        * the GDT index of the LDT is allocated dynamically, and is
+        * limited by MAX_LDT_DESCRIPTORS.
+        */
+       down_write(&mm->mmap_sem);
+       if (!mm->context.segments) {
+               void * segments = vmalloc(LDT_ENTRIES*LDT_ENTRY_SIZE);
+               error = -ENOMEM;
+               if (!segments)
+                       goto out_unlock;
+               memset(segments, 0, LDT_ENTRIES*LDT_ENTRY_SIZE);
+               wmb();
+               mm->context.segments = segments;
+               mm->context.cpuvalid = 1UL << smp_processor_id();
+               load_LDT(mm);
+       }
+
+       lp = (__u32 *) ((ldt_info.entry_number << 3) + (char *) mm->context.segments);
+
+       /* Allow LDTs to be cleared by the user. */
+       if (ldt_info.base_addr == 0 && ldt_info.limit == 0) {
+               if (oldmode ||
+                   (ldt_info.contents == 0             &&
+                    ldt_info.read_exec_only == 1       &&
+                    ldt_info.seg_32bit == 0            &&
+                    ldt_info.limit_in_pages == 0       &&
+                    ldt_info.seg_not_present == 1      &&
+                    ldt_info.useable == 0 )) {
+                       entry_1 = 0;
+                       entry_2 = 0;
+                       goto install;
+               }
+       }
+
+       entry_1 = ((ldt_info.base_addr & 0x0000ffff) << 16) |
+                 (ldt_info.limit & 0x0ffff);
+       entry_2 = (ldt_info.base_addr & 0xff000000) |
+                 ((ldt_info.base_addr & 0x00ff0000) >> 16) |
+                 (ldt_info.limit & 0xf0000) |
+                 ((ldt_info.read_exec_only ^ 1) << 9) |
+                 (ldt_info.contents << 10) |
+                 ((ldt_info.seg_not_present ^ 1) << 15) |
+                 (ldt_info.seg_32bit << 22) |
+                 (ldt_info.limit_in_pages << 23) |
+                 0x7000;
+       if (!oldmode)
+               entry_2 |= (ldt_info.useable << 20);
+
+       /* Install the new entry ...  */
+install:
+       *lp     = entry_1;
+       *(lp+1) = entry_2;
+       error = 0;
+
+out_unlock:
+       up_write(&mm->mmap_sem);
+out:
+       return error;
+}
+
+asmlinkage int sys_modify_ldt(int func, void *ptr, unsigned long bytecount)
+{
+       int ret = -ENOSYS;
+
+       switch (func) {
+       case 0:
+               ret = read_ldt(ptr, bytecount);
+               break;
+       case 1:
+               ret = write_ldt(ptr, bytecount, 1);
+               break;
+       case 2:
+               ret = read_default_ldt(ptr, bytecount);
+               break;
+       case 0x11:
+               ret = write_ldt(ptr, bytecount, 0);
+               break;
+       }
+       return ret;
+}
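
A hedged user-space sketch of driving this syscall (func 1 = write_ldt in the switch above); the descriptor contents are purely illustrative, and <asm/ldt.h> is assumed to provide struct modify_ldt_ldt_s, as it does in kernels of this era:

	#include <stdio.h>
	#include <string.h>
	#include <unistd.h>
	#include <sys/syscall.h>
	#include <asm/ldt.h>

	int main(void)
	{
		struct modify_ldt_ldt_s ldt;

		memset(&ldt, 0, sizeof(ldt));
		ldt.entry_number = 0;
		ldt.base_addr    = 0x1000;	/* hypothetical 32-bit base */
		ldt.limit        = 0xfff;
		ldt.seg_32bit    = 1;

		if (syscall(SYS_modify_ldt, 1, &ldt, sizeof(ldt)) < 0) {
			perror("modify_ldt");
			return 1;
		}
		return 0;
	}
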
diff --git a/arch/x86_64/kernel/mpparse.c b/arch/x86_64/kernel/mpparse.c
new file mode 100644 (file)
index 0000000..1f6e628
--- /dev/null
@@ -0,0 +1,670 @@
+/*
+ *     Intel Multiprocessor Specification 1.1 and 1.4
+ *     compliant MP-table parsing routines.
+ *
+ *     (c) 1995 Alan Cox, Building #3 <alan@redhat.com>
+ *     (c) 1998, 1999, 2000 Ingo Molnar <mingo@redhat.com>
+ *
+ *     Fixes
+ *             Erich Boleyn    :       MP v1.4 and additional changes.
+ *             Alan Cox        :       Added EBDA scanning
+ *             Ingo Molnar     :       various cleanups and rewrites
+ *     Maciej W. Rozycki       :       Bits for default MP configurations
+ */
+
+#include <linux/mm.h>
+#include <linux/irq.h>
+#include <linux/init.h>
+#include <linux/delay.h>
+#include <linux/config.h>
+#include <linux/bootmem.h>
+#include <linux/smp_lock.h>
+#include <linux/kernel_stat.h>
+#include <linux/mc146818rtc.h>
+
+#include <asm/smp.h>
+#include <asm/mtrr.h>
+#include <asm/mpspec.h>
+#include <asm/pgalloc.h>
+
+/* Have we found an MP table */
+int smp_found_config = 0;
+
+/*
+ * Various Linux-internal data structures created from the
+ * MP-table.
+ */
+int apic_version [MAX_APICS];
+int mp_bus_id_to_type [MAX_MP_BUSSES] = { [0 ... MAX_MP_BUSSES-1] = -1 };
+int mp_bus_id_to_pci_bus [MAX_MP_BUSSES] = { [0 ... MAX_MP_BUSSES-1] = -1 };
+int mp_current_pci_id = 0;
+/* I/O APIC entries */
+struct mpc_config_ioapic mp_ioapics[MAX_IO_APICS];
+
+/* MP IRQ source entries */
+struct mpc_config_intsrc mp_irqs[MAX_IRQ_SOURCES];
+
+/* # of MP IRQ source entries */
+int mp_irq_entries;
+
+int nr_ioapics;
+int pic_mode;
+unsigned long mp_lapic_addr = 0;
+
+
+
+/* Processor that is doing the boot up */
+unsigned int boot_cpu_id = -1U;
+/* Internal processor count */
+static unsigned int num_processors = 0;
+
+/* Bitmask of physically existing CPUs */
+unsigned long phys_cpu_present_map = 0;
+
+/*
+ * Intel MP BIOS table parsing routines:
+ */
+
+#ifndef CONFIG_X86_VISWS_APIC
+/*
+ * Checksum an MP configuration block.
+ */
+
+static int __init mpf_checksum(unsigned char *mp, int len)
+{
+       int sum = 0;
+
+       while (len--)
+               sum += *mp++;
+
+       return sum & 0xFF;
+}
+
+/*
+ * Processor encoding in an MP configuration block
+ */
+
+static char __init *mpc_family(int family,int model)
+{
+       static char n[32];
+       static char *model_defs[]=
+       {
+               "80486DX","80486DX",
+               "80486SX","80486DX/2 or 80487",
+               "80486SL","80486SX/2",
+               "Unknown","80486DX/2-WB",
+               "80486DX/4","80486DX/4-WB"
+       };
+
+       switch (family) {
+               case 0x04:
+                       if (model < 10)
+                               return model_defs[model];
+                       break;
+
+               case 0x05:
+                       return("Pentium(tm)");
+
+               case 0x06:
+                       return("Pentium(tm) Pro");
+
+               case 0x0F:
+                       if (model == 0x0F)
+                               return("Special controller");
+       }
+       sprintf(n,"Unknown CPU [%d:%d]",family, model);
+       return n;
+}
+
+static void __init MP_processor_info (struct mpc_config_processor *m)
+{
+       int ver;
+
+       if (!(m->mpc_cpuflag & CPU_ENABLED))
+               return;
+
+       printk("Processor #%d %s APIC version %d\n",
+               m->mpc_apicid,
+               mpc_family(     (m->mpc_cpufeature & CPU_FAMILY_MASK)>>8 ,
+                               (m->mpc_cpufeature & CPU_MODEL_MASK)>>4),
+               m->mpc_apicver);
+
+       if (m->mpc_featureflag&(1<<0))
+               Dprintk("    Floating point unit present.\n");
+       if (m->mpc_featureflag&(1<<7))
+               Dprintk("    Machine Exception supported.\n");
+       if (m->mpc_featureflag&(1<<8))
+               Dprintk("    64 bit compare & exchange supported.\n");
+       if (m->mpc_featureflag&(1<<9))
+               Dprintk("    Internal APIC present.\n");
+
+       if (m->mpc_cpuflag & CPU_BOOTPROCESSOR) {
+               Dprintk("    Bootup CPU\n");
+               boot_cpu_id = m->mpc_apicid;
+       }
+       num_processors++;
+
+       if (m->mpc_apicid >= MAX_APICS) {
+               printk("Processor #%d INVALID. (Max ID: %d).\n",
+                       m->mpc_apicid, MAX_APICS);
+               return;
+       }
+       ver = m->mpc_apicver;
+
+       phys_cpu_present_map |= 1 << m->mpc_apicid;
+       /*
+        * Validate version
+        */
+       if (ver == 0x0) {
+               printk("BIOS bug, APIC version is 0 for CPU#%d! fixing up to 0x10. (tell your hw vendor)\n", m->mpc_apicid);
+               ver = 0x10;
+       }
+       apic_version[m->mpc_apicid] = ver;
+}
+
+static void __init MP_bus_info (struct mpc_config_bus *m)
+{
+       char str[7];
+
+       memcpy(str, m->mpc_bustype, 6);
+       str[6] = 0;
+       Dprintk("Bus #%d is %s\n", m->mpc_busid, str);
+
+       if (strncmp(str, "ISA", 3) == 0) {
+               mp_bus_id_to_type[m->mpc_busid] = MP_BUS_ISA;
+       } else if (strncmp(str, "EISA", 4) == 0) {
+               mp_bus_id_to_type[m->mpc_busid] = MP_BUS_EISA;
+       } else if (strncmp(str, "PCI", 3) == 0) {
+               mp_bus_id_to_type[m->mpc_busid] = MP_BUS_PCI;
+               mp_bus_id_to_pci_bus[m->mpc_busid] = mp_current_pci_id;
+               mp_current_pci_id++;
+       } else if (strncmp(str, "MCA", 3) == 0) {
+               mp_bus_id_to_type[m->mpc_busid] = MP_BUS_MCA;
+       } else {
+               printk("Unknown bustype %s\n", str);
+               panic("cannot handle bus - mail to linux-smp@vger.kernel.org");
+       }
+}
+
+static void __init MP_ioapic_info (struct mpc_config_ioapic *m)
+{
+       if (!(m->mpc_flags & MPC_APIC_USABLE))
+               return;
+
+       printk("I/O APIC #%d Version %d at 0x%X.\n",
+               m->mpc_apicid, m->mpc_apicver, m->mpc_apicaddr);
+       if (nr_ioapics >= MAX_IO_APICS) {
+               printk("Max # of I/O APICs (%d) exceeded (found %d).\n",
+                       MAX_IO_APICS, nr_ioapics);
+               panic("Recompile kernel with bigger MAX_IO_APICS!\n");
+       }
+       if (!m->mpc_apicaddr) {
+               printk(KERN_ERR "WARNING: bogus zero I/O APIC address"
+                       " found in MP table, skipping!\n");
+               return;
+       }
+       mp_ioapics[nr_ioapics] = *m;
+       nr_ioapics++;
+}
+
+static void __init MP_intsrc_info (struct mpc_config_intsrc *m)
+{
+       mp_irqs [mp_irq_entries] = *m;
+       Dprintk("Int: type %d, pol %d, trig %d, bus %d,"
+               " IRQ %02x, APIC ID %x, APIC INT %02x\n",
+                       m->mpc_irqtype, m->mpc_irqflag & 3,
+                       (m->mpc_irqflag >> 2) & 3, m->mpc_srcbus,
+                       m->mpc_srcbusirq, m->mpc_dstapic, m->mpc_dstirq);
+       if (++mp_irq_entries == MAX_IRQ_SOURCES)
+               panic("Max # of irq sources exceeded!!\n");
+}
+
+static void __init MP_lintsrc_info (struct mpc_config_lintsrc *m)
+{
+       Dprintk("Lint: type %d, pol %d, trig %d, bus %d,"
+               " IRQ %02x, APIC ID %x, APIC LINT %02x\n",
+                       m->mpc_irqtype, m->mpc_irqflag & 3,
+                       (m->mpc_irqflag >> 2) &3, m->mpc_srcbusid,
+                       m->mpc_srcbusirq, m->mpc_destapic, m->mpc_destapiclint);
+       /*
+        * Well it seems all SMP boards in existence
+        * use ExtINT/LVT1 == LINT0 and
+        * NMI/LVT2 == LINT1 - the following check
+        * will show us if this assumption is false.
+        * Until then we do not have to add baggage.
+        */
+       if ((m->mpc_irqtype == mp_ExtINT) &&
+               (m->mpc_destapiclint != 0))
+                       BUG();
+       if ((m->mpc_irqtype == mp_NMI) &&
+               (m->mpc_destapiclint != 1))
+                       BUG();
+}
+
+/*
+ * Read/parse the MPC
+ */
+
+static int __init smp_read_mpc(struct mp_config_table *mpc)
+{
+       char str[16];
+       int count=sizeof(*mpc);
+       unsigned char *mpt=((unsigned char *)mpc)+count;
+
+       if (memcmp(mpc->mpc_signature,MPC_SIGNATURE,4)) {
+               panic("SMP mptable: bad signature [%c%c%c%c]!\n",
+                       mpc->mpc_signature[0],
+                       mpc->mpc_signature[1],
+                       mpc->mpc_signature[2],
+                       mpc->mpc_signature[3]);
+               return 0;
+       }
+       if (mpf_checksum((unsigned char *)mpc,mpc->mpc_length)) {
+               panic("SMP mptable: checksum error!\n");
+               return 0;
+       }
+       if (mpc->mpc_spec!=0x01 && mpc->mpc_spec!=0x04) {
+               printk(KERN_ERR "SMP mptable: bad table version (%d)!!\n",
+                       mpc->mpc_spec);
+               return 0;
+       }
+       if (!mpc->mpc_lapic) {
+               printk(KERN_ERR "SMP mptable: null local APIC address!\n");
+               return 0;
+       }
+       memcpy(str,mpc->mpc_oem,8);
+       str[8]=0;
+       printk("OEM ID: %s ",str);
+
+       memcpy(str,mpc->mpc_productid,12);
+       str[12]=0;
+       printk("Product ID: %s ",str);
+
+       printk("APIC at: 0x%X\n",mpc->mpc_lapic);
+
+       /* save the local APIC address, it might be non-default */
+       mp_lapic_addr = mpc->mpc_lapic;
+
+       /*
+        *      Now process the configuration blocks.
+        */
+       while (count < mpc->mpc_length) {
+               switch(*mpt) {
+                       case MP_PROCESSOR:
+                       {
+                               struct mpc_config_processor *m=
+                                       (struct mpc_config_processor *)mpt;
+                               MP_processor_info(m);
+                               mpt += sizeof(*m);
+                               count += sizeof(*m);
+                               break;
+                       }
+                       case MP_BUS:
+                       {
+                               struct mpc_config_bus *m=
+                                       (struct mpc_config_bus *)mpt;
+                               MP_bus_info(m);
+                               mpt += sizeof(*m);
+                               count += sizeof(*m);
+                               break;
+                       }
+                       case MP_IOAPIC:
+                       {
+                               struct mpc_config_ioapic *m=
+                                       (struct mpc_config_ioapic *)mpt;
+                               MP_ioapic_info(m);
+                               mpt+=sizeof(*m);
+                               count+=sizeof(*m);
+                               break;
+                       }
+                       case MP_INTSRC:
+                       {
+                               struct mpc_config_intsrc *m=
+                                       (struct mpc_config_intsrc *)mpt;
+
+                               MP_intsrc_info(m);
+                               mpt+=sizeof(*m);
+                               count+=sizeof(*m);
+                               break;
+                       }
+                       case MP_LINTSRC:
+                       {
+                               struct mpc_config_lintsrc *m=
+                                       (struct mpc_config_lintsrc *)mpt;
+                               MP_lintsrc_info(m);
+                               mpt+=sizeof(*m);
+                               count+=sizeof(*m);
+                               break;
+                       }
+               }
+       }
+       if (!num_processors)
+               printk(KERN_ERR "SMP mptable: no processors registered!\n");
+       return num_processors;
+}
+
+static int __init ELCR_trigger(unsigned int irq)
+{
+       unsigned int port;
+
+       port = 0x4d0 + (irq >> 3);
+       return (inb(port) >> (irq & 7)) & 1;
+}
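
For reference, the ELCR is a pair of ISA I/O ports (0x4d0 for IRQs 0-7, 0x4d1 for IRQs 8-15) with one bit per IRQ; a set bit marks the line as level triggered. A minimal stand-alone sketch of the same port/bit arithmetic, using an illustrative IRQ number:

#include <stdio.h>

int main(void)
{
        unsigned int irq = 10;                   /* illustrative IRQ */
        unsigned int port = 0x4d0 + (irq >> 3);  /* -> 0x4d1 */
        unsigned int bit = irq & 7;              /* -> bit 2 */

        printf("IRQ %u: ELCR port 0x%x, bit %u\n", irq, port, bit);
        return 0;
}
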
+
+static void __init construct_default_ioirq_mptable(int mpc_default_type)
+{
+       struct mpc_config_intsrc intsrc;
+       int i;
+       int ELCR_fallback = 0;
+
+       intsrc.mpc_type = MP_INTSRC;
+       intsrc.mpc_irqflag = 0;                 /* conforming */
+       intsrc.mpc_srcbus = 0;
+       intsrc.mpc_dstapic = mp_ioapics[0].mpc_apicid;
+
+       intsrc.mpc_irqtype = mp_INT;
+
+       /*
+        *  For default configuration type 5 we have an ISA/PCI system with no
+        *  IRQ entries in the MP table. To prevent the PCI interrupts from being set up
+        *  incorrectly, we try to use the ELCR. The sanity check to see if
+        *  there is good ELCR data is very simple - IRQ0, 1, 2 and 13 can
+        *  never be level sensitive, so we simply see if the ELCR agrees.
+        *  If it does, we assume it's valid.
+        */
+       if (mpc_default_type == 5) {
+               printk("ISA/PCI bus type with no IRQ information... falling back to ELCR\n");
+
+               if (ELCR_trigger(0) || ELCR_trigger(1) || ELCR_trigger(2) || ELCR_trigger(13))
+                       printk("ELCR contains invalid data... not using ELCR\n");
+               else {
+                       printk("Using ELCR to identify PCI interrupts\n");
+                       ELCR_fallback = 1;
+               }
+       }
+
+       for (i = 0; i < 16; i++) {
+               switch (mpc_default_type) {
+               case 2:
+                       if (i == 0 || i == 13)
+                               continue;       /* IRQ0 & IRQ13 not connected */
+                       /* fall through */
+               default:
+                       if (i == 2)
+                               continue;       /* IRQ2 is never connected */
+               }
+
+               if (ELCR_fallback) {
+                       /*
+                        *  If the ELCR indicates a level-sensitive interrupt, we
+                        *  copy that information over to the MP table in the
+                        *  irqflag field (level sensitive, active high polarity).
+                        */
+                       if (ELCR_trigger(i))
+                               intsrc.mpc_irqflag = 13;
+                       else
+                               intsrc.mpc_irqflag = 0;
+               }
+
+               intsrc.mpc_srcbusirq = i;
+               intsrc.mpc_dstirq = i ? i : 2;          /* IRQ0 to INTIN2 */
+               MP_intsrc_info(&intsrc);
+       }
+
+       intsrc.mpc_irqtype = mp_ExtINT;
+       intsrc.mpc_srcbusirq = 0;
+       intsrc.mpc_dstirq = 0;                          /* 8259A to INTIN0 */
+       MP_intsrc_info(&intsrc);
+}
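
The magic irqflag value 13 used above decodes with the same field layout that MP_intsrc_info() prints: the low two bits are the polarity and the next two the trigger mode, so 13 (binary 1101) means polarity 1 (active high) and trigger 3 (level). A tiny illustrative decoder:

#include <stdio.h>

int main(void)
{
        unsigned int irqflag = 13;      /* the ELCR fallback value used above */

        /* prints "pol 1, trig 3": active high, level triggered */
        printf("pol %u, trig %u\n", irqflag & 3, (irqflag >> 2) & 3);
        return 0;
}
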
+
+static inline void __init construct_default_ISA_mptable(int mpc_default_type)
+{
+       struct mpc_config_processor processor;
+       struct mpc_config_bus bus;
+       struct mpc_config_ioapic ioapic;
+       struct mpc_config_lintsrc lintsrc;
+       int linttypes[2] = { mp_ExtINT, mp_NMI };
+       int i;
+
+       /*
+        * local APIC has default address
+        */
+       mp_lapic_addr = APIC_DEFAULT_PHYS_BASE;
+
+       /*
+        * 2 CPUs, numbered 0 & 1.
+        */
+       processor.mpc_type = MP_PROCESSOR;
+       /* Either an integrated APIC or a discrete 82489DX. */
+       processor.mpc_apicver = mpc_default_type > 4 ? 0x10 : 0x01;
+       processor.mpc_cpuflag = CPU_ENABLED;
+       processor.mpc_cpufeature = (boot_cpu_data.x86 << 8) |
+                                  (boot_cpu_data.x86_model << 4) |
+                                  boot_cpu_data.x86_mask;
+       processor.mpc_featureflag = boot_cpu_data.x86_capability[0];
+       processor.mpc_reserved[0] = 0;
+       processor.mpc_reserved[1] = 0;
+       for (i = 0; i < 2; i++) {
+               processor.mpc_apicid = i;
+               MP_processor_info(&processor);
+       }
+
+       bus.mpc_type = MP_BUS;
+       bus.mpc_busid = 0;
+       switch (mpc_default_type) {
+               default:
+                       printk("???\nUnknown standard configuration %d\n",
+                               mpc_default_type);
+                       /* fall through */
+               case 1:
+               case 5:
+                       memcpy(bus.mpc_bustype, "ISA   ", 6);
+                       break;
+               case 2:
+               case 6:
+               case 3:
+                       memcpy(bus.mpc_bustype, "EISA  ", 6);
+                       break;
+               case 4:
+               case 7:
+                       memcpy(bus.mpc_bustype, "MCA   ", 6);
+       }
+       MP_bus_info(&bus);
+       if (mpc_default_type > 4) {
+               bus.mpc_busid = 1;
+               memcpy(bus.mpc_bustype, "PCI   ", 6);
+               MP_bus_info(&bus);
+       }
+
+       ioapic.mpc_type = MP_IOAPIC;
+       ioapic.mpc_apicid = 2;
+       ioapic.mpc_apicver = mpc_default_type > 4 ? 0x10 : 0x01;
+       ioapic.mpc_flags = MPC_APIC_USABLE;
+       ioapic.mpc_apicaddr = 0xFEC00000;
+       MP_ioapic_info(&ioapic);
+
+       /*
+        * We set up most of the low 16 IO-APIC pins according to MPS rules.
+        */
+       construct_default_ioirq_mptable(mpc_default_type);
+
+       lintsrc.mpc_type = MP_LINTSRC;
+       lintsrc.mpc_irqflag = 0;                /* conforming */
+       lintsrc.mpc_srcbusid = 0;
+       lintsrc.mpc_srcbusirq = 0;
+       lintsrc.mpc_destapic = MP_APIC_ALL;
+       for (i = 0; i < 2; i++) {
+               lintsrc.mpc_irqtype = linttypes[i];
+               lintsrc.mpc_destapiclint = i;
+               MP_lintsrc_info(&lintsrc);
+       }
+}
+
+static struct intel_mp_floating *mpf_found;
+
+/*
+ * Scan the memory blocks for an SMP configuration block.
+ */
+void __init get_smp_config (void)
+{
+       struct intel_mp_floating *mpf = mpf_found;
+       printk("Intel MultiProcessor Specification v1.%d\n", mpf->mpf_specification);
+       if (mpf->mpf_feature2 & (1<<7)) {
+               printk("    IMCR and PIC compatibility mode.\n");
+               pic_mode = 1;
+       } else {
+               printk("    Virtual Wire compatibility mode.\n");
+               pic_mode = 0;
+       }
+
+       /*
+        * Now see if we need to read further.
+        */
+       if (mpf->mpf_feature1 != 0) {
+
+               printk("Default MP configuration #%d\n", mpf->mpf_feature1);
+               construct_default_ISA_mptable(mpf->mpf_feature1);
+
+       } else if (mpf->mpf_physptr) {
+
+               /*
+                * Read the physical hardware table.  Anything here will
+                * override the defaults.
+                */
+               if (!smp_read_mpc((void *)(unsigned long)mpf->mpf_physptr)) {
+                       smp_found_config = 0;
+                       printk(KERN_ERR "BIOS bug, MP table errors detected!...\n");
+                       printk(KERN_ERR "... disabling SMP support. (tell your hw vendor)\n");
+                       return;
+               }
+               /*
+                * If there are no explicit MP IRQ entries, then we are
+                * broken.  We set up most of the low 16 IO-APIC pins to
+                * ISA defaults and hope it will work.
+                */
+               if (!mp_irq_entries) {
+                       struct mpc_config_bus bus;
+
+                       printk("BIOS bug, no explicit IRQ entries, using default mptable. (tell your hw vendor)\n");
+
+                       bus.mpc_type = MP_BUS;
+                       bus.mpc_busid = 0;
+                       memcpy(bus.mpc_bustype, "ISA   ", 6);
+                       MP_bus_info(&bus);
+
+                       construct_default_ioirq_mptable(0);
+               }
+
+       } else
+               BUG();
+
+       printk("Processors: %d\n", num_processors);
+       /*
+        * Only use the first configuration found.
+        */
+}
+
+static int __init smp_scan_config (unsigned long base, unsigned long length)
+{
+       unsigned long *bp = phys_to_virt(base);
+       struct intel_mp_floating *mpf;
+
+       Dprintk("Scan SMP from %p for %ld bytes.\n", bp,length);
+       if (sizeof(*mpf) != 16)
+               printk("Error: MPF size\n");
+
+       while (length > 0) {
+               mpf = (struct intel_mp_floating *)bp;
+               if ((*bp == SMP_MAGIC_IDENT) &&
+                       (mpf->mpf_length == 1) &&
+                       !mpf_checksum((unsigned char *)bp, 16) &&
+                       ((mpf->mpf_specification == 1)
+                               || (mpf->mpf_specification == 4)) ) {
+
+                       smp_found_config = 1;
+                       printk("found SMP MP-table at %08lx\n",
+                                               virt_to_phys(mpf));
+                       reserve_bootmem(virt_to_phys(mpf), PAGE_SIZE);
+                       if (mpf->mpf_physptr)
+                               reserve_bootmem(mpf->mpf_physptr, PAGE_SIZE);
+                       mpf_found = mpf;
+                       return 1;
+               }
+               bp += 4;
+               length -= 16;
+       }
+       return 0;
+}
+
+void __init find_intel_smp (void)
+{
+       unsigned int address;
+
+       /*
+        * FIXME: Linux assumes you have 640K of base RAM...
+        * this continues the error...
+        *
+        * 1) Scan the bottom 1K for a signature
+        * 2) Scan the top 1K of base RAM
+        * 3) Scan the 64K of BIOS
+        */
+       if (smp_scan_config(0x0,0x400) ||
+               smp_scan_config(639*0x400,0x400) ||
+                       smp_scan_config(0xF0000,0x10000))
+               return;
+       /*
+        * If it is an SMP machine we should know now, unless the
+        * configuration is in an EISA/MCA bus machine with an
+        * extended bios data area.
+        *
+        * There is a real-mode segmented pointer to the
+        * 4K EBDA area at 0x40E; calculate and scan it here.
+        *
+        * NOTE! There are Linux loaders that will corrupt the EBDA
+        * area, and as such this kind of SMP config may be less
+        * trustworthy, simply because the SMP table may have been
+        * stomped on during early boot. These loaders are buggy and
+        * should be fixed.
+        */
+
+       address = *(unsigned short *)phys_to_virt(0x40E);
+       address <<= 4;
+       smp_scan_config(address, 0x1000);
+       if (smp_found_config)
+               printk(KERN_WARNING "WARNING: MP table in the EBDA can be UNSAFE, contact linux-smp@vger.kernel.org if you experience SMP problems!\n");
+}
+
+#else
+
+/*
+ * The Visual Workstation is Intel MP compliant in the hardware
+ * sense, but it doesn't have a BIOS (configuration table).
+ * No problem for Linux.
+ */
+void __init find_visws_smp(void)
+{
+       smp_found_config = 1;
+
+       phys_cpu_present_map |= 2; /* or in id 1 */
+       apic_version[1] |= 0x10; /* integrated APIC */
+       apic_version[0] |= 0x10;
+
+       mp_lapic_addr = APIC_DEFAULT_PHYS_BASE;
+}
+
+#endif
+
+/*
+ * - Intel MP Configuration Table
+ * - or SGI Visual Workstation configuration
+ */
+void __init find_smp_config (void)
+{
+#ifdef CONFIG_X86_LOCAL_APIC
+       find_intel_smp();
+#endif
+#ifdef CONFIG_VISWS
+       find_visws_smp();
+#endif
+}
+
diff --git a/arch/x86_64/kernel/msr.c b/arch/x86_64/kernel/msr.c
new file mode 100644 (file)
index 0000000..5636ed5
--- /dev/null
@@ -0,0 +1,279 @@
+#ident "$Id: msr.c,v 1.6 2001/10/24 23:58:53 ak Exp $"
+/* ----------------------------------------------------------------------- *
+ *   
+ *   Copyright 2000 H. Peter Anvin - All Rights Reserved
+ *
+ *   This program is free software; you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139,
+ *   USA; either version 2 of the License, or (at your option) any later
+ *   version; incorporated herein by reference.
+ *
+ * ----------------------------------------------------------------------- */
+
+/*
+ * msr.c
+ *
+ * x86 MSR access device
+ *
+ * This device is accessed by lseek() to the appropriate register number
+ * and then read/write in chunks of 8 bytes.  A larger size means multiple
+ * reads or writes of the same register.
+ *
+ * This driver uses /dev/cpu/%d/msr where %d is the minor number, and on
+ * an SMP box will direct the access to CPU %d.
+ */
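
As a usage sketch (assuming the usual /dev/cpu/0/msr node name; MSR 0x10, the TSC, is only an illustrative register), a userspace reader opens the per-CPU node, lseek()s to the MSR number and reads eight bytes:

#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>

int main(void)
{
        unsigned long long val;
        int fd = open("/dev/cpu/0/msr", O_RDONLY);      /* CPU 0 */

        if (fd < 0)
                return 1;
        if (lseek(fd, 0x10, SEEK_SET) == (off_t)-1 ||   /* MSR 0x10: TSC */
            read(fd, &val, 8) != 8) {
                close(fd);
                return 1;
        }
        printf("MSR 0x10 = %llu\n", val);
        close(fd);
        return 0;
}
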
+
+#include <linux/module.h>
+#include <linux/config.h>
+
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/fcntl.h>
+#include <linux/init.h>
+#include <linux/poll.h>
+#include <linux/smp.h>
+#include <linux/smp_lock.h>
+#include <linux/major.h>
+#include <linux/fs.h>
+
+#include <asm/processor.h>
+#include <asm/msr.h>
+#include <asm/uaccess.h>
+#include <asm/system.h>
+
+/* Note: "err" is handled in a funny way below.  Otherwise one version
+   of gcc or another breaks. */
+
+static inline int wrmsr_eio(u32 reg, u32 eax, u32 edx)
+{
+  int err;
+
+  asm volatile(
+              "1:      wrmsr\n"
+              "2:\n"
+              ".section .fixup,\"ax\"\n"
+              "3:      movl %4,%0\n"
+              "        jmp 2b\n"
+              ".previous\n"
+              ".section __ex_table,\"a\"\n"
+              "        .align 4\n"
+              "        .quad 1b,3b\n"
+              ".previous"
+              : "=&bDS" (err)
+              : "a" (eax), "d" (edx), "c" (reg), "i" (-EIO), "0" (0));
+
+  return err;
+}
+
+static inline int rdmsr_eio(u32 reg, u32 *eax, u32 *edx)
+{
+  int err;
+
+  asm volatile(
+              "1:      rdmsr\n"
+              "2:\n"
+              ".section .fixup,\"ax\"\n"
+              "3:      movl %4,%0\n"
+              "        jmp 2b\n"
+              ".previous\n"
+              ".section __ex_table,\"a\"\n"
+              "        .align 4\n"
+              "        .quad 1b,3b\n"
+              ".previous"
+              : "=&bDS" (err), "=a" (*eax), "=d" (*edx)
+              : "c" (reg), "i" (-EIO), "0" (0));
+
+  return err;
+}
+
+#ifdef CONFIG_SMP
+
+struct msr_command {
+  int cpu;
+  int err;
+  u32 reg;
+  u32 data[2];
+};
+
+static void msr_smp_wrmsr(void *cmd_block)
+{
+  struct msr_command *cmd = (struct msr_command *) cmd_block;
+  
+  if ( cmd->cpu == smp_processor_id() )
+    cmd->err = wrmsr_eio(cmd->reg, cmd->data[0], cmd->data[1]);
+}
+
+static void msr_smp_rdmsr(void *cmd_block)
+{
+  struct msr_command *cmd = (struct msr_command *) cmd_block;
+  
+  if ( cmd->cpu == smp_processor_id() )
+    cmd->err = rdmsr_eio(cmd->reg, &cmd->data[0], &cmd->data[1]);
+}
+
+static inline int do_wrmsr(int cpu, u32 reg, u32 eax, u32 edx)
+{
+  struct msr_command cmd;
+
+  if ( cpu == smp_processor_id() ) {
+    return wrmsr_eio(reg, eax, edx);
+  } else {
+    cmd.cpu = cpu;
+    cmd.reg = reg;
+    cmd.data[0] = eax;
+    cmd.data[1] = edx;
+    
+    smp_call_function(msr_smp_wrmsr, &cmd, 1, 1);
+    return cmd.err;
+  }
+}
+
+static inline int do_rdmsr(int cpu, u32 reg, u32 *eax, u32 *edx)
+{
+  struct msr_command cmd;
+
+  if ( cpu == smp_processor_id() ) {
+    return rdmsr_eio(reg, eax, edx);
+  } else {
+    cmd.cpu = cpu;
+    cmd.reg = reg;
+
+    smp_call_function(msr_smp_rdmsr, &cmd, 1, 1);
+    
+    *eax = cmd.data[0];
+    *edx = cmd.data[1];
+
+    return cmd.err;
+  }
+}
+
+#else /* ! CONFIG_SMP */
+
+static inline int do_wrmsr(int cpu, u32 reg, u32 eax, u32 edx)
+{
+  return wrmsr_eio(reg, eax, edx);
+}
+
+static inline int do_rdmsr(int cpu, u32 reg, u32 *eax, u32 *edx)
+{
+  return rdmsr_eio(reg, eax, edx);
+}
+
+#endif /* ! CONFIG_SMP */
+
+static loff_t msr_seek(struct file *file, loff_t offset, int orig)
+{
+  loff_t ret = -EINVAL;
+  lock_kernel();
+  switch (orig) {
+  case 0:
+    file->f_pos = offset;
+    ret = file->f_pos;
+    break;
+  case 1:
+    file->f_pos += offset;
+    ret = file->f_pos;
+  }
+  unlock_kernel();
+  return ret;
+}
+
+static ssize_t msr_read(struct file * file, char * buf,
+                       size_t count, loff_t *ppos)
+{
+  u32 *tmp = (u32 *)buf;
+  u32 data[2];
+  size_t rv;
+  u32 reg = *ppos;
+  int cpu = minor(file->f_dentry->d_inode->i_rdev);
+  int err;
+
+  if ( count % 8 )
+    return -EINVAL; /* Invalid chunk size */
+  
+  for ( rv = 0 ; count ; count -= 8 ) {
+    err = do_rdmsr(cpu, reg, &data[0], &data[1]);
+    if ( err )
+      return err;
+    if ( copy_to_user(tmp,&data,8) )
+      return -EFAULT;
+    tmp += 2;
+  }
+
+  return ((char *)tmp) - buf;
+}
+
+static ssize_t msr_write(struct file * file, const char * buf,
+                        size_t count, loff_t *ppos)
+{
+  const u32 *tmp = (const u32 *)buf;
+  u32 data[2];
+  size_t rv;
+  u32 reg = *ppos;
+  int cpu = minor(file->f_dentry->d_inode->i_rdev);
+  int err;
+
+  if ( count % 8 )
+    return -EINVAL; /* Invalid chunk size */
+  
+  for ( rv = 0 ; count ; count -= 8 ) {
+    if ( copy_from_user(&data,tmp,8) )
+      return -EFAULT;
+    err = do_wrmsr(cpu, reg, data[0], data[1]);
+    if ( err )
+      return err;
+    tmp += 2;
+  }
+
+  return ((char *)tmp) - buf;
+}
+
+static int msr_open(struct inode *inode, struct file *file)
+{
+  int cpu = minor(file->f_dentry->d_inode->i_rdev);
+  struct cpuinfo_x86 *c = &(cpu_data)[cpu];
+  
+  if ( !(cpu_online_map & (1UL << cpu)) )
+    return -ENXIO;             /* No such CPU */
+  if ( !test_bit(X86_FEATURE_MSR, &c->x86_capability) )
+    return -EIO;               /* MSR not supported */
+  
+  return 0;
+}
+
+/*
+ * File operations we support
+ */
+static struct file_operations msr_fops = {
+  owner:       THIS_MODULE,
+  llseek:      msr_seek,
+  read:                msr_read,
+  write:       msr_write,
+  open:                msr_open,
+};
+
+int __init msr_init(void)
+{
+  if (register_chrdev(MSR_MAJOR, "cpu/msr", &msr_fops)) {
+    printk(KERN_ERR "msr: unable to get major %d for msr\n",
+          MSR_MAJOR);
+    return -EBUSY;
+  }
+  
+  return 0;
+}
+
+void __exit msr_exit(void)
+{
+  unregister_chrdev(MSR_MAJOR, "cpu/msr");
+}
+
+module_init(msr_init);
+module_exit(msr_exit)
+
+EXPORT_NO_SYMBOLS;
+
+MODULE_AUTHOR("H. Peter Anvin <hpa@zytor.com>");
+MODULE_DESCRIPTION("x86 generic MSR driver");
+MODULE_LICENSE("GPL");
diff --git a/arch/x86_64/kernel/mtrr.c b/arch/x86_64/kernel/mtrr.c
new file mode 100644 (file)
index 0000000..ab20b56
--- /dev/null
@@ -0,0 +1,2310 @@
+/*  Generic MTRR (Memory Type Range Register) driver.
+
+    Copyright (C) 1997-2000  Richard Gooch
+
+    This library is free software; you can redistribute it and/or
+    modify it under the terms of the GNU Library General Public
+    License as published by the Free Software Foundation; either
+    version 2 of the License, or (at your option) any later version.
+
+    This library is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+    Library General Public License for more details.
+
+    You should have received a copy of the GNU Library General Public
+    License along with this library; if not, write to the Free
+    Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+    Richard Gooch may be reached by email at  rgooch@atnf.csiro.au
+    The postal address is:
+      Richard Gooch, c/o ATNF, P. O. Box 76, Epping, N.S.W., 2121, Australia.
+
+    Source: "Pentium Pro Family Developer's Manual, Volume 3:
+    Operating System Writer's Guide" (Intel document number 242692),
+    section 11.11.7
+
+    ChangeLog
+
+    Prehistory Martin Tischhäuser <martin@ikcbarka.fzk.de>
+              Initial register-setting code (from proform-1.0).
+    19971216   Richard Gooch <rgooch@atnf.csiro.au>
+               Original version for /proc/mtrr interface, SMP-safe.
+  v1.0
+    19971217   Richard Gooch <rgooch@atnf.csiro.au>
+               Bug fix for ioctls()'s.
+              Added sample code in Documentation/mtrr.txt
+  v1.1
+    19971218   Richard Gooch <rgooch@atnf.csiro.au>
+               Disallow overlapping regions.
+    19971219   Jens Maurer <jmaurer@menuett.rhein-main.de>
+               Register-setting fixups.
+  v1.2
+    19971222   Richard Gooch <rgooch@atnf.csiro.au>
+               Fixups for kernel 2.1.75.
+  v1.3
+    19971229   David Wragg <dpw@doc.ic.ac.uk>
+               Register-setting fixups and conformity with Intel conventions.
+    19971229   Richard Gooch <rgooch@atnf.csiro.au>
+               Cosmetic changes and wrote this ChangeLog ;-)
+    19980106   Richard Gooch <rgooch@atnf.csiro.au>
+               Fixups for kernel 2.1.78.
+  v1.4
+    19980119   David Wragg <dpw@doc.ic.ac.uk>
+               Included passive-release enable code (elsewhere in PCI setup).
+  v1.5
+    19980131   Richard Gooch <rgooch@atnf.csiro.au>
+               Replaced global kernel lock with private spinlock.
+  v1.6
+    19980201   Richard Gooch <rgooch@atnf.csiro.au>
+               Added wait for other CPUs to complete changes.
+  v1.7
+    19980202   Richard Gooch <rgooch@atnf.csiro.au>
+               Bug fix in definition of <set_mtrr> for UP.
+  v1.8
+    19980319   Richard Gooch <rgooch@atnf.csiro.au>
+               Fixups for kernel 2.1.90.
+    19980323   Richard Gooch <rgooch@atnf.csiro.au>
+               Move SMP BIOS fixup before secondary CPUs call <calibrate_delay>
+  v1.9
+    19980325   Richard Gooch <rgooch@atnf.csiro.au>
+               Fixed test for overlapping regions: confused by adjacent regions
+    19980326   Richard Gooch <rgooch@atnf.csiro.au>
+               Added wbinvd in <set_mtrr_prepare>.
+    19980401   Richard Gooch <rgooch@atnf.csiro.au>
+               Bug fix for non-SMP compilation.
+    19980418   David Wragg <dpw@doc.ic.ac.uk>
+               Fixed-MTRR synchronisation for SMP and use atomic operations
+              instead of spinlocks.
+    19980418   Richard Gooch <rgooch@atnf.csiro.au>
+              Differentiate different MTRR register classes for BIOS fixup.
+  v1.10
+    19980419   David Wragg <dpw@doc.ic.ac.uk>
+              Bug fix in variable MTRR synchronisation.
+  v1.11
+    19980419   Richard Gooch <rgooch@atnf.csiro.au>
+              Fixups for kernel 2.1.97.
+  v1.12
+    19980421   Richard Gooch <rgooch@atnf.csiro.au>
+              Safer synchronisation across CPUs when changing MTRRs.
+  v1.13
+    19980423   Richard Gooch <rgooch@atnf.csiro.au>
+              Bugfix for SMP systems without MTRR support.
+  v1.14
+    19980427   Richard Gooch <rgooch@atnf.csiro.au>
+              Trap calls to <mtrr_add> and <mtrr_del> on non-MTRR machines.
+  v1.15
+    19980427   Richard Gooch <rgooch@atnf.csiro.au>
+              Use atomic bitops for setting SMP change mask.
+  v1.16
+    19980428   Richard Gooch <rgooch@atnf.csiro.au>
+              Removed spurious diagnostic message.
+  v1.17
+    19980429   Richard Gooch <rgooch@atnf.csiro.au>
+              Moved register-setting macros into this file.
+              Moved setup code from init/main.c to i386-specific areas.
+  v1.18
+    19980502   Richard Gooch <rgooch@atnf.csiro.au>
+              Moved MTRR detection outside conditionals in <mtrr_init>.
+  v1.19
+    19980502   Richard Gooch <rgooch@atnf.csiro.au>
+              Documentation improvement: mention Pentium II and AGP.
+  v1.20
+    19980521   Richard Gooch <rgooch@atnf.csiro.au>
+              Only manipulate interrupt enable flag on local CPU.
+              Allow enclosed uncachable regions.
+  v1.21
+    19980611   Richard Gooch <rgooch@atnf.csiro.au>
+              Always define <main_lock>.
+  v1.22
+    19980901   Richard Gooch <rgooch@atnf.csiro.au>
+              Removed module support in order to tidy up code.
+              Added sanity check for <mtrr_add>/<mtrr_del> before <mtrr_init>.
+              Created addition queue for prior to SMP commence.
+  v1.23
+    19980902   Richard Gooch <rgooch@atnf.csiro.au>
+              Ported patch to kernel 2.1.120-pre3.
+  v1.24
+    19980910   Richard Gooch <rgooch@atnf.csiro.au>
+              Removed sanity checks and addition queue: Linus prefers an OOPS.
+  v1.25
+    19981001   Richard Gooch <rgooch@atnf.csiro.au>
+              Fixed harmless compiler warning in include/asm-i386/mtrr.h
+              Fixed version numbering and history for v1.23 -> v1.24.
+  v1.26
+    19990118   Richard Gooch <rgooch@atnf.csiro.au>
+              Added devfs support.
+  v1.27
+    19990123   Richard Gooch <rgooch@atnf.csiro.au>
+              Changed locking to spin with reschedule.
+              Made use of new <smp_call_function>.
+  v1.28
+    19990201   Zoltán Böszörményi <zboszor@mail.externet.hu>
+              Extended the driver to be able to use Cyrix style ARRs.
+    19990204   Richard Gooch <rgooch@atnf.csiro.au>
+              Restructured Cyrix support.
+  v1.29
+    19990204   Zoltán Böszörményi <zboszor@mail.externet.hu>
+              Refined ARR support: enable MAPEN in set_mtrr_prepare()
+              and disable MAPEN in set_mtrr_done().
+    19990205   Richard Gooch <rgooch@atnf.csiro.au>
+              Minor cleanups.
+  v1.30
+    19990208   Zoltán Böszörményi <zboszor@mail.externet.hu>
+               Protect plain 6x86s (and other processors without the
+               Page Global Enable feature) against accessing CR4 in
+               set_mtrr_prepare() and set_mtrr_done().
+    19990210   Richard Gooch <rgooch@atnf.csiro.au>
+              Turned <set_mtrr_up> and <get_mtrr> into function pointers.
+  v1.31
+    19990212   Zoltán Böszörményi <zboszor@mail.externet.hu>
+               Major rewrite of cyrix_arr_init(): do not touch ARRs,
+               leave them as the BIOS have set them up.
+               Enable usage of all 8 ARRs.
+               Avoid multiplications by 3 everywhere and other
+               code clean ups/speed ups.
+    19990213   Zoltán Böszörményi <zboszor@mail.externet.hu>
+               Set up other Cyrix processors identical to the boot cpu.
+               Since Cyrix don't support Intel APIC, this is l'art pour l'art.
+               Weigh ARRs by size:
+               If size <= 32M is given, set up ARR# we were given.
+               If size >  32M is given, set up ARR7 only if it is free,
+               fail otherwise.
+    19990214   Zoltán Böszörményi <zboszor@mail.externet.hu>
+               Also check for size >= 256K if we are to set up ARR7,
+               mtrr_add() returns the value it gets from set_mtrr()
+    19990218   Zoltán Böszörményi <zboszor@mail.externet.hu>
+               Remove Cyrix "coma bug" workaround from here.
+               Moved to linux/arch/i386/kernel/setup.c and
+               linux/include/asm-i386/bugs.h
+    19990228   Richard Gooch <rgooch@atnf.csiro.au>
+              Added MTRRIOC_KILL_ENTRY ioctl(2)
+              Trap for counter underflow in <mtrr_file_del>.
+              Trap for 4 MiB aligned regions for PPro, stepping <= 7.
+    19990301   Richard Gooch <rgooch@atnf.csiro.au>
+              Created <get_free_region> hook.
+    19990305   Richard Gooch <rgooch@atnf.csiro.au>
+              Temporarily disable AMD support now MTRR capability flag is set.
+  v1.32
+    19990308   Zoltán Böszörményi <zboszor@mail.externet.hu>
+              Adjust my changes (19990212-19990218) to Richard Gooch's
+              latest changes. (19990228-19990305)
+  v1.33
+    19990309   Richard Gooch <rgooch@atnf.csiro.au>
+              Fixed typo in <printk> message.
+    19990310   Richard Gooch <rgooch@atnf.csiro.au>
+              Support K6-II/III based on Alan Cox's <alan@redhat.com> patches.
+  v1.34
+    19990511   Bart Hartgers <bart@etpmod.phys.tue.nl>
+              Support Centaur C6 MCR's.
+    19990512   Richard Gooch <rgooch@atnf.csiro.au>
+              Minor cleanups.
+  v1.35
+    19990707   Zoltán Böszörményi <zboszor@mail.externet.hu>
+               Check whether ARR3 is protected in cyrix_get_free_region()
+               and mtrr_del(). The code won't attempt to delete or change it
+               from now on if the BIOS protected ARR3. It silently skips ARR3
+               in cyrix_get_free_region() or returns with an error code from
+               mtrr_del().
+    19990711   Zoltán Böszörményi <zboszor@mail.externet.hu>
+               Reset some bits in the CCRs in cyrix_arr_init() to disable SMM
+               if ARR3 isn't protected. This is needed because if SMM is active
+               and ARR3 isn't protected then deleting and setting ARR3 again
+               may lock up the processor. With SMM entirely disabled, it does
+               not happen.
+    19990812   Zoltán Böszörményi <zboszor@mail.externet.hu>
+               Rearrange switch() statements so the driver accomodates to
+               the fact that the AMD Athlon handles its MTRRs the same way
+               as Intel does.
+    19990814   Zoltán Böszörményi <zboszor@mail.externet.hu>
+              Double check for Intel in mtrr_add()'s big switch() because
+              that revision check is only valid for Intel CPUs.
+    19990819   Alan Cox <alan@redhat.com>
+               Tested Zoltan's changes on a pre production Athlon - 100%
+               success.
+    19991008   Manfred Spraul <manfreds@colorfullife.com>
+              replaced spin_lock_reschedule() with a normal semaphore.
+  v1.36
+    20000221   Richard Gooch <rgooch@atnf.csiro.au>
+               Compile fix if procfs and devfs not enabled.
+              Formatting changes.
+  v1.37
+    20001109   H. Peter Anvin <hpa@zytor.com>
+              Use the new centralized CPU feature detects.
+
+  v1.38
+    20010309   Dave Jones <davej@suse.de>
+              Add support for Cyrix III.
+
+  v1.39
+    20010312   Dave Jones <davej@suse.de>
+               Ugh, I broke AMD support.
+              Reworked fix by Troels Walsted Hansen <troels@thule.no>
+
+  v1.40
+    20010327   Dave Jones <davej@suse.de>
+              Adapted Cyrix III support to include VIA C3.
+
+*/
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/sched.h>
+#include <linux/tty.h>
+#include <linux/timer.h>
+#include <linux/config.h>
+#include <linux/kernel.h>
+#include <linux/wait.h>
+#include <linux/string.h>
+#include <linux/slab.h>
+#include <linux/ioport.h>
+#include <linux/delay.h>
+#include <linux/fs.h>
+#include <linux/ctype.h>
+#include <linux/proc_fs.h>
+#include <linux/devfs_fs_kernel.h>
+#include <linux/mm.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#define MTRR_NEED_STRINGS
+#include <asm/mtrr.h>
+#include <linux/init.h>
+#include <linux/smp.h>
+#include <linux/smp_lock.h>
+
+#include <asm/uaccess.h>
+#include <asm/io.h>
+#include <asm/processor.h>
+#include <asm/system.h>
+#include <asm/pgtable.h>
+#include <asm/segment.h>
+#include <asm/bitops.h>
+#include <asm/atomic.h>
+#include <asm/msr.h>
+
+#include <asm/hardirq.h>
+#include <linux/irq.h>
+
+#define MTRR_VERSION            "1.40 (20010327)"
+
+#define TRUE  1
+#define FALSE 0
+
+/*
+ * The code assumes all processors support the same MTRR
+ * interface.  This is generally a good assumption, but could
+ * potentially be a problem.
+ */
+enum mtrr_if_type {
+    MTRR_IF_NONE,              /* No MTRRs supported */
+    MTRR_IF_INTEL,             /* Intel (P6) standard MTRRs */
+    MTRR_IF_AMD_K6,            /* AMD pre-Athlon MTRRs */
+    MTRR_IF_CYRIX_ARR,         /* Cyrix ARRs */
+    MTRR_IF_CENTAUR_MCR,       /* Centaur MCRs */
+} mtrr_if = MTRR_IF_NONE;
+
+static __initdata char *mtrr_if_name[] = {
+    "none", "Intel", "AMD K6", "Cyrix ARR", "Centaur MCR"
+};
+
+#define MTRRcap_MSR     0x0fe
+#define MTRRdefType_MSR 0x2ff
+
+#define MTRRphysBase_MSR(reg) (0x200 + 2 * (reg))
+#define MTRRphysMask_MSR(reg) (0x200 + 2 * (reg) + 1)
+
+#define NUM_FIXED_RANGES 88
+#define MTRRfix64K_00000_MSR 0x250
+#define MTRRfix16K_80000_MSR 0x258
+#define MTRRfix16K_A0000_MSR 0x259
+#define MTRRfix4K_C0000_MSR 0x268
+#define MTRRfix4K_C8000_MSR 0x269
+#define MTRRfix4K_D0000_MSR 0x26a
+#define MTRRfix4K_D8000_MSR 0x26b
+#define MTRRfix4K_E0000_MSR 0x26c
+#define MTRRfix4K_E8000_MSR 0x26d
+#define MTRRfix4K_F0000_MSR 0x26e
+#define MTRRfix4K_F8000_MSR 0x26f
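
The 88 fixed ranges above come from eleven MSRs that each pack eight one-byte region types: one 64K-granularity register covering 0-512K, two 16K registers covering 512K-768K, and eight 4K registers covering 768K-1M. A small sketch of that accounting:

#include <stdio.h>

int main(void)
{
        /* eight ranges per MSR, times the number of MSRs per granularity */
        int fix64k = 8 * 1;     /* 0x00000-0x7FFFF in 64K steps */
        int fix16k = 8 * 2;     /* 0x80000-0xBFFFF in 16K steps */
        int fix4k  = 8 * 8;     /* 0xC0000-0xFFFFF in 4K steps  */

        printf("fixed ranges: %d\n", fix64k + fix16k + fix4k);  /* 88 */
        return 0;
}
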
+
+#ifdef CONFIG_SMP
+#  define MTRR_CHANGE_MASK_FIXED     0x01
+#  define MTRR_CHANGE_MASK_VARIABLE  0x02
+#  define MTRR_CHANGE_MASK_DEFTYPE   0x04
+#endif
+
+/* In the Intel processor's MTRR interface, the MTRR type is always held in
+   an 8 bit field: */
+typedef u8 mtrr_type;
+
+#define LINE_SIZE      80
+#define JIFFIE_TIMEOUT 100
+
+#ifdef CONFIG_SMP
+#  define set_mtrr(reg,base,size,type) set_mtrr_smp (reg, base, size, type)
+#else
+#  define set_mtrr(reg,base,size,type) (*set_mtrr_up) (reg, base, size, type, \
+                                                      TRUE)
+#endif
+
+#if defined(CONFIG_PROC_FS) || defined(CONFIG_DEVFS_FS)
+# define USERSPACE_INTERFACE
+#endif
+
+#ifndef USERSPACE_INTERFACE
+#  define compute_ascii() while (0)
+#endif
+
+#ifdef USERSPACE_INTERFACE
+static char *ascii_buffer;
+static unsigned int ascii_buf_bytes;
+#endif
+static unsigned int *usage_table;
+static DECLARE_MUTEX(main_lock);
+
+/*  Private functions  */
+#ifdef USERSPACE_INTERFACE
+static void compute_ascii (void);
+#endif
+
+
+struct set_mtrr_context
+{
+    unsigned long flags;
+    unsigned long deftype_lo;
+    unsigned long deftype_hi;
+    unsigned long cr4val;
+    unsigned long ccr3;
+};
+
+static int arr3_protected;
+
+/*  Put the processor into a state where MTRRs can be safely set  */
+static void set_mtrr_prepare_save (struct set_mtrr_context *ctxt)
+{
+    /*  Disable interrupts locally  */
+    __save_flags (ctxt->flags); __cli ();
+
+    if ( mtrr_if != MTRR_IF_INTEL && mtrr_if != MTRR_IF_CYRIX_ARR )
+        return;
+
+    /*  Save value of CR4 and clear Page Global Enable (bit 7)  */
+    if ( test_bit(X86_FEATURE_PGE, &boot_cpu_data.x86_capability) ) {
+        ctxt->cr4val = read_cr4();
+        write_cr4(ctxt->cr4val & ~(1<<7));
+    }
+
+    /*  Disable and flush caches. Note that wbinvd flushes the TLBs as
+       a side-effect  */
+
+    { 
+           long cr0 = read_cr0() | 0x40000000;
+           wbinvd();
+           write_cr0( cr0 );
+           wbinvd();
+    }
+
+    if ( mtrr_if == MTRR_IF_INTEL ) {
+       /*  Save MTRR state */
+       rdmsr (MTRRdefType_MSR, ctxt->deftype_lo, ctxt->deftype_hi);
+    } else {
+       /* Cyrix ARRs - everything else was excluded at the top */
+       ctxt->ccr3 = getCx86 (CX86_CCR3);
+    }
+}   /*  End Function set_mtrr_prepare_save  */
+
+static void set_mtrr_cache_disable (struct set_mtrr_context *ctxt)
+{
+    if ( mtrr_if != MTRR_IF_INTEL && mtrr_if != MTRR_IF_CYRIX_ARR )
+        return;
+
+    if ( mtrr_if == MTRR_IF_INTEL ) {
+       /*  Disable MTRRs, and set the default type to uncached  */
+       wrmsr (MTRRdefType_MSR, ctxt->deftype_lo & 0xf300UL, ctxt->deftype_hi);
+    } else {
+       /* Cyrix ARRs - everything else was excluded at the top */
+       setCx86 (CX86_CCR3, (ctxt->ccr3 & 0x0f) | 0x10);
+    }
+}   /*  End Function set_mtrr_cache_disable  */
+
+/*  Restore the processor after a set_mtrr_prepare  */
+static void set_mtrr_done (struct set_mtrr_context *ctxt)
+{
+    if ( mtrr_if != MTRR_IF_INTEL && mtrr_if != MTRR_IF_CYRIX_ARR ) {
+        __restore_flags (ctxt->flags);
+        return;
+    }
+
+    /*  Flush caches and TLBs  */
+    wbinvd();
+
+    /*  Restore MTRRdefType  */
+    if ( mtrr_if == MTRR_IF_INTEL ) {
+       /* Intel (P6) standard MTRRs */
+       wrmsr (MTRRdefType_MSR, ctxt->deftype_lo, ctxt->deftype_hi);
+    } else {
+       /* Cyrix ARRs - everything else was excluded at the top */
+       setCx86 (CX86_CCR3, ctxt->ccr3);
+    }
+
+    /*  Enable caches  */
+    write_cr0( read_cr0() & 0xbfffffff );
+
+    /*  Restore value of CR4  */
+    if ( test_bit(X86_FEATURE_PGE, &boot_cpu_data.x86_capability) )
+        write_cr4(ctxt->cr4val);
+
+    /*  Re-enable interrupts locally (if enabled previously)  */
+    __restore_flags (ctxt->flags);
+}   /*  End Function set_mtrr_done  */
+
+/*  This function returns the number of variable MTRRs  */
+static unsigned int get_num_var_ranges (void)
+{
+    unsigned long config, dummy;
+
+    switch ( mtrr_if )
+    {
+    case MTRR_IF_INTEL:
+       rdmsr (MTRRcap_MSR, config, dummy);
+       return (config & 0xff);
+    case MTRR_IF_AMD_K6:
+       return 2;
+    case MTRR_IF_CYRIX_ARR:
+       return 8;
+    case MTRR_IF_CENTAUR_MCR:
+       return 8;
+    default:
+       return 0;
+    }
+}   /*  End Function get_num_var_ranges  */
+
+/*  Returns non-zero if we have the write-combining memory type  */
+static int have_wrcomb (void)
+{
+    unsigned long config, dummy;
+    struct pci_dev *dev = NULL;
+    
+   /* ServerWorks LE chipsets have problems with write-combining.
+      Don't allow it and leave room for other chipsets to be tagged. */
+
+    if ((dev = pci_find_class(PCI_CLASS_BRIDGE_HOST << 8, NULL)) != NULL) {
+       switch(dev->vendor) {
+        case PCI_VENDOR_ID_SERVERWORKS:
+           switch (dev->device) {
+           case PCI_DEVICE_ID_SERVERWORKS_LE:
+               return 0;
+               break;
+           default:
+               break;
+           }
+           break;
+       default:
+           break;
+       }
+    }
+
+
+    switch ( mtrr_if )
+    {
+    case MTRR_IF_INTEL:
+       rdmsr (MTRRcap_MSR, config, dummy);
+       return (config & (1<<10));
+    case MTRR_IF_AMD_K6:
+    case MTRR_IF_CENTAUR_MCR:
+    case MTRR_IF_CYRIX_ARR:
+       return 1;
+    default:
+       return 0;
+    }
+}   /*  End Function have_wrcomb  */
+
+static u32 size_or_mask, size_and_mask;
+
+static void intel_get_mtrr (unsigned int reg, unsigned long *base,
+                           unsigned long *size, mtrr_type *type)
+{
+    unsigned long mask_lo, mask_hi, base_lo, base_hi;
+
+    rdmsr (MTRRphysMask_MSR(reg), mask_lo, mask_hi);
+    if ( (mask_lo & 0x800) == 0 )
+    {
+       /*  Invalid (i.e. free) range  */
+       *base = 0;
+       *size = 0;
+       *type = 0;
+       return;
+    }
+
+    rdmsr(MTRRphysBase_MSR(reg), base_lo, base_hi);
+
+    /* Work out the shifted address mask. */
+    mask_lo = size_or_mask | mask_hi << (32 - PAGE_SHIFT)
+               | mask_lo >> PAGE_SHIFT;
+
+    /* This works correctly if size is a power of two, i.e. a
+       contiguous range. */
+     *size = -mask_lo;
+     *base = base_hi << (32 - PAGE_SHIFT) | base_lo >> PAGE_SHIFT;
+     *type = base_lo & 0xff;
+}   /*  End Function intel_get_mtrr  */
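
To make the mask arithmetic above concrete, a stand-alone sketch with purely illustrative numbers (and assuming a 32-bit unsigned int): a 128 MB region is 0x8000 pages of 4 KB, its page-unit mask, once the bits above the physical address width are filled in, is 0xffff8000, and negating the mask recovers the size, as the function relies on for power-of-two ranges:

#include <stdio.h>

int main(void)
{
        unsigned int size_pages = 0x8000;       /* 128 MB in 4 KB pages */
        unsigned int mask = -size_pages;        /* 0xffff8000 */

        printf("mask 0x%x -> size 0x%x pages\n", mask, -mask);
        return 0;
}
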
+
+static void cyrix_get_arr (unsigned int reg, unsigned long *base,
+                          unsigned long *size, mtrr_type *type)
+{
+    unsigned long flags;
+    unsigned char arr, ccr3, rcr, shift;
+
+    arr = CX86_ARR_BASE + (reg << 1) + reg; /* avoid multiplication by 3 */
+
+    /* Save flags and disable interrupts */
+    __save_flags (flags); __cli ();
+
+    ccr3 = getCx86 (CX86_CCR3);
+    setCx86 (CX86_CCR3, (ccr3 & 0x0f) | 0x10);         /* enable MAPEN */
+    ((unsigned char *) base)[3]  = getCx86 (arr);
+    ((unsigned char *) base)[2]  = getCx86 (arr+1);
+    ((unsigned char *) base)[1]  = getCx86 (arr+2);
+    rcr = getCx86(CX86_RCR_BASE + reg);
+    setCx86 (CX86_CCR3, ccr3);                         /* disable MAPEN */
+
+    /* Enable interrupts if it was enabled previously */
+    __restore_flags (flags);
+    shift = ((unsigned char *) base)[1] & 0x0f;
+    *base >>= PAGE_SHIFT;
+
+    /* Power of two, at least 4K on ARR0-ARR6, 256K on ARR7
+     * Note: shift==0xf means 4G, this is unsupported.
+     */
+    if (shift)
+      *size = (reg < 7 ? 0x1UL : 0x40UL) << (shift - 1);
+    else
+      *size = 0;
+
+    /* Bit 0 is Cache Enable on ARR7, Cache Disable on ARR0-ARR6 */
+    if (reg < 7)
+    {
+       switch (rcr)
+       {
+         case  1: *type = MTRR_TYPE_UNCACHABLE; break;
+         case  8: *type = MTRR_TYPE_WRBACK;     break;
+         case  9: *type = MTRR_TYPE_WRCOMB;     break;
+         case 24:
+         default: *type = MTRR_TYPE_WRTHROUGH;  break;
+       }
+    } else
+    {
+       switch (rcr)
+       {
+         case  0: *type = MTRR_TYPE_UNCACHABLE; break;
+         case  8: *type = MTRR_TYPE_WRCOMB;     break;
+         case  9: *type = MTRR_TYPE_WRBACK;     break;
+         case 25:
+         default: *type = MTRR_TYPE_WRTHROUGH;  break;
+       }
+    }
+}   /*  End Function cyrix_get_arr  */
+
+static void amd_get_mtrr (unsigned int reg, unsigned long *base,
+                         unsigned long *size, mtrr_type *type)
+{
+    unsigned long low, high;
+
+    rdmsr (0xC0000085, low, high);
+    /*  Upper dword is region 1, lower is region 0  */
+    if (reg == 1) low = high;
+    /*  Masking off the low bits leaves the correctly aligned base  */
+    *base = (low & 0xFFFE0000) >> PAGE_SHIFT;
+    *type = 0;
+    if (low & 1) *type = MTRR_TYPE_UNCACHABLE;
+    if (low & 2) *type = MTRR_TYPE_WRCOMB;
+    if ( !(low & 3) )
+    {
+       *size = 0;
+       return;
+    }
+    /*
+     * This needs a little explaining. The size is stored as an
+     * inverted mask of bits of 128K granularity 15 bits long offset
+     * 2 bits
+     *
+     * So to get a size we invert the mask and add 1 to the lowest
+     * mask bit (4, as it's 2 bits in). This gives us a size we then
+     * shift to turn into 128K blocks.
+     *
+     * eg              111 1111 1111 1100      is 512K
+     *
+     * invert          000 0000 0000 0011
+     * +1              000 0000 0000 0100
+     * *128K   ...
+     */
+    low = (~low) & 0x1FFFC;
+    *size = (low + 4) << (15 - PAGE_SHIFT);
+    return;
+}   /*  End Function amd_get_mtrr  */
+
+static struct
+{
+    unsigned long high;
+    unsigned long low;
+} centaur_mcr[8];
+
+static u8 centaur_mcr_reserved;
+static u8 centaur_mcr_type;            /* 0 for winchip, 1 for winchip2 */
+
+/*
+ *     Report boot time MCR setups 
+ */
+void mtrr_centaur_report_mcr(int mcr, u32 lo, u32 hi)
+{
+       centaur_mcr[mcr].low = lo;
+       centaur_mcr[mcr].high = hi;
+}
+
+static void centaur_get_mcr (unsigned int reg, unsigned long *base,
+                            unsigned long *size, mtrr_type *type)
+{
+    *base = centaur_mcr[reg].high >> PAGE_SHIFT;
+    *size = -(centaur_mcr[reg].low & 0xfffff000) >> PAGE_SHIFT;
+    *type = MTRR_TYPE_WRCOMB;  /*  If it is there, it is write-combining  */
+    if(centaur_mcr_type==1 && ((centaur_mcr[reg].low&31)&2))
+       *type = MTRR_TYPE_UNCACHABLE;
+    if(centaur_mcr_type==1 && (centaur_mcr[reg].low&31)==25)
+       *type = MTRR_TYPE_WRBACK;
+    if(centaur_mcr_type==0 && (centaur_mcr[reg].low&31)==31)
+       *type = MTRR_TYPE_WRBACK;
+    
+}   /*  End Function centaur_get_mcr  */
+
+static void (*get_mtrr) (unsigned int reg, unsigned long *base,
+                        unsigned long *size, mtrr_type *type);
+
+static void intel_set_mtrr_up (unsigned int reg, unsigned long base,
+                              unsigned long size, mtrr_type type, int do_safe)
+/*  [SUMMARY] Set variable MTRR register on the local CPU.
+    <reg> The register to set.
+    <base> The base address of the region.
+    <size> The size of the region. If this is 0 the region is disabled.
+    <type> The type of the region.
+    <do_safe> If TRUE, do the change safely. If FALSE, safety measures should
+    be done externally.
+    [RETURNS] Nothing.
+*/
+{
+    struct set_mtrr_context ctxt;
+
+    if (do_safe) {
+       set_mtrr_prepare_save (&ctxt);
+       set_mtrr_cache_disable (&ctxt);
+       }
+    if (size == 0)
+    {
+       /* The invalid bit is kept in the mask, so we simply clear the
+          relevant mask register to disable a range. */
+       wrmsr (MTRRphysMask_MSR (reg), 0, 0);
+    }
+    else
+    {
+       wrmsr (MTRRphysBase_MSR (reg), base << PAGE_SHIFT | type,
+               (base & size_and_mask) >> (32 - PAGE_SHIFT));
+       wrmsr (MTRRphysMask_MSR (reg), -size << PAGE_SHIFT | 0x800,
+               (-size & size_and_mask) >> (32 - PAGE_SHIFT));
+    }
+    if (do_safe) set_mtrr_done (&ctxt);
+}   /*  End Function intel_set_mtrr_up  */
+
+static void cyrix_set_arr_up (unsigned int reg, unsigned long base,
+                             unsigned long size, mtrr_type type, int do_safe)
+{
+    struct set_mtrr_context ctxt;
+    unsigned char arr, arr_type, arr_size;
+
+    arr = CX86_ARR_BASE + (reg << 1) + reg; /* avoid multiplication by 3 */
+
+    /* count down from 32M (ARR0-ARR6) or from 2G (ARR7) */
+    if (reg >= 7)
+       size >>= 6;
+
+    size &= 0x7fff; /* make sure arr_size <= 14 */
+    for(arr_size = 0; size; arr_size++, size >>= 1);
+
+    if (reg<7)
+    {
+       switch (type) {
+         case MTRR_TYPE_UNCACHABLE:    arr_type =  1; break;
+         case MTRR_TYPE_WRCOMB:                arr_type =  9; break;
+         case MTRR_TYPE_WRTHROUGH:     arr_type = 24; break;
+         default:                      arr_type =  8; break;
+       }
+    }
+    else
+    {
+       switch (type)
+       {
+         case MTRR_TYPE_UNCACHABLE:    arr_type =  0; break;
+         case MTRR_TYPE_WRCOMB:                arr_type =  8; break;
+         case MTRR_TYPE_WRTHROUGH:     arr_type = 25; break;
+         default:                      arr_type =  9; break;
+       }
+    }
+
+    if (do_safe) {
+       set_mtrr_prepare_save (&ctxt);
+       set_mtrr_cache_disable (&ctxt);
+    }
+    base <<= PAGE_SHIFT;
+    setCx86(arr,    ((unsigned char *) &base)[3]);
+    setCx86(arr+1,  ((unsigned char *) &base)[2]);
+    setCx86(arr+2, (((unsigned char *) &base)[1]) | arr_size);
+    setCx86(CX86_RCR_BASE + reg, arr_type);
+    if (do_safe) set_mtrr_done (&ctxt);
+}   /*  End Function cyrix_set_arr_up  */
+
+static void amd_set_mtrr_up (unsigned int reg, unsigned long base,
+                            unsigned long size, mtrr_type type, int do_safe)
+/*  [SUMMARY] Set variable MTRR register on the local CPU.
+    <reg> The register to set.
+    <base> The base address of the region.
+    <size> The size of the region. If this is 0 the region is disabled.
+    <type> The type of the region.
+    <do_safe> If TRUE, do the change safely. If FALSE, safety measures should
+    be done externally.
+    [RETURNS] Nothing.
+*/
+{
+    u32 regs[2];
+    struct set_mtrr_context ctxt;
+
+    if (do_safe) {
+       set_mtrr_prepare_save (&ctxt);
+       set_mtrr_cache_disable (&ctxt);
+    }
+    /*
+     * Low is MTRR0, high is MTRR1
+     */
+    rdmsr (0xC0000085, regs[0], regs[1]);
+    /*
+     * Blank to disable
+     */
+    if (size == 0)
+       regs[reg] = 0;
+    else
+       /* Set the register to the base, the type (off by one) and an
+          inverted bitmask of the size. The size is the only odd
+          bit. We are fed, say, 512K. We invert this and we get 111 1111
+          1111 1011, but if you subtract one and invert you get the
+          desired 111 1111 1111 1100 mask.
+
+          But ~(x - 1) == ~x + 1 == -x. Two's complement rocks!  */
+       regs[reg] = (-size>>(15-PAGE_SHIFT) & 0x0001FFFC)
+                               | (base<<PAGE_SHIFT) | (type+1);
+
+    /*
+     * The writeback rule is quite specific. See the manual. It is:
+     * disable local interrupts, write back the cache, set the MTRR.
+     */
+    wbinvd();
+    wrmsr (0xC0000085, regs[0], regs[1]);
+    if (do_safe) set_mtrr_done (&ctxt);
+}   /*  End Function amd_set_mtrr_up  */
+
+
+static void centaur_set_mcr_up (unsigned int reg, unsigned long base,
+                               unsigned long size, mtrr_type type,
+                               int do_safe)
+{
+    struct set_mtrr_context ctxt;
+    unsigned long low, high;
+
+    if (do_safe) {
+       set_mtrr_prepare_save (&ctxt);
+       set_mtrr_cache_disable (&ctxt);
+    }
+    if (size == 0)
+    {
+        /*  Disable  */
+        high = low = 0;
+    }
+    else
+    {
+       high = base << PAGE_SHIFT;
+       if(centaur_mcr_type == 0)
+               low = -size << PAGE_SHIFT | 0x1f; /* only support write-combining... */
+       else
+       {
+               if(type == MTRR_TYPE_UNCACHABLE)
+                       low = -size << PAGE_SHIFT | 0x02;       /* NC */
+               else
+                       low = -size << PAGE_SHIFT | 0x09;       /* WWO,WC */
+       }
+    }
+    centaur_mcr[reg].high = high;
+    centaur_mcr[reg].low = low;
+    wrmsr (0x110 + reg, low, high);
+    if (do_safe) set_mtrr_done( &ctxt );
+}   /*  End Function centaur_set_mcr_up  */
+
+static void (*set_mtrr_up) (unsigned int reg, unsigned long base,
+                           unsigned long size, mtrr_type type,
+                           int do_safe);
+
+#ifdef CONFIG_SMP
+
+struct mtrr_var_range
+{
+    unsigned long base_lo;
+    unsigned long base_hi;
+    unsigned long mask_lo;
+    unsigned long mask_hi;
+};
+
+
+/*  Get the MSR pair relating to a var range  */
+static void __init get_mtrr_var_range (unsigned int index,
+                                          struct mtrr_var_range *vr)
+{
+    rdmsr (MTRRphysBase_MSR (index), vr->base_lo, vr->base_hi);
+    rdmsr (MTRRphysMask_MSR (index), vr->mask_lo, vr->mask_hi);
+}   /*  End Function get_mtrr_var_range  */
+
+
+/*  Set the MSR pair relating to a var range. Returns TRUE if
+    changes are made  */
+static int __init set_mtrr_var_range_testing (unsigned int index,
+                                                 struct mtrr_var_range *vr)
+{
+    unsigned int lo, hi;
+    int changed = FALSE;
+
+    rdmsr(MTRRphysBase_MSR(index), lo, hi);
+    if ( (vr->base_lo & 0xfffff0ffUL) != (lo & 0xfffff0ffUL)
+        || (vr->base_hi & 0xfUL) != (hi & 0xfUL) )
+    {
+       wrmsr (MTRRphysBase_MSR(index), vr->base_lo, vr->base_hi);
+       changed = TRUE;
+    }
+
+    rdmsr (MTRRphysMask_MSR(index), lo, hi);
+
+    if ( (vr->mask_lo & 0xfffff800UL) != (lo & 0xfffff800UL)
+        || (vr->mask_hi & 0xfUL) != (hi & 0xfUL) )
+    {
+       wrmsr(MTRRphysMask_MSR(index), vr->mask_lo, vr->mask_hi);
+       changed = TRUE;
+    }
+    return changed;
+}   /*  End Function set_mtrr_var_range_testing  */
+
+static void __init get_fixed_ranges(mtrr_type *frs)
+{
+    unsigned long *p = (unsigned long *)frs;
+    int i;
+
+    rdmsr(MTRRfix64K_00000_MSR, p[0], p[1]);
+
+    for (i = 0; i < 2; i++)
+       rdmsr(MTRRfix16K_80000_MSR + i, p[2 + i*2], p[3 + i*2]);
+    for (i = 0; i < 8; i++)
+       rdmsr(MTRRfix4K_C0000_MSR + i, p[6 + i*2], p[7 + i*2]);
+}   /*  End Function get_fixed_ranges  */
+
+static int __init set_fixed_ranges_testing(mtrr_type *frs)
+{
+    unsigned long *p = (unsigned long *)frs;
+    int changed = FALSE;
+    int i;
+    unsigned long lo, hi;
+
+    rdmsr(MTRRfix64K_00000_MSR, lo, hi);
+    if (p[0] != lo || p[1] != hi)
+    {
+       wrmsr (MTRRfix64K_00000_MSR, p[0], p[1]);
+       changed = TRUE;
+    }
+
+    for (i = 0; i < 2; i++)
+    {
+       rdmsr (MTRRfix16K_80000_MSR + i, lo, hi);
+       if (p[2 + i*2] != lo || p[3 + i*2] != hi)
+       {
+           wrmsr (MTRRfix16K_80000_MSR + i, p[2 + i*2], p[3 + i*2]);
+           changed = TRUE;
+       }
+    }
+
+    for (i = 0; i < 8; i++)
+    {
+       rdmsr (MTRRfix4K_C0000_MSR + i, lo, hi);
+       if (p[6 + i*2] != lo || p[7 + i*2] != hi)
+       {
+           wrmsr(MTRRfix4K_C0000_MSR + i, p[6 + i*2], p[7 + i*2]);
+           changed = TRUE;
+       }
+    }
+    return changed;
+}   /*  End Function set_fixed_ranges_testing  */
+
+struct mtrr_state
+{
+    unsigned int num_var_ranges;
+    struct mtrr_var_range *var_ranges;
+    mtrr_type fixed_ranges[NUM_FIXED_RANGES];
+    unsigned char enabled;
+    mtrr_type def_type;
+};
+
+
+/*  Grab all of the MTRR state for this CPU into *state  */
+static void __init get_mtrr_state(struct mtrr_state *state)
+{
+    unsigned int nvrs, i;
+    struct mtrr_var_range *vrs;
+    unsigned long lo, dummy;
+
+    nvrs = state->num_var_ranges = get_num_var_ranges();
+    vrs = state->var_ranges
+              = kmalloc (nvrs * sizeof (struct mtrr_var_range), GFP_KERNEL);
+    if (vrs == NULL)
+       nvrs = state->num_var_ranges = 0;
+
+    for (i = 0; i < nvrs; i++)
+       get_mtrr_var_range (i, &vrs[i]);
+    get_fixed_ranges (state->fixed_ranges);
+
+    rdmsr (MTRRdefType_MSR, lo, dummy);
+    state->def_type = (lo & 0xff);
+    state->enabled = (lo & 0xc00) >> 10;
+}   /*  End Function get_mtrr_state  */
+
+
+/*  Free resources associated with a struct mtrr_state  */
+static void __init finalize_mtrr_state(struct mtrr_state *state)
+{
+    if (state->var_ranges) kfree (state->var_ranges);
+}   /*  End Function finalize_mtrr_state  */
+
+
+static unsigned long __init set_mtrr_state (struct mtrr_state *state,
+                                               struct set_mtrr_context *ctxt)
+/*  [SUMMARY] Set the MTRR state for this CPU.
+    <state> The MTRR state information to read.
+    <ctxt> Some relevant CPU context.
+    [NOTE] The CPU must already be in a safe state for MTRR changes.
+    [RETURNS] 0 if no changes made, else a mask indicating what was changed.
+*/
+{
+    unsigned int i;
+    unsigned long change_mask = 0;
+
+    for (i = 0; i < state->num_var_ranges; i++)
+       if ( set_mtrr_var_range_testing (i, &state->var_ranges[i]) )
+           change_mask |= MTRR_CHANGE_MASK_VARIABLE;
+
+    if ( set_fixed_ranges_testing(state->fixed_ranges) )
+       change_mask |= MTRR_CHANGE_MASK_FIXED;
+    /*  Set_mtrr_restore restores the old value of MTRRdefType,
+       so to set it we fiddle with the saved value  */
+    if ( (ctxt->deftype_lo & 0xff) != state->def_type
+        || ( (ctxt->deftype_lo & 0xc00) >> 10 ) != state->enabled)
+    {
+       ctxt->deftype_lo |= (state->def_type | state->enabled << 10);
+       change_mask |= MTRR_CHANGE_MASK_DEFTYPE;
+    }
+
+    return change_mask;
+}   /*  End Function set_mtrr_state  */
+
+
+static atomic_t undone_count;
+static volatile int wait_barrier_cache_disable = FALSE;
+static volatile int wait_barrier_execute = FALSE;
+static volatile int wait_barrier_cache_enable = FALSE;
+
+struct set_mtrr_data
+{
+    unsigned long smp_base;
+    unsigned long smp_size;
+    unsigned int smp_reg;
+    mtrr_type smp_type;
+};
+
+static void ipi_handler (void *info)
+/*  [SUMMARY] Synchronisation handler. Executed by "other" CPUs.
+    [RETURNS] Nothing.
+*/
+{
+    struct set_mtrr_data *data = info;
+    struct set_mtrr_context ctxt;
+    set_mtrr_prepare_save (&ctxt);
+    /*  Notify master that I've flushed and disabled my cache  */
+    atomic_dec (&undone_count);
+    while (wait_barrier_cache_disable) { rep_nop(); barrier(); }
+    set_mtrr_cache_disable (&ctxt);
+    /*  Notify master that I've disabled MTRR caching  */
+    atomic_dec (&undone_count);
+    while (wait_barrier_execute) { rep_nop(); barrier(); }
+    /*  The master has cleared me to execute  */
+    (*set_mtrr_up) (data->smp_reg, data->smp_base, data->smp_size,
+                   data->smp_type, FALSE);
+    /*  Notify master CPU that I've executed the function  */
+    atomic_dec (&undone_count);
+    /*  Wait for master to clear me to enable cache and return  */
+    while (wait_barrier_cache_enable) { rep_nop(); barrier(); }
+    set_mtrr_done (&ctxt);
+}   /*  End Function ipi_handler  */
+
+static void set_mtrr_smp (unsigned int reg, unsigned long base,
+                         unsigned long size, mtrr_type type)
+{
+    struct set_mtrr_data data;
+    struct set_mtrr_context ctxt;
+
+    data.smp_reg = reg;
+    data.smp_base = base;
+    data.smp_size = size;
+    data.smp_type = type;
+    wait_barrier_cache_disable = TRUE;
+    wait_barrier_execute = TRUE;
+    wait_barrier_cache_enable = TRUE;
+    atomic_set (&undone_count, smp_num_cpus - 1);
+    /*  Start the ball rolling on other CPUs  */
+    if (smp_call_function (ipi_handler, &data, 1, 0) != 0)
+       panic ("mtrr: timed out waiting for other CPUs\n");
+    /* Save the local CPU's MTRR context */
+    set_mtrr_prepare_save (&ctxt);
+    /*  Wait for all other CPUs to save their contexts  */
+    while (atomic_read (&undone_count) > 0) { rep_nop(); barrier(); }
+    /* Set up for the next wait, then release other CPUs to disable their caches */
+    atomic_set (&undone_count, smp_num_cpus - 1);
+    wait_barrier_cache_disable = FALSE;
+    set_mtrr_cache_disable (&ctxt);
+
+    /*  Wait for all other CPUs to flush and disable their caches  */
+    while (atomic_read (&undone_count) > 0) { rep_nop(); barrier(); }
+    /* Set up for completion wait and then release other CPUs to change MTRRs*/
+    atomic_set (&undone_count, smp_num_cpus - 1);
+    wait_barrier_execute = FALSE;
+    (*set_mtrr_up) (reg, base, size, type, FALSE);
+    /*  Now wait for other CPUs to complete the function  */
+    while (atomic_read (&undone_count) > 0) { rep_nop(); barrier(); }
+    /*  Now all CPUs should have finished the function. Release the barrier to
+       allow them to re-enable their caches and return from their interrupt,
+       then enable the local cache and return  */
+    wait_barrier_cache_enable = FALSE;
+    set_mtrr_done (&ctxt);
+}   /*  End Function set_mtrr_smp  */
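Taken together, ipi_handler() and set_mtrr_smp() implement a three-phase rendezvous; as an editorial summary only (not part of the patch):

    phase 1: every CPU saves its MTRR context        (undone_count / wait_barrier_cache_disable)
    phase 2: every CPU flushes and disables caching  (undone_count / wait_barrier_execute)
    phase 3: every CPU writes the new MTRR value     (undone_count / wait_barrier_cache_enable)
    finally: caches are re-enabled and contexts restored via set_mtrr_done()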
+
+
+/*  Some BIOSes are broken and don't set all MTRRs the same!  */
+static void __init mtrr_state_warn(unsigned long mask)
+{
+    if (!mask) return;
+    if (mask & MTRR_CHANGE_MASK_FIXED)
+       printk ("mtrr: your CPUs had inconsistent fixed MTRR settings\n");
+    if (mask & MTRR_CHANGE_MASK_VARIABLE)
+       printk ("mtrr: your CPUs had inconsistent variable MTRR settings\n");
+    if (mask & MTRR_CHANGE_MASK_DEFTYPE)
+       printk ("mtrr: your CPUs had inconsistent MTRRdefType settings\n");
+    printk ("mtrr: probably your BIOS does not setup all CPUs\n");
+}   /*  End Function mtrr_state_warn  */
+
+#endif  /*  CONFIG_SMP  */
+
+static char *attrib_to_str (int x)
+{
+    return (x <= 6) ? mtrr_strings[x] : "?";
+}   /*  End Function attrib_to_str  */
+
+static void init_table (void)
+{
+    int i, max;
+
+    max = get_num_var_ranges ();
+    if ( ( usage_table = kmalloc (max * sizeof *usage_table, GFP_KERNEL) )
+        == NULL )
+    {
+       printk ("mtrr: could not allocate\n");
+       return;
+    }
+    for (i = 0; i < max; i++) usage_table[i] = 1;
+#ifdef USERSPACE_INTERFACE
+    if ( ( ascii_buffer = kmalloc (max * LINE_SIZE, GFP_KERNEL) ) == NULL )
+    {
+       printk ("mtrr: could not allocate\n");
+       return;
+    }
+    ascii_buf_bytes = 0;
+    compute_ascii ();
+#endif
+}   /*  End Function init_table  */
+
+static int generic_get_free_region (unsigned long base, unsigned long size)
+/*  [SUMMARY] Get a free MTRR.
+    <base> The starting (base) address of the region.
+    <size> The size (in bytes) of the region.
+    [RETURNS] The index of the region on success, else -ENOSPC if none is free.
+*/
+{
+    int i, max;
+    mtrr_type ltype;
+    unsigned long lbase, lsize;
+
+    max = get_num_var_ranges ();
+    for (i = 0; i < max; ++i)
+    {
+       (*get_mtrr) (i, &lbase, &lsize, &ltype);
+       if (lsize == 0) return i;
+    }
+    return -ENOSPC;
+}   /*  End Function generic_get_free_region  */
+
+static int centaur_get_free_region (unsigned long base, unsigned long size)
+/*  [SUMMARY] Get a free MTRR.
+    <base> The starting (base) address of the region.
+    <size> The size (in bytes) of the region.
+    [RETURNS] The index of the region on success, else -ENOSPC if none is free.
+*/
+{
+    int i, max;
+    mtrr_type ltype;
+    unsigned long lbase, lsize;
+
+    max = get_num_var_ranges ();
+    for (i = 0; i < max; ++i)
+    {
+       if(centaur_mcr_reserved & (1<<i))
+               continue;
+       (*get_mtrr) (i, &lbase, &lsize, &ltype);
+       if (lsize == 0) return i;
+    }
+    return -ENOSPC;
+}   /*  End Function centaur_get_free_region  */
+
+static int cyrix_get_free_region (unsigned long base, unsigned long size)
+/*  [SUMMARY] Get a free ARR.
+    <base> The starting (base) address of the region.
+    <size> The size (in bytes) of the region.
+    [RETURNS] The index of the region on success, else -ENOSPC if none is free.
+*/
+{
+    int i;
+    mtrr_type ltype;
+    unsigned long lbase, lsize;
+
+    /* If we are to set up a region >32M then look at ARR7 immediately */
+    if (size > 0x2000)
+    {
+       cyrix_get_arr (7, &lbase, &lsize, &ltype);
+       if (lsize == 0) return 7;
+       /*  Else try ARR0-ARR6 first  */
+    }
+    else
+    {
+       for (i = 0; i < 7; i++)
+       {
+           cyrix_get_arr (i, &lbase, &lsize, &ltype);
+           if ((i == 3) && arr3_protected) continue;
+           if (lsize == 0) return i;
+       }
+       /* None of ARR0-ARR6 is free, so try ARR7, but its size must be at least 256K */
+       cyrix_get_arr (i, &lbase, &lsize, &ltype);
+       if ((lsize == 0) && (size >= 0x40)) return i;
+    }
+    return -ENOSPC;
+}   /*  End Function cyrix_get_free_region  */
+
+static int (*get_free_region) (unsigned long base,
+                              unsigned long size) = generic_get_free_region;
+
+/**
+ *     mtrr_add_page - Add a memory type region
+ *     @base: Physical base address of region in pages (4 KB)
+ *     @size: Physical size of region in pages (4 KB)
+ *     @type: Type of MTRR desired
+ *     @increment: If this is true do usage counting on the region
+ *
+ *     Memory type region registers control the caching on newer Intel and
+ *     non-Intel processors. This function allows drivers to request that an
+ *     MTRR be added. The details and hardware specifics of each processor's
+ *     implementation are hidden from the caller, but nevertheless the 
+ *     caller should expect to need to provide a power of two size on an
+ *     equivalent power of two boundary.
+ *
+ *     If the region cannot be added either because all regions are in use
+ *     or the CPU cannot support it a negative value is returned. On success
+ *     the register number for this entry is returned, but should be treated
+ *     as a cookie only.
+ *
+ *     On a multiprocessor machine the changes are made to all processors.
+ *     This is required on x86 by the Intel processors.
+ *
+ *     The available types are
+ *
+ *     %MTRR_TYPE_UNCACHABLE   -       No caching
+ *
+ *     %MTRR_TYPE_WRBACK       -       Write data back in bursts whenever
+ *
+ *     %MTRR_TYPE_WRCOMB       -       Write data back soon but allow bursts
+ *
+ *     %MTRR_TYPE_WRTHROUGH    -       Cache reads but not writes
+ *
+ *     BUGS: Needs a quiet flag for the cases where drivers do not mind
+ *     failures and do not wish system log messages to be sent.
+ */
+
+int mtrr_add_page(unsigned long base, unsigned long size, unsigned int type, char increment)
+{
+/*  [SUMMARY] Add an MTRR entry.
+    <base> The starting (base, in pages) address of the region.
+    <size> The size of the region. (in pages)
+    <type> The type of the new region.
+    <increment> If true and the region already exists, the usage count will be
+    incremented.
+    [RETURNS] The MTRR register on success, else a negative number indicating
+    the error code.
+    [NOTE] This routine uses a spinlock.
+*/
+    int i, max;
+    mtrr_type ltype;
+    unsigned long lbase, lsize, last;
+
+    switch ( mtrr_if )
+    {
+    case MTRR_IF_NONE:
+       return -ENXIO;          /* No MTRRs whatsoever */
+
+    case MTRR_IF_AMD_K6:
+       /* Apply the K6 block alignment and size rules
+          In order
+          o Uncached or gathering only
+          o 128K or bigger block
+          o Power of 2 block
+          o base suitably aligned to the power
+       */
+       if ( type > MTRR_TYPE_WRCOMB || size < (1 << (17-PAGE_SHIFT)) ||
+            (size & ~(size-1))-size || ( base & (size-1) ) )
+           return -EINVAL;
+       break;
+
+    case MTRR_IF_INTEL:
+       /*  For Intel PPro stepping <= 7, must be 4 MiB aligned 
+           and not touch 0x70000000->0x7003FFFF */
+       if ( boot_cpu_data.x86_vendor == X86_VENDOR_INTEL &&
+            boot_cpu_data.x86 == 6 &&
+            boot_cpu_data.x86_model == 1 &&
+            boot_cpu_data.x86_mask <= 7 )
+       {
+           if ( base & ((1 << (22-PAGE_SHIFT))-1) )
+           {
+               printk (KERN_WARNING "mtrr: base(0x%lx000) is not 4 MiB aligned\n", base);
+               return -EINVAL;
+           }
+           if (!(base + size < 0x70000000 || base > 0x7003FFFF) &&
+                (type == MTRR_TYPE_WRCOMB || type == MTRR_TYPE_WRBACK))
+           {
+               printk (KERN_WARNING "mtrr: writable mtrr between 0x70000000 and 0x7003FFFF may hang the CPU.\n");
+               return -EINVAL;
+           }
+       }
+       /* Fall through */
+       
+    case MTRR_IF_CYRIX_ARR:
+    case MTRR_IF_CENTAUR_MCR:
+        if ( mtrr_if == MTRR_IF_CENTAUR_MCR )
+       {
+           /*
+            *  FIXME: Winchip2 supports uncached
+            */
+           if (type != MTRR_TYPE_WRCOMB && (centaur_mcr_type == 0 || type != MTRR_TYPE_UNCACHABLE))
+           {
+               printk (KERN_WARNING "mtrr: only write-combining%s supported\n",
+                       centaur_mcr_type?" and uncacheable are":" is");
+               return -EINVAL;
+           }
+       }
+       else if (base + size < 0x100)
+       {
+           printk (KERN_WARNING "mtrr: cannot set region below 1 MiB (0x%lx000,0x%lx000)\n",
+                   base, size);
+           return -EINVAL;
+       }
+       /*  Check upper bits of base and last are equal and lower bits are 0
+           for base and 1 for last  */
+       last = base + size - 1;
+       for (lbase = base; !(lbase & 1) && (last & 1);
+            lbase = lbase >> 1, last = last >> 1);
+       if (lbase != last)
+       {
+           printk (KERN_WARNING "mtrr: base(0x%lx000) is not aligned on a size(0x%lx000) boundary\n",
+                   base, size);
+           return -EINVAL;
+       }
+       break;
+
+    default:
+       return -EINVAL;
+    }
+
+    if (type >= MTRR_NUM_TYPES)
+    {
+       printk ("mtrr: type: %u illegal\n", type);
+       return -EINVAL;
+    }
+
+    /*  If the type is WC, check that this processor supports it  */
+    if ( (type == MTRR_TYPE_WRCOMB) && !have_wrcomb () )
+    {
+        printk (KERN_WARNING "mtrr: your processor doesn't support write-combining\n");
+        return -ENOSYS;
+    }
+
+    if ( base & size_or_mask || size  & size_or_mask )
+    {
+       printk ("mtrr: base or size exceeds the MTRR width\n");
+       return -EINVAL;
+    }
+
+    increment = increment ? 1 : 0;
+    max = get_num_var_ranges ();
+    /*  Search for existing MTRR  */
+    down(&main_lock);
+    for (i = 0; i < max; ++i)
+    {
+       (*get_mtrr) (i, &lbase, &lsize, &ltype);
+       if (base >= lbase + lsize) continue;
+       if ( (base < lbase) && (base + size <= lbase) ) continue;
+       /*  At this point we know there is some kind of overlap/enclosure  */
+       if ( (base < lbase) || (base + size > lbase + lsize) )
+       {
+           up(&main_lock);
+           printk (KERN_WARNING "mtrr: 0x%lx000,0x%lx000 overlaps existing"
+                   " 0x%lx000,0x%lx000\n",
+                   base, size, lbase, lsize);
+           return -EINVAL;
+       }
+       /*  New region is enclosed by an existing region  */
+       if (ltype != type)
+       {
+           if (type == MTRR_TYPE_UNCACHABLE) continue;
+           up(&main_lock);
+           printk ( "mtrr: type mismatch for %lx000,%lx000 old: %s new: %s\n",
+                    base, size, attrib_to_str (ltype), attrib_to_str (type) );
+           return -EINVAL;
+       }
+       if (increment) ++usage_table[i];
+       compute_ascii ();
+       up(&main_lock);
+       return i;
+    }
+    /*  Search for an empty MTRR  */
+    i = (*get_free_region) (base, size);
+    if (i < 0)
+    {
+       up(&main_lock);
+       printk ("mtrr: no more MTRRs available\n");
+       return i;
+    }
+    set_mtrr (i, base, size, type);
+    usage_table[i] = 1;
+    compute_ascii ();
+    up(&main_lock);
+    return i;
+}   /*  End Function mtrr_add_page  */
+
+/**
+ *     mtrr_add - Add a memory type region
+ *     @base: Physical base address of region
+ *     @size: Physical size of region
+ *     @type: Type of MTRR desired
+ *     @increment: If this is true do usage counting on the region
+ *
+ *     Memory type region registers control the caching on newer Intel and
+ *     non-Intel processors. This function allows drivers to request that an
+ *     MTRR be added. The details and hardware specifics of each processor's
+ *     implementation are hidden from the caller, but nevertheless the 
+ *     caller should expect to need to provide a power of two size on an
+ *     equivalent power of two boundary.
+ *
+ *     If the region cannot be added either because all regions are in use
+ *     or the CPU cannot support it a negative value is returned. On success
+ *     the register number for this entry is returned, but should be treated
+ *     as a cookie only.
+ *
+ *     On a multiprocessor machine the changes are made to all processors.
+ *     This is required on x86 by the Intel processors.
+ *
+ *     The available types are
+ *
+ *     %MTRR_TYPE_UNCACHABLE   -       No caching
+ *
+ *     %MTRR_TYPE_WRBACK       -       Write data back in bursts whenever
+ *
+ *     %MTRR_TYPE_WRCOMB       -       Write data back soon but allow bursts
+ *
+ *     %MTRR_TYPE_WRTHROUGH    -       Cache reads but not writes
+ *
+ *     BUGS: Needs a quiet flag for the cases where drivers do not mind
+ *     failures and do not wish system log messages to be sent.
+ */
+
+int mtrr_add(unsigned long base, unsigned long size, unsigned int type, char increment)
+{
+/*  [SUMMARY] Add an MTRR entry.
+    <base> The starting (base) address of the region.
+    <size> The size (in bytes) of the region.
+    <type> The type of the new region.
+    <increment> If true and the region already exists, the usage count will be
+    incremented.
+    [RETURNS] The MTRR register on success, else a negative number indicating
+    the error code.
+*/
+
+    if ( (base & (PAGE_SIZE - 1)) || (size & (PAGE_SIZE - 1)) )
+    {
+       printk ("mtrr: size and base must be multiples of 4 kiB\n");
+       printk ("mtrr: size: 0x%lx  base: 0x%lx\n", size, base);
+       return -EINVAL;
+    }
+    return mtrr_add_page(base >> PAGE_SHIFT, size >> PAGE_SHIFT, type, increment);
+}   /*  End Function mtrr_add  */
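As a usage illustration only (the aperture address and size below are hypothetical, not taken from this patch), a driver that wants a frame-buffer aperture write-combined would typically pair mtrr_add() with a matching mtrr_del() on teardown:

    #include <linux/kernel.h>   /* printk */
    #include <asm/mtrr.h>       /* mtrr_add, mtrr_del, MTRR_TYPE_WRCOMB */

    /* hypothetical 4 MB aperture at 0xf8000000: power-of-two size, aligned base */
    static int example_enable_wc(void)
    {
        int reg = mtrr_add(0xf8000000, 0x400000, MTRR_TYPE_WRCOMB, 1);
        if (reg < 0)
            printk(KERN_INFO "example: write-combining not enabled (%d)\n", reg);
        return reg;     /* keep the cookie for mtrr_del() */
    }

    static void example_disable_wc(int reg)
    {
        if (reg >= 0)
            mtrr_del(reg, 0, 0);    /* base/size are ignored when a register is given */
    }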
+
+/**
+ *     mtrr_del_page - delete a memory type region
+ *     @reg: Register returned by mtrr_add
+ *     @base: Physical base address
+ *     @size: Size of region
+ *
+ *     If register is supplied then base and size are ignored. This is
+ *     how drivers should call it.
+ *
+ *     Releases an MTRR region. If the usage count drops to zero the 
+ *     register is freed and the region returns to default state.
+ *     On success the register is returned, on failure a negative error
+ *     code.
+ */
+int mtrr_del_page (int reg, unsigned long base, unsigned long size)
+/*  [SUMMARY] Delete MTRR/decrement usage count.
+    <reg> The register. If this is less than 0 then <<base>> and <<size>> must
+    be supplied.
+    <base> The base address of the region. This is ignored if <<reg>> is >= 0.
+    <size> The size of the region. This is ignored if <<reg>> is >= 0.
+    [RETURNS] The register on success, else a negative number indicating
+    the error code.
+    [NOTE] This routine uses a spinlock.
+*/
+{
+    int i, max;
+    mtrr_type ltype;
+    unsigned long lbase, lsize;
+
+    if ( mtrr_if == MTRR_IF_NONE ) return -ENXIO;
+
+    max = get_num_var_ranges ();
+    down (&main_lock);
+    if (reg < 0)
+    {
+       /*  Search for existing MTRR  */
+       for (i = 0; i < max; ++i)
+       {
+           (*get_mtrr) (i, &lbase, &lsize, &ltype);
+           if (lbase == base && lsize == size)
+           {
+               reg = i;
+               break;
+           }
+       }
+       if (reg < 0)
+       {
+           up(&main_lock);
+           printk ("mtrr: no MTRR for %lx000,%lx000 found\n", base, size);
+           return -EINVAL;
+       }
+    }
+    if (reg >= max)
+    {
+       up (&main_lock);
+       printk ("mtrr: register: %d too big\n", reg);
+       return -EINVAL;
+    }
+    if ( mtrr_if == MTRR_IF_CYRIX_ARR )
+    {
+       if ( (reg == 3) && arr3_protected )
+       {
+           up (&main_lock);
+           printk ("mtrr: ARR3 cannot be changed\n");
+           return -EINVAL;
+       }
+    }
+    (*get_mtrr) (reg, &lbase, &lsize, &ltype);
+    if (lsize < 1)
+    {
+       up (&main_lock);
+       printk ("mtrr: MTRR %d not used\n", reg);
+       return -EINVAL;
+    }
+    if (usage_table[reg] < 1)
+    {
+       up (&main_lock);
+       printk ("mtrr: reg: %d has count=0\n", reg);
+       return -EINVAL;
+    }
+    if (--usage_table[reg] < 1) set_mtrr (reg, 0, 0, 0);
+    compute_ascii ();
+    up (&main_lock);
+    return reg;
+}   /*  End Function mtrr_del_page  */
+
+/**
+ *     mtrr_del - delete a memory type region
+ *     @reg: Register returned by mtrr_add
+ *     @base: Physical base address
+ *     @size: Size of region
+ *
+ *     If register is supplied then base and size are ignored. This is
+ *     how drivers should call it.
+ *
+ *     Releases an MTRR region. If the usage count drops to zero the 
+ *     register is freed and the region returns to default state.
+ *     On success the register is returned, on failure a negative error
+ *     code.
+ */
+int mtrr_del (int reg, unsigned long base, unsigned long size)
+/*  [SUMMARY] Delete MTRR/decrement usage count.
+    <reg> The register. If this is less than 0 then <<base>> and <<size>> must
+    be supplied.
+    <base> The base address of the region. This is ignored if <<reg>> is >= 0.
+    <size> The size of the region. This is ignored if <<reg>> is >= 0.
+    [RETURNS] The register on success, else a negative number indicating
+    the error code.
+*/
+{
+    if ( (base & (PAGE_SIZE - 1)) || (size & (PAGE_SIZE - 1)) )
+    {
+       printk ("mtrr: size and base must be multiples of 4 kiB\n");
+       printk ("mtrr: size: 0x%lx  base: 0x%lx\n", size, base);
+       return -EINVAL;
+    }
+    return mtrr_del_page(reg, base >> PAGE_SHIFT, size >> PAGE_SHIFT);
+}
+
+#ifdef USERSPACE_INTERFACE
+
+static int mtrr_file_add (unsigned long base, unsigned long size,
+                         unsigned int type, char increment, struct file *file, int page)
+{
+    int reg, max;
+    unsigned int *fcount = file->private_data;
+
+    max = get_num_var_ranges ();
+    if (fcount == NULL)
+    {
+       if ( ( fcount = kmalloc (max * sizeof *fcount, GFP_KERNEL) ) == NULL )
+       {
+           printk ("mtrr: could not allocate\n");
+           return -ENOMEM;
+       }
+       memset (fcount, 0, max * sizeof *fcount);
+       file->private_data = fcount;
+    }
+    if (!page) {
+       if ( (base & (PAGE_SIZE - 1)) || (size & (PAGE_SIZE - 1)) )
+       {
+           printk ("mtrr: size and base must be multiples of 4 kiB\n");
+           printk ("mtrr: size: 0x%lx  base: 0x%lx\n", size, base);
+           return -EINVAL;
+       }
+       base >>= PAGE_SHIFT;
+       size >>= PAGE_SHIFT;
+    }
+    reg = mtrr_add_page (base, size, type, 1);
+    if (reg >= 0) ++fcount[reg];
+    return reg;
+}   /*  End Function mtrr_file_add  */
+
+static int mtrr_file_del (unsigned long base, unsigned long size,
+                         struct file *file, int page)
+{
+    int reg;
+    unsigned int *fcount = file->private_data;
+
+    if (!page) {
+       if ( (base & (PAGE_SIZE - 1)) || (size & (PAGE_SIZE - 1)) )
+       {
+           printk ("mtrr: size and base must be multiples of 4 kiB\n");
+           printk ("mtrr: size: 0x%lx  base: 0x%lx\n", size, base);
+           return -EINVAL;
+       }
+       base >>= PAGE_SHIFT;
+       size >>= PAGE_SHIFT;
+    }
+    reg = mtrr_del_page (-1, base, size);
+    if (reg < 0) return reg;
+    if (fcount == NULL) return reg;
+    if (fcount[reg] < 1) return -EINVAL;
+    --fcount[reg];
+    return reg;
+}   /*  End Function mtrr_file_del  */
+
+static ssize_t mtrr_read (struct file *file, char *buf, size_t len,
+                         loff_t *ppos)
+{
+    if (*ppos >= ascii_buf_bytes) return 0;
+    if (*ppos + len > ascii_buf_bytes) len = ascii_buf_bytes - *ppos;
+    if ( copy_to_user (buf, ascii_buffer + *ppos, len) ) return -EFAULT;
+    *ppos += len;
+    return len;
+}   /*  End Function mtrr_read  */
+
+static ssize_t mtrr_write (struct file *file, const char *buf, size_t len,
+                          loff_t *ppos)
+/*  Format of control line:
+    "base=%Lx size=%Lx type=%s"     OR:
+    "disable=%d"
+*/
+{
+    int i, err;
+    unsigned long reg;
+    unsigned long long base, size;
+    char *ptr;
+    char line[LINE_SIZE];
+
+    if ( !suser () ) return -EPERM;
+    /*  Can't seek (pwrite) on this device  */
+    if (ppos != &file->f_pos) return -ESPIPE;
+    memset (line, 0, LINE_SIZE);
+    if (len > LINE_SIZE) len = LINE_SIZE;
+    if ( copy_from_user (line, buf, len - 1) ) return -EFAULT;
+    ptr = line + strlen (line) - 1;
+    if (*ptr == '\n') *ptr = '\0';
+    if ( !strncmp (line, "disable=", 8) )
+    {
+       reg = simple_strtoul (line + 8, &ptr, 0);
+       err = mtrr_del_page (reg, 0, 0);
+       if (err < 0) return err;
+       return len;
+    }
+    if ( strncmp (line, "base=", 5) )
+    {
+       printk ("mtrr: no \"base=\" in line: \"%s\"\n", line);
+       return -EINVAL;
+    }
+    base = simple_strtoull (line + 5, &ptr, 0);
+    for (; isspace (*ptr); ++ptr);
+    if ( strncmp (ptr, "size=", 5) )
+    {
+       printk ("mtrr: no \"size=\" in line: \"%s\"\n", line);
+       return -EINVAL;
+    }
+    size = simple_strtoull (ptr + 5, &ptr, 0);
+    if ( (base & 0xfff) || (size & 0xfff) )
+    {
+       printk ("mtrr: size and base must be multiples of 4 kiB\n");
+       printk ("mtrr: size: 0x%Lx  base: 0x%Lx\n", size, base);
+       return -EINVAL;
+    }
+    for (; isspace (*ptr); ++ptr);
+    if ( strncmp (ptr, "type=", 5) )
+    {
+       printk ("mtrr: no \"type=\" in line: \"%s\"\n", line);
+       return -EINVAL;
+    }
+    ptr += 5;
+    for (; isspace (*ptr); ++ptr);
+    for (i = 0; i < MTRR_NUM_TYPES; ++i)
+    {
+       if ( strcmp (ptr, mtrr_strings[i]) ) continue;
+       base >>= PAGE_SHIFT;
+       size >>= PAGE_SHIFT;
+       err = mtrr_add_page ((unsigned long)base, (unsigned long)size, i, 1);
+       if (err < 0) return err;
+       return len;
+    }
+    printk ("mtrr: illegal type: \"%s\"\n", ptr);
+    return -EINVAL;
+}   /*  End Function mtrr_write  */
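For completeness, the control-line format parsed above corresponds to a plain write on the proc file registered further down. A minimal userspace sketch, assuming the conventional "write-combining" type string and a made-up address (root is required):

    #include <stdio.h>

    int main(void)
    {
        FILE *f = fopen("/proc/mtrr", "w");
        if (f == NULL)
            return 1;
        /* add a 4 MB write-combining region; "disable=N" would free register N */
        fprintf(f, "base=0xf8000000 size=0x400000 type=write-combining\n");
        return fclose(f) ? 1 : 0;
    }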
+
+static int mtrr_ioctl (struct inode *inode, struct file *file,
+                      unsigned int cmd, unsigned long arg)
+{
+    int err;
+    mtrr_type type;
+    struct mtrr_sentry sentry;
+    struct mtrr_gentry gentry;
+
+    switch (cmd)
+    {
+      default:
+       return -ENOIOCTLCMD;
+      case MTRRIOC_ADD_ENTRY:
+       if ( !suser () ) return -EPERM;
+       if ( copy_from_user (&sentry, (void *) arg, sizeof sentry) )
+           return -EFAULT;
+       err = mtrr_file_add (sentry.base, sentry.size, sentry.type, 1, file, 0);
+       if (err < 0) return err;
+       break;
+      case MTRRIOC_SET_ENTRY:
+       if ( !suser () ) return -EPERM;
+       if ( copy_from_user (&sentry, (void *) arg, sizeof sentry) )
+           return -EFAULT;
+       err = mtrr_add (sentry.base, sentry.size, sentry.type, 0);
+       if (err < 0) return err;
+       break;
+      case MTRRIOC_DEL_ENTRY:
+       if ( !suser () ) return -EPERM;
+       if ( copy_from_user (&sentry, (void *) arg, sizeof sentry) )
+           return -EFAULT;
+       err = mtrr_file_del (sentry.base, sentry.size, file, 0);
+       if (err < 0) return err;
+       break;
+      case MTRRIOC_KILL_ENTRY:
+       if ( !suser () ) return -EPERM;
+       if ( copy_from_user (&sentry, (void *) arg, sizeof sentry) )
+           return -EFAULT;
+       err = mtrr_del (-1, sentry.base, sentry.size);
+       if (err < 0) return err;
+       break;
+      case MTRRIOC_GET_ENTRY:
+       if ( copy_from_user (&gentry, (void *) arg, sizeof gentry) )
+           return -EFAULT;
+       if ( gentry.regnum >= get_num_var_ranges () ) return -EINVAL;
+       (*get_mtrr) (gentry.regnum, &gentry.base, &gentry.size, &type);
+
+       /* Hide entries that go above 4GB */
+       if (gentry.base + gentry.size > 0x100000 || gentry.size == 0x100000)
+           gentry.base = gentry.size = gentry.type = 0;
+       else {
+           gentry.base <<= PAGE_SHIFT;
+           gentry.size <<= PAGE_SHIFT;
+           gentry.type = type;
+       }
+
+       if ( copy_to_user ( (void *) arg, &gentry, sizeof gentry) )
+            return -EFAULT;
+       break;
+      case MTRRIOC_ADD_PAGE_ENTRY:
+       if ( !suser () ) return -EPERM;
+       if ( copy_from_user (&sentry, (void *) arg, sizeof sentry) )
+           return -EFAULT;
+       err = mtrr_file_add (sentry.base, sentry.size, sentry.type, 1, file, 1);
+       if (err < 0) return err;
+       break;
+      case MTRRIOC_SET_PAGE_ENTRY:
+       if ( !suser () ) return -EPERM;
+       if ( copy_from_user (&sentry, (void *) arg, sizeof sentry) )
+           return -EFAULT;
+       err = mtrr_add_page (sentry.base, sentry.size, sentry.type, 0);
+       if (err < 0) return err;
+       break;
+      case MTRRIOC_DEL_PAGE_ENTRY:
+       if ( !suser () ) return -EPERM;
+       if ( copy_from_user (&sentry, (void *) arg, sizeof sentry) )
+           return -EFAULT;
+       err = mtrr_file_del (sentry.base, sentry.size, file, 1);
+       if (err < 0) return err;
+       break;
+      case MTRRIOC_KILL_PAGE_ENTRY:
+       if ( !suser () ) return -EPERM;
+       if ( copy_from_user (&sentry, (void *) arg, sizeof sentry) )
+           return -EFAULT;
+       err = mtrr_del_page (-1, sentry.base, sentry.size);
+       if (err < 0) return err;
+       break;
+      case MTRRIOC_GET_PAGE_ENTRY:
+       if ( copy_from_user (&gentry, (void *) arg, sizeof gentry) )
+           return -EFAULT;
+       if ( gentry.regnum >= get_num_var_ranges () ) return -EINVAL;
+       (*get_mtrr) (gentry.regnum, &gentry.base, &gentry.size, &type);
+       gentry.type = type;
+
+       if ( copy_to_user ( (void *) arg, &gentry, sizeof gentry) )
+            return -EFAULT;
+       break;
+    }
+    return 0;
+}   /*  End Function mtrr_ioctl  */
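The same operation is available through the ioctl interface. A sketch under the assumption that struct mtrr_sentry and the MTRRIOC_* constants come from <asm/mtrr.h>; the base address passed in is again a placeholder chosen by the caller:

    #include <fcntl.h>
    #include <sys/ioctl.h>
    #include <unistd.h>
    #include <asm/mtrr.h>

    int add_wc_region(unsigned long base, unsigned int size)
    {
        struct mtrr_sentry s;
        int fd, err;

        s.base = base;                 /* byte address, 4 kB multiple */
        s.size = size;                 /* byte size, 4 kB multiple */
        s.type = MTRR_TYPE_WRCOMB;
        fd = open("/proc/mtrr", O_WRONLY);
        if (fd < 0)
            return -1;
        err = ioctl(fd, MTRRIOC_ADD_ENTRY, &s);
        close(fd);
        return err;
    }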
+
+static int mtrr_close (struct inode *ino, struct file *file)
+{
+    int i, max;
+    unsigned int *fcount = file->private_data;
+
+    if (fcount == NULL) return 0;
+    max = get_num_var_ranges ();
+    for (i = 0; i < max; ++i)
+    {
+       while (fcount[i] > 0)
+       {
+           if (mtrr_del (i, 0, 0) < 0) printk ("mtrr: reg %d not used\n", i);
+           --fcount[i];
+       }
+    }
+    kfree (fcount);
+    file->private_data = NULL;
+    return 0;
+}   /*  End Function mtrr_close  */
+
+static struct file_operations mtrr_fops =
+{
+    owner:     THIS_MODULE,
+    read:      mtrr_read,
+    write:     mtrr_write,
+    ioctl:     mtrr_ioctl,
+    release:   mtrr_close,
+};
+
+#  ifdef CONFIG_PROC_FS
+
+static struct proc_dir_entry *proc_root_mtrr;
+
+#  endif  /*  CONFIG_PROC_FS  */
+
+static devfs_handle_t devfs_handle;
+
+static void compute_ascii (void)
+{
+    char factor;
+    int i, max;
+    mtrr_type type;
+    unsigned long base, size;
+
+    ascii_buf_bytes = 0;
+    max = get_num_var_ranges ();
+    for (i = 0; i < max; i++)
+    {
+       (*get_mtrr) (i, &base, &size, &type);
+       if (size == 0) usage_table[i] = 0;
+       else
+       {
+           if (size < (0x100000 >> PAGE_SHIFT))
+           {
+               /* less than 1MB */
+               factor = 'K';
+               size <<= PAGE_SHIFT - 10;
+           }
+           else
+           {
+               factor = 'M';
+               size >>= 20 - PAGE_SHIFT;
+           }
+           sprintf
+               (ascii_buffer + ascii_buf_bytes,
+                "reg%02i: base=0x%05lx000 (%4liMB), size=%4li%cB: %s, count=%d\n",
+                i, base, base >> (20 - PAGE_SHIFT), size, factor,
+                attrib_to_str (type), usage_table[i]);
+           ascii_buf_bytes += strlen (ascii_buffer + ascii_buf_bytes);
+       }
+    }
+    devfs_set_file_size (devfs_handle, ascii_buf_bytes);
+#  ifdef CONFIG_PROC_FS
+    if (proc_root_mtrr)
+       proc_root_mtrr->size = ascii_buf_bytes;
+#  endif  /*  CONFIG_PROC_FS  */
+}   /*  End Function compute_ascii  */
+
+#endif  /*  USERSPACE_INTERFACE  */
+
+EXPORT_SYMBOL(mtrr_add);
+EXPORT_SYMBOL(mtrr_del);
+
+#ifdef CONFIG_SMP
+
+typedef struct
+{
+    unsigned long base;
+    unsigned long size;
+    mtrr_type type;
+} arr_state_t;
+
+arr_state_t arr_state[8] __initdata =
+{
+    {0UL,0UL,0UL}, {0UL,0UL,0UL}, {0UL,0UL,0UL}, {0UL,0UL,0UL},
+    {0UL,0UL,0UL}, {0UL,0UL,0UL}, {0UL,0UL,0UL}, {0UL,0UL,0UL}
+};
+
+unsigned char ccr_state[7] __initdata = { 0, 0, 0, 0, 0, 0, 0 };
+
+static void __init cyrix_arr_init_secondary(void)
+{
+    struct set_mtrr_context ctxt;
+    int i;
+
+    /* flush cache and enable MAPEN */
+    set_mtrr_prepare_save (&ctxt);
+    set_mtrr_cache_disable (&ctxt);
+
+     /* the CCRs are not contiguous */
+    for(i=0; i<4; i++) setCx86(CX86_CCR0 + i, ccr_state[i]);
+    for(   ; i<7; i++) setCx86(CX86_CCR4 + i, ccr_state[i]);
+    for(i=0; i<8; i++)
+      cyrix_set_arr_up(i,
+        arr_state[i].base, arr_state[i].size, arr_state[i].type, FALSE);
+
+    set_mtrr_done (&ctxt); /* flush cache and disable MAPEN */
+}   /*  End Function cyrix_arr_init_secondary  */
+
+#endif
+
+/*
+ * On Cyrix 6x86(MX) and M II the ARR3 is special: it has connection
+ * with the SMM (System Management Mode) mode. So we need the following:
+ * Check whether SMI_LOCK (CCR3 bit 0) is set
+ *   if it is set, write a warning message: ARR3 cannot be changed!
+ *     (it cannot be changed until the next processor reset)
+ *   if it is reset, then we can change it, set all the needed bits:
+ *   - disable access to SMM memory through ARR3 range (CCR1 bit 7 reset)
+ *   - disable access to SMM memory (CCR1 bit 2 reset)
+ *   - disable SMM mode (CCR1 bit 1 reset)
+ *   - disable write protection of ARR3 (CCR6 bit 1 reset)
+ *   - (maybe) disable ARR3
+ * Just to be sure, we enable ARR usage by the processor (CCR5 bit 5 set)
+ */
+static void __init cyrix_arr_init(void)
+{
+    struct set_mtrr_context ctxt;
+    unsigned char ccr[7];
+    int ccrc[7] = { 0, 0, 0, 0, 0, 0, 0 };
+#ifdef CONFIG_SMP
+    int i;
+#endif
+
+    /* flush cache and enable MAPEN */
+    set_mtrr_prepare_save (&ctxt);
+    set_mtrr_cache_disable (&ctxt);
+
+    /* Save all CCRs locally */
+    ccr[0] = getCx86 (CX86_CCR0);
+    ccr[1] = getCx86 (CX86_CCR1);
+    ccr[2] = getCx86 (CX86_CCR2);
+    ccr[3] = ctxt.ccr3;
+    ccr[4] = getCx86 (CX86_CCR4);
+    ccr[5] = getCx86 (CX86_CCR5);
+    ccr[6] = getCx86 (CX86_CCR6);
+
+    if (ccr[3] & 1)
+    {
+       ccrc[3] = 1;
+       arr3_protected = 1;
+    }
+    else
+    {
+       /* Disable SMM mode (bit 1), access to SMM memory (bit 2) and
+        * access to SMM memory through ARR3 (bit 7).
+        */
+       if (ccr[1] & 0x80) { ccr[1] &= 0x7f; ccrc[1] |= 0x80; }
+       if (ccr[1] & 0x04) { ccr[1] &= 0xfb; ccrc[1] |= 0x04; }
+       if (ccr[1] & 0x02) { ccr[1] &= 0xfd; ccrc[1] |= 0x02; }
+       arr3_protected = 0;
+       if (ccr[6] & 0x02) {
+           ccr[6] &= 0xfd; ccrc[6] = 1; /* Disable write protection of ARR3 */
+           setCx86 (CX86_CCR6, ccr[6]);
+       }
+       /* Disable ARR3. This is safe now that we disabled SMM. */
+       /* cyrix_set_arr_up (3, 0, 0, 0, FALSE); */
+    }
+    /* If we changed CCR1 in memory, change it in the processor, too. */
+    if (ccrc[1]) setCx86 (CX86_CCR1, ccr[1]);
+
+    /* Enable ARR usage by the processor */
+    if (!(ccr[5] & 0x20))
+    {
+       ccr[5] |= 0x20; ccrc[5] = 1;
+       setCx86 (CX86_CCR5, ccr[5]);
+    }
+
+#ifdef CONFIG_SMP
+    for(i=0; i<7; i++) ccr_state[i] = ccr[i];
+    for(i=0; i<8; i++)
+      cyrix_get_arr(i,
+        &arr_state[i].base, &arr_state[i].size, &arr_state[i].type);
+#endif
+
+    set_mtrr_done (&ctxt); /* flush cache and disable MAPEN */
+
+    if ( ccrc[5] ) printk ("mtrr: ARR usage was not enabled, enabled manually\n");
+    if ( ccrc[3] ) printk ("mtrr: ARR3 cannot be changed\n");
+/*
+    if ( ccrc[1] & 0x80) printk ("mtrr: SMM memory access through ARR3 disabled\n");
+    if ( ccrc[1] & 0x04) printk ("mtrr: SMM memory access disabled\n");
+    if ( ccrc[1] & 0x02) printk ("mtrr: SMM mode disabled\n");
+*/
+    if ( ccrc[6] ) printk ("mtrr: ARR3 was write protected, unprotected\n");
+}   /*  End Function cyrix_arr_init  */
+
+/*
+ *     Initialise the later (saner) Winchip MCR variant. In this version
+ *     the BIOS can pass us the registers it has used (but not their values)
+ *     and the control register is read/write
+ */
+static void __init centaur_mcr1_init(void)
+{
+    unsigned i;
+    u32 lo, hi;
+
+    /* In this variant the BIOS tells us (via MSR 0x120) which MCRs it has
+     * used, but not what values it programmed into them.
+     */
+     
+    rdmsr(0x120, lo, hi);
+    if(((lo>>17)&7)==1)                /* Type 1 Winchip2 MCR */
+    {
+       lo&= ~0x1C0;            /* clear key */
+       lo|= 0x040;             /* set key to 1 */
+       wrmsr(0x120, lo, hi);   /* unlock MCR */
+    }    
+    
+    centaur_mcr_type = 1;
+    
+    /*
+     * Clear any unconfigured MCR's.
+     */
+
+    for (i = 0; i < 8; ++i)
+    {
+       if(centaur_mcr[i]. high == 0 && centaur_mcr[i].low == 0)
+       {
+               if(!(lo & (1<<(9+i))))
+                       wrmsr (0x110 + i , 0, 0);
+               else
+                       /*
+                        *      If the BIOS set up an MCR we cannot see it
+                        *      but we don't wish to obliterate it
+                        */
+                       centaur_mcr_reserved |= (1<<i);
+       }
+    }
+    /*  
+     * Throw the main write-combining switch... 
+     * However if OOSTORE is enabled then people have already done far
+     *  cleverer things and we should behave. 
+     */
+
+    lo |= 15;                  /* Write combine enables */
+    wrmsr(0x120, lo, hi);
+}   /*  End Function centaur_mcr1_init  */
+
+/*
+ *     Initialise the original WinChip: read-only MCR registers, no used-register
+ *     bitmask for the BIOS to pass on, and a write-only control register
+ */
+static void __init centaur_mcr0_init(void)
+{
+    unsigned i;
+
+    /* Unfortunately, MCR's are read-only, so there is no way to
+     * find out what the bios might have done.
+     */
+     
+    /* Clear any unconfigured MCR's.
+     * This way we are sure that the centaur_mcr array contains the actual
+     * values. The disadvantage is that any BIOS tweaks are thus undone.
+     *
+     */
+    for (i = 0; i < 8; ++i)
+    {
+       if(centaur_mcr[i]. high == 0 && centaur_mcr[i].low == 0)
+               wrmsr (0x110 + i , 0, 0);
+    }
+
+    wrmsr(0x120, 0x01F0001F, 0);       /* Write only */
+}   /*  End Function centaur_mcr0_init  */
+
+/*
+ *     Initialise Winchip series MCR registers
+ */
+static void __init centaur_mcr_init(void)
+{
+    struct set_mtrr_context ctxt;
+
+    set_mtrr_prepare_save (&ctxt);
+    set_mtrr_cache_disable (&ctxt);
+
+    if(boot_cpu_data.x86_model==4)
+       centaur_mcr0_init();
+    else if(boot_cpu_data.x86_model==8 || boot_cpu_data.x86_model == 9)
+       centaur_mcr1_init();
+
+    set_mtrr_done (&ctxt);
+}   /*  End Function centaur_mcr_init  */
+
+static int __init mtrr_setup(void)
+{
+    if ( test_bit(X86_FEATURE_MTRR, &boot_cpu_data.x86_capability) ) {
+       /* Intel (P6) standard MTRRs */
+       mtrr_if = MTRR_IF_INTEL;
+       get_mtrr = intel_get_mtrr;
+       set_mtrr_up = intel_set_mtrr_up;
+       switch (boot_cpu_data.x86_vendor) {
+
+       case X86_VENDOR_AMD:
+               /* The original Athlon docs said that
+                  total addressable memory is 44 bits wide.
+                  It was not really clear whether its MTRRs
+                  follow this or not. (Read: 44 or 36 bits).
+                  However, "x86-64_overview.pdf" explicitly
+                  states that "previous implementations support
+                  36 bit MTRRs" and also provides a way to
+                  query the width (in bits) of the physical
+                  addressable memory on the Hammer family.
+                */
+               if (boot_cpu_data.x86 == 7 && (cpuid_eax(0x80000000) >= 0x80000008)) {
+                       u32     phys_addr;
+                       phys_addr = cpuid_eax(0x80000008) & 0xff ;
+                       size_or_mask = ~((1 << (phys_addr - PAGE_SHIFT)) - 1);
+                       size_and_mask = ~size_or_mask & 0xfff00000;
+                       break;
+               }
+               size_or_mask  = 0xff000000; /* 36 bits */
+               size_and_mask = 0x00f00000;
+               break;
+
+       case X86_VENDOR_CENTAUR:
+               /* VIA Cyrix family have Intel style MTRRs, but don't support PAE */
+               if (boot_cpu_data.x86 == 6) {
+                       size_or_mask  = 0xfff00000; /* 32 bits */
+                       size_and_mask = 0;
+               }
+               break;
+
+       default:
+               /* Intel, etc. */
+               size_or_mask  = 0xff000000; /* 36 bits */
+               size_and_mask = 0x00f00000;
+               break;
+       }
+
+    } else if ( test_bit(X86_FEATURE_K6_MTRR, &boot_cpu_data.x86_capability) ) {
+       /* Pre-Athlon (K6) AMD CPU MTRRs */
+       mtrr_if = MTRR_IF_AMD_K6;
+       get_mtrr = amd_get_mtrr;
+       set_mtrr_up = amd_set_mtrr_up;
+       size_or_mask  = 0xfff00000; /* 32 bits */
+       size_and_mask = 0;
+    } else if ( test_bit(X86_FEATURE_CYRIX_ARR, &boot_cpu_data.x86_capability) ) {
+       /* Cyrix ARRs */
+       mtrr_if = MTRR_IF_CYRIX_ARR;
+       get_mtrr = cyrix_get_arr;
+       set_mtrr_up = cyrix_set_arr_up;
+       get_free_region = cyrix_get_free_region;
+       cyrix_arr_init();
+       size_or_mask  = 0xfff00000; /* 32 bits */
+       size_and_mask = 0;
+    } else if ( test_bit(X86_FEATURE_CENTAUR_MCR, &boot_cpu_data.x86_capability) ) {
+       /* Centaur MCRs */
+       mtrr_if = MTRR_IF_CENTAUR_MCR;
+       get_mtrr = centaur_get_mcr;
+       set_mtrr_up = centaur_set_mcr_up;
+       get_free_region = centaur_get_free_region;
+       centaur_mcr_init();
+       size_or_mask  = 0xfff00000; /* 32 bits */
+       size_and_mask = 0;
+    } else {
+       /* No supported MTRR interface */
+       mtrr_if = MTRR_IF_NONE;
+    }
+
+    printk ("mtrr: v%s Richard Gooch (rgooch@atnf.csiro.au)\n"
+           "mtrr: detected mtrr type: %s\n",
+           MTRR_VERSION, mtrr_if_name[mtrr_if]);
+
+    return (mtrr_if != MTRR_IF_NONE);
+}   /*  End Function mtrr_setup  */
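As a worked example of the width handling in mtrr_setup() (editorial, assuming a hypothetical Hammer-class CPU whose cpuid leaf 0x80000008 reports a 40-bit physical address width, with PAGE_SHIFT = 12):

    phys_addr     = 40;
    size_or_mask  = ~((1UL << (40 - 12)) - 1);    /* ~0x0fffffff = 0xfffffffff0000000 */
    size_and_mask = ~size_or_mask & 0xfff00000;   /*  0x0fffffff & 0xfff00000 = 0x0ff00000 */

so any page-granular base or size with bits set at or above bit 28 is rejected by the base & size_or_mask check in mtrr_add_page().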
+
+#ifdef CONFIG_SMP
+
+static volatile unsigned long smp_changes_mask __initdata = 0;
+static struct mtrr_state smp_mtrr_state __initdata = {0, 0};
+
+void __init mtrr_init_boot_cpu(void)
+{
+    if ( !mtrr_setup () )
+       return;
+
+    if ( mtrr_if == MTRR_IF_INTEL ) {
+       /* Only for Intel MTRRs */
+       get_mtrr_state (&smp_mtrr_state);
+    }
+}   /*  End Function mtrr_init_boot_cpu  */
+
+static void __init intel_mtrr_init_secondary_cpu(void)
+{
+    unsigned long mask, count;
+    struct set_mtrr_context ctxt;
+
+    /*  Note that this is not ideal, since the cache is only flushed/disabled
+       for this CPU while the MTRRs are changed, but changing this requires
+       more invasive changes to the way the kernel boots  */
+    set_mtrr_prepare_save (&ctxt);
+    set_mtrr_cache_disable (&ctxt);
+    mask = set_mtrr_state (&smp_mtrr_state, &ctxt);
+    set_mtrr_done (&ctxt);
+    /*  Use the atomic bitops to update the global mask  */
+    for (count = 0; count < sizeof mask * 8; ++count)
+    {
+       if (mask & 0x01) set_bit (count, &smp_changes_mask);
+       mask >>= 1;
+    }
+}   /*  End Function intel_mtrr_init_secondary_cpu  */
+
+void __init mtrr_init_secondary_cpu(void)
+{
+    switch ( mtrr_if ) {
+    case MTRR_IF_INTEL:
+       /* Intel (P6) standard MTRRs */
+       intel_mtrr_init_secondary_cpu();
+       break;
+    case MTRR_IF_CYRIX_ARR:
+       /* This is _completely theoretical_!
+        * I assume here that one day Cyrix will support Intel APIC.
+        * In reality on non-Intel CPUs we won't even get to this routine.
+        * Hopefully no one will plug two Cyrix processors in a dual P5 board.
+        *  :-)
+        */
+       cyrix_arr_init_secondary ();
+       break;
+    default:
+       /* I see no MTRRs I can support in SMP mode... */
+       printk ("mtrr: SMP support incomplete for this vendor\n");
+    }
+}   /*  End Function mtrr_init_secondary_cpu  */
+#endif  /*  CONFIG_SMP  */
+
+int __init mtrr_init(void)
+{
+#ifdef CONFIG_SMP
+    /* mtrr_setup() should already have been called from mtrr_init_boot_cpu() */
+
+    if ( mtrr_if == MTRR_IF_INTEL ) {
+       finalize_mtrr_state (&smp_mtrr_state);
+       mtrr_state_warn (smp_changes_mask);
+    }
+#else
+    if ( !mtrr_setup() )
+       return 0;               /* MTRRs not supported? */
+#endif
+
+#ifdef CONFIG_PROC_FS
+    proc_root_mtrr = create_proc_entry ("mtrr", S_IWUSR | S_IRUGO, &proc_root);
+    if (proc_root_mtrr) {
+       proc_root_mtrr->owner = THIS_MODULE;
+       proc_root_mtrr->proc_fops = &mtrr_fops;
+    }
+#endif
+    devfs_handle = devfs_register (NULL, "cpu/mtrr", DEVFS_FL_DEFAULT, 0, 0,
+                                  S_IFREG | S_IRUGO | S_IWUSR,
+                                  &mtrr_fops, NULL);
+    init_table ();
+    return 0;
+}   /*  End Function mtrr_init  */
+
+/*
+ * Local Variables:
+ * mode:c
+ * c-file-style:"k&r"
+ * c-basic-offset:4
+ * End:
+ */
diff --git a/arch/x86_64/kernel/nmi.c b/arch/x86_64/kernel/nmi.c
new file mode 100644 (file)
index 0000000..fce7b39
--- /dev/null
@@ -0,0 +1,272 @@
+/*
+ *  linux/arch/x86_64/nmi.c
+ *
+ *  NMI watchdog support on APIC systems
+ *
+ *  Started by Ingo Molnar <mingo@redhat.com>
+ *
+ *  Fixes:
+ *  Mikael Pettersson  : AMD K7 support for local APIC NMI watchdog.
+ *  Mikael Pettersson  : Power Management for local APIC NMI watchdog.
+ */
+
+#include <linux/config.h>
+#include <linux/mm.h>
+#include <linux/irq.h>
+#include <linux/delay.h>
+#include <linux/bootmem.h>
+#include <linux/smp_lock.h>
+#include <linux/interrupt.h>
+#include <linux/mc146818rtc.h>
+#include <linux/kernel_stat.h>
+
+#include <asm/smp.h>
+#include <asm/mtrr.h>
+#include <asm/mpspec.h>
+
+unsigned int nmi_watchdog = NMI_NONE;
+static unsigned int nmi_hz = HZ;
+unsigned int nmi_perfctr_msr;  /* the MSR to reset in NMI handler */
+extern void show_registers(struct pt_regs *regs);
+
+#define K7_EVNTSEL_ENABLE      (1 << 22)
+#define K7_EVNTSEL_INT         (1 << 20)
+#define K7_EVNTSEL_OS          (1 << 17)
+#define K7_EVNTSEL_USR         (1 << 16)
+#define K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING   0x76
+#define K7_NMI_EVENT           K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING
+
+#define P6_EVNTSEL0_ENABLE     (1 << 22)
+#define P6_EVNTSEL_INT         (1 << 20)
+#define P6_EVNTSEL_OS          (1 << 17)
+#define P6_EVNTSEL_USR         (1 << 16)
+#define P6_EVENT_CPU_CLOCKS_NOT_HALTED 0x79
+#define P6_NMI_EVENT           P6_EVENT_CPU_CLOCKS_NOT_HALTED
+
+int __init check_nmi_watchdog (void)
+{
+       int counts[NR_CPUS];
+       int j, cpu;
+
+       printk(KERN_INFO "testing NMI watchdog ... ");
+
+       for (j = 0; j < NR_CPUS; ++j) 
+               counts[j] = cpu_pda[cpu_logical_map(j)].__nmi_count; 
+       sti();
+       mdelay((10*1000)/nmi_hz); // wait 10 ticks
+
+       for (j = 0; j < smp_num_cpus; j++) {
+               cpu = cpu_logical_map(j);
+               if (nmi_count(cpu) - counts[j] <= 5) {
+                       printk("CPU#%d: NMI appears to be stuck!\n", cpu);
+                       return -1;
+               }
+       }
+       printk("OK.\n");
+
+       /* now that we know it works we can reduce NMI frequency to
+          something more reasonable; makes a difference in some configs */
+       if (nmi_watchdog == NMI_LOCAL_APIC)
+               nmi_hz = 1;
+
+       return 0;
+}
+
+static int __init setup_nmi_watchdog(char *str)
+{
+       int nmi;
+
+       get_option(&str, &nmi);
+
+       if (nmi >= NMI_INVALID)
+               return 0;
+       if (nmi == NMI_NONE)
+               nmi_watchdog = nmi;
+       /*
+        * If any other x86 CPU has a local APIC, then
+        * please test the NMI stuff there and send me the
+        * missing bits. Right now Intel P6 and AMD K7 only.
+        */
+       if ((nmi == NMI_LOCAL_APIC) &&
+                       (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) &&
+                       (boot_cpu_data.x86 == 6))
+               nmi_watchdog = nmi;
+       if ((nmi == NMI_LOCAL_APIC) &&
+                       (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) &&
+                       (boot_cpu_data.x86 == 6))
+               nmi_watchdog = nmi;
+       /*
+        * We can enable the IO-APIC watchdog
+        * unconditionally.
+        */
+       if (nmi == NMI_IO_APIC)
+               nmi_watchdog = nmi;
+       return 1;
+}
+
+__setup("nmi_watchdog=", setup_nmi_watchdog);
+
+#ifdef CONFIG_PM
+
+#include <linux/pm.h>
+
+struct pm_dev *nmi_pmdev;
+
+static void disable_apic_nmi_watchdog(void)
+{
+       switch (boot_cpu_data.x86_vendor) {
+       case X86_VENDOR_AMD:
+               wrmsr(MSR_K7_EVNTSEL0, 0, 0);
+               break;
+       case X86_VENDOR_INTEL:
+               wrmsr(MSR_IA32_EVNTSEL0, 0, 0);
+               break;
+       }
+}
+
+static int nmi_pm_callback(struct pm_dev *dev, pm_request_t rqst, void *data)
+{
+       switch (rqst) {
+       case PM_SUSPEND:
+               disable_apic_nmi_watchdog();
+               break;
+       case PM_RESUME:
+               setup_apic_nmi_watchdog();
+               break;
+       }
+       return 0;
+}
+
+static void nmi_pm_init(void)
+{
+       if (!nmi_pmdev)
+               nmi_pmdev = apic_pm_register(PM_SYS_DEV, 0, nmi_pm_callback);
+}
+
+#define __pminit       /*empty*/
+
+#else  /* CONFIG_PM */
+
+static inline void nmi_pm_init(void) { }
+
+#define __pminit       __init
+
+#endif /* CONFIG_PM */
+
+/*
+ * Activate the NMI watchdog via the local APIC.
+ * Original code written by Keith Owens.
+ */
+
+static void __pminit setup_k7_watchdog(void)
+{
+       int i;
+       unsigned int evntsel;
+
+       nmi_perfctr_msr = MSR_K7_PERFCTR0;
+
+       for(i = 0; i < 4; ++i) {
+               wrmsr(MSR_K7_EVNTSEL0+i, 0, 0);
+               wrmsr(MSR_K7_PERFCTR0+i, 0, 0);
+       }
+
+       evntsel = K7_EVNTSEL_INT
+               | K7_EVNTSEL_OS
+               | K7_EVNTSEL_USR
+               | K7_NMI_EVENT;
+
+       wrmsr(MSR_K7_EVNTSEL0, evntsel, 0);
+       Dprintk("setting K7_PERFCTR0 to %08lx\n", -(cpu_khz/nmi_hz*1000));
+       wrmsr(MSR_K7_PERFCTR0, -(cpu_khz/nmi_hz*1000), -1);
+       apic_write(APIC_LVTPC, APIC_DM_NMI);
+       evntsel |= K7_EVNTSEL_ENABLE;
+       wrmsr(MSR_K7_EVNTSEL0, evntsel, 0);
+}
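The PERFCTR0 preload above is simply the number of cycles per watchdog period, negated so that the counter overflows (raising the NMI) once per period. For a hypothetical 1 GHz CPU with nmi_hz at 100, for instance:

    cpu_khz / nmi_hz * 1000  =  1000000 / 100 * 1000  =  10,000,000 cycles
    /* the counter starts at -10,000,000 and, counting unhalted cycles,
       overflows roughly every 10 ms, i.e. about 100 NMIs per second */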
+
+void __pminit setup_apic_nmi_watchdog (void)
+{
+       switch (boot_cpu_data.x86_vendor) {
+       case X86_VENDOR_AMD:
+               if (boot_cpu_data.x86 != 6)
+                       return;
+               setup_k7_watchdog();
+               break;
+       default:
+               return;
+       }
+       nmi_pm_init();
+}
+
+static spinlock_t nmi_print_lock = SPIN_LOCK_UNLOCKED;
+
+/*
+ * the best way to detect whether a CPU has a 'hard lockup' problem
+ * is to check its local APIC timer IRQ counts. If they are not
+ * changing then that CPU has some problem.
+ *
+ * as these watchdog NMI IRQs are generated on every CPU, we only
+ * have to check the current processor.
+ *
+ * since NMIs don't listen to _any_ locks, we have to be extremely
+ * careful not to rely on unsafe variables. The printk might lock
+ * up though, so we have to break up any console locks first ...
+ * [when there will be more tty-related locks, break them up
+ *  here too!]
+ */
+
+static unsigned int
+       last_irq_sums [NR_CPUS],
+       alert_counter [NR_CPUS];
+
+void touch_nmi_watchdog (void)
+{
+       int i;
+
+       /*
+        * Just reset the alert counters, (other CPUs might be
+        * spinning on locks we hold):
+        */
+       for (i = 0; i < smp_num_cpus; i++)
+               alert_counter[i] = 0;
+}
+
+void nmi_watchdog_tick (struct pt_regs * regs)
+{
+
+       /*
+        * Since current_thread_info()-> is always on the stack, and we
+        * always switch the stack NMI-atomically, it's safe to use
+        * smp_processor_id().
+        */
+       int sum, cpu = smp_processor_id();
+
+       sum = apic_timer_irqs[cpu];
+
+       if (last_irq_sums[cpu] == sum) {
+               /*
+                * Ayiee, looks like this CPU is stuck ...
+                * wait a few IRQs (5 seconds) before doing the oops ...
+                */
+               alert_counter[cpu]++;
+               if (alert_counter[cpu] == 5*nmi_hz) {
+                       spin_lock(&nmi_print_lock);
+                       /*
+                        * We are in trouble anyway, let's at least try
+                        * to get a message out.
+                        */
+                       bust_spinlocks(1);
+                       printk("NMI Watchdog detected LOCKUP on CPU%d, registers:\n", cpu);
+                       show_registers(regs);
+                       printk("console shuts up ...\n");
+                       console_silent();
+                       spin_unlock(&nmi_print_lock);
+                       bust_spinlocks(0);
+                       do_exit(SIGSEGV);
+               }
+       } else {
+               last_irq_sums[cpu] = sum;
+               alert_counter[cpu] = 0;
+       }
+       if (nmi_perfctr_msr)
+               wrmsr(nmi_perfctr_msr, -(cpu_khz/nmi_hz*1000), -1);
+}
diff --git a/arch/x86_64/kernel/pci-dma.c b/arch/x86_64/kernel/pci-dma.c
new file mode 100644 (file)
index 0000000..8f31c21
--- /dev/null
@@ -0,0 +1,33 @@
+/*
+ * Dynamic DMA mapping support.
+ */
+
+#include <linux/types.h>
+#include <linux/mm.h>
+#include <linux/string.h>
+#include <linux/pci.h>
+#include <asm/io.h>
+
+void *pci_alloc_consistent(struct pci_dev *hwdev, size_t size,
+                          dma_addr_t *dma_handle)
+{
+       void *ret;
+       int gfp = GFP_ATOMIC;
+
+       /* We need to always allocate below 4 GiB. We probably need a new
+          GFP mask to say that */
+       gfp |= GFP_DMA;
+       ret = (void *)__get_free_pages(gfp, get_order(size));
+
+       if (ret != NULL) {
+               memset(ret, 0, size);
+               *dma_handle = virt_to_phys(ret);
+       }
+       return ret;
+}
+
+void pci_free_consistent(struct pci_dev *hwdev, size_t size,
+                        void *vaddr, dma_addr_t dma_handle)
+{
+       free_pages((unsigned long)vaddr, get_order(size));
+}
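A caller typically brackets the lifetime of a shared buffer (e.g. a DMA descriptor ring) with these two routines. A minimal sketch, where pdev and RING_BYTES stand in for the driver's own device pointer and buffer size:

    dma_addr_t ring_dma;
    void *ring;

    ring = pci_alloc_consistent(pdev, RING_BYTES, &ring_dma);
    if (ring == NULL)
        return -ENOMEM;
    /* hand ring_dma to the device, access the buffer via ring on the CPU side */
    pci_free_consistent(pdev, RING_BYTES, ring, ring_dma);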
diff --git a/arch/x86_64/kernel/pci-irq.c b/arch/x86_64/kernel/pci-irq.c
new file mode 100644 (file)
index 0000000..2489c9e
--- /dev/null
@@ -0,0 +1,753 @@
+/*
+ *     Low-Level PCI Support for PC -- Routing of Interrupts
+ *
+ *     (c) 1999--2000 Martin Mares <mj@ucw.cz>
+ */
+
+#include <linux/config.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+
+#include <asm/io.h>
+#include <asm/smp.h>
+#include <asm/io_apic.h>
+
+#include "pci-x86_64.h"
+
+#define PIRQ_SIGNATURE (('$' << 0) + ('P' << 8) + ('I' << 16) + ('R' << 24))
+#define PIRQ_VERSION 0x0100
+
+static struct irq_routing_table *pirq_table;
+
+/*
+ * Never use: 0, 1, 2 (timer, keyboard, and cascade)
+ * Avoid using: 13, 14 and 15 (FP error and IDE).
+ * Penalize: 3, 4, 6, 7, 12 (known ISA uses: serial, floppy, parallel and mouse)
+ */
+unsigned int pcibios_irq_mask = 0xfff8;
+
+static int pirq_penalty[16] = {
+       1000000, 1000000, 1000000, 1000, 1000, 0, 1000, 1000,
+       0, 0, 0, 0, 1000, 100000, 100000, 100000
+};
+
+struct irq_router {
+       char *name;
+       u16 vendor, device;
+       int (*get)(struct pci_dev *router, struct pci_dev *dev, int pirq);
+       int (*set)(struct pci_dev *router, struct pci_dev *dev, int pirq, int new);
+};
+
+/*
+ *  Search 0xf0000 -- 0xfffff for the PCI IRQ Routing Table.
+ */
+
+static struct irq_routing_table * __init pirq_find_routing_table(void)
+{
+       u8 *addr;
+       struct irq_routing_table *rt;
+       int i;
+       u8 sum;
+
+       for(addr = (u8 *) __va(0xf0000); addr < (u8 *) __va(0x100000); addr += 16) {
+               rt = (struct irq_routing_table *) addr;
+               if (rt->signature != PIRQ_SIGNATURE ||
+                   rt->version != PIRQ_VERSION ||
+                   rt->size % 16 ||
+                   rt->size < sizeof(struct irq_routing_table))
+                       continue;
+               sum = 0;
+               for(i=0; i<rt->size; i++)
+                       sum += addr[i];
+               if (!sum) {
+                       DBG("PCI: Interrupt Routing Table found at 0x%p\n", rt);
+                       return rt;
+               }
+       }
+       return NULL;
+}
+
+/*
+ *  If we have an IRQ routing table, use it to search for peer host
+ *  bridges.  It's a gross hack, but since there is no other known
+ *  way to get a list of buses, we have to do it this way.
+ *
+ *  [maybe the x86-64 architecture should define a more reasonable
+ *  way to query this info?]
+ */
+
+static void __init pirq_peer_trick(void)
+{
+       struct irq_routing_table *rt = pirq_table;
+       u8 busmap[256];
+       int i;
+       struct irq_info *e;
+
+       memset(busmap, 0, sizeof(busmap));
+       for(i=0; i < (rt->size - sizeof(struct irq_routing_table)) / sizeof(struct irq_info); i++) {
+               e = &rt->slots[i];
+#ifdef DEBUG
+               {
+                       int j;
+                       DBG("%02x:%02x slot=%02x", e->bus, e->devfn/8, e->slot);
+                       for(j=0; j<4; j++)
+                               DBG(" %d:%02x/%04x", j, e->irq[j].link, e->irq[j].bitmap);
+                       DBG("\n");
+               }
+#endif
+               busmap[e->bus] = 1;
+       }
+       for(i=1; i<256; i++)
+               /*
+                *  It might be a secondary bus, but in this case its parent is already
+                *  known (ascending bus order) and therefore pci_scan_bus returns immediately.
+                */
+               if (busmap[i] && pci_scan_bus(i, pci_root_bus->ops, NULL))
+                       printk(KERN_INFO "PCI: Discovered primary peer bus %02x [IRQ]\n", i);
+       pcibios_last_bus = -1;
+}
+
+/*
+ *  Code for querying and setting of IRQ routes on various interrupt routers.
+ */
+
+static void eisa_set_level_irq(unsigned int irq)
+{
+       unsigned char mask = 1 << (irq & 7);
+       unsigned int port = 0x4d0 + (irq >> 3);
+       unsigned char val = inb(port);
+
+       if (!(val & mask)) {
+               DBG(" -> edge");
+               outb(val | mask, port);
+       }
+}
+
+/*
+ * Common IRQ routing practice: nybbles in config space,
+ * offset by some magic constant.
+ */
+static unsigned int read_config_nybble(struct pci_dev *router, unsigned offset, unsigned nr)
+{
+       u8 x;
+       unsigned reg = offset + (nr >> 1);
+
+       pci_read_config_byte(router, reg, &x);
+       return (nr & 1) ? (x >> 4) : (x & 0xf);
+}
+
+static void write_config_nybble(struct pci_dev *router, unsigned offset, unsigned nr, unsigned int val)
+{
+       u8 x;
+       unsigned reg = offset + (nr >> 1);
+
+       pci_read_config_byte(router, reg, &x);
+       x = (nr & 1) ? ((x & 0x0f) | (val << 4)) : ((x & 0xf0) | val);
+       pci_write_config_byte(router, reg, x);
+}
+
+/*
+ * ALI pirq entries are damn ugly, and completely undocumented.
+ * This has been figured out from pirq tables, and it's not a pretty
+ * picture.
+ */
+static int pirq_ali_get(struct pci_dev *router, struct pci_dev *dev, int pirq)
+{
+       static unsigned char irqmap[16] = { 0, 9, 3, 10, 4, 5, 7, 6, 1, 11, 0, 12, 0, 14, 0, 15 };
+
+       return irqmap[read_config_nybble(router, 0x48, pirq-1)];
+}
+
+static int pirq_ali_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq)
+{
+       static unsigned char irqmap[16] = { 0, 8, 0, 2, 4, 5, 7, 6, 0, 1, 3, 9, 11, 0, 13, 15 };
+       unsigned int val = irqmap[irq];
+               
+       if (val) {
+               write_config_nybble(router, 0x48, pirq-1, val);
+               return 1;
+       }
+       return 0;
+}
+
+/*
+ * The Intel PIIX4 pirq rules are fairly simple: "pirq" is
+ * just a pointer to the config space.
+ */
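+/*
+ * For example, a routing-table link value of 0x60 simply names config
+ * register 0x60 of the router; a value below 16 read from there is the ISA
+ * IRQ currently routed, anything else (typically 0x80, routing disabled) is
+ * treated as not routed.
+ */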
+static int pirq_piix_get(struct pci_dev *router, struct pci_dev *dev, int pirq)
+{
+       u8 x;
+
+       pci_read_config_byte(router, pirq, &x);
+       return (x < 16) ? x : 0;
+}
+
+static int pirq_piix_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq)
+{
+       pci_write_config_byte(router, pirq, irq);
+       return 1;
+}
+
+/*
+ * The VIA pirq rules are nibble-based, like ALI,
+ * but without the ugly irq number munging.
+ */
+static int pirq_via_get(struct pci_dev *router, struct pci_dev *dev, int pirq)
+{
+       return read_config_nybble(router, 0x55, pirq);
+}
+
+static int pirq_via_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq)
+{
+       write_config_nybble(router, 0x55, pirq, irq);
+       return 1;
+}
+
+/*
+ * OPTI: the high four bits are the nibble pointer.
+ * I wonder what the low bits do?
+ */
+static int pirq_opti_get(struct pci_dev *router, struct pci_dev *dev, int pirq)
+{
+       return read_config_nybble(router, 0xb8, pirq >> 4);
+}
+
+static int pirq_opti_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq)
+{
+       write_config_nybble(router, 0xb8, pirq >> 4, irq);
+       return 1;
+}
+
+/*
+ * Cyrix: nibble offset 0x5C
+ */
+static int pirq_cyrix_get(struct pci_dev *router, struct pci_dev *dev, int pirq)
+{
+       return read_config_nybble(router, 0x5C, pirq-1);
+}
+
+static int pirq_cyrix_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq)
+{
+       write_config_nybble(router, 0x5C, pirq-1, irq);
+       return 1;
+}
+
+/*
+ *     PIRQ routing for SiS 85C503 router used in several SiS chipsets
+ *     According to the SiS 5595 datasheet (preliminary V1.0, 12/24/1997)
+ *     the related registers work as follows:
+ *     
+ *     general: one byte per re-routable IRQ,
+ *              bit 7      IRQ mapping enabled (0) or disabled (1)
+ *              bits [6:4] reserved
+ *              bits [3:0] IRQ to map to
+ *                  allowed: 3-7, 9-12, 14-15
+ *                  reserved: 0, 1, 2, 8, 13
+ *
+ *     individual registers in device config space:
+ *
+ *     0x41/0x42/0x43/0x44:    PCI INT A/B/C/D - bits as in general case
+ *
+ *     0x61:                   IDEIRQ: bits as in general case - but:
+ *                             bits [6:5] must be written 01
+ *                             bit 4 channel-select primary (0), secondary (1)
+ *
+ *     0x62:                   USBIRQ: bits as in general case - but:
+ *                             bit 4 OHCI function disabled (0), enabled (1)
+ *     
+ *     0x6a:                   ACPI/SCI IRQ - bits as in general case
+ *
+ *     0x7e:                   Data Acq. Module IRQ - bits as in general case
+ *
+ *     Apparently there are systems whose PCI routing tables use both link
+ *     values 0x01-0x04 and 0x41-0x44 for PCI INTA..D, but register offsets
+ *     such as 0x62 as the link value for e.g. USBIRQ, so there is no simple
+ *     "register = offset + pirq" relation.
+ *     Currently we support PCI INTA..D and USBIRQ and try our best to handle
+ *     both link mappings.
+ *     IDE/ACPI/DAQ mapping is currently unsupported (left untouched as set by the BIOS).
+ */
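+
+/*
+ *     For example, a table entry with link value 0x02 (equivalently 0x42)
+ *     refers to register 0x42 (PCI INTB): a register value of 0x0b routes
+ *     INTB to IRQ 11, while 0x8b (bit 7 set) reports the mapping as disabled.
+ */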
+
+static int pirq_sis_get(struct pci_dev *router, struct pci_dev *dev, int pirq)
+{
+       u8 x;
+       int reg = pirq;
+
+       switch(pirq) {
+               case 0x01:
+               case 0x02:
+               case 0x03:
+               case 0x04:
+                       reg += 0x40;
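+                       /* fall through */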
+               case 0x41:
+               case 0x42:
+               case 0x43:
+               case 0x44:
+               case 0x62:
+                       pci_read_config_byte(router, reg, &x);
+                       if (reg != 0x62)
+                               break;
+                       if (!(x & 0x40))
+                               return 0;
+                       break;
+               case 0x61:
+               case 0x6a:
+               case 0x7e:
+                       printk(KERN_INFO "SiS pirq: advanced IDE/ACPI/DAQ mapping not yet implemented\n");
+                       return 0;
+               default:                        
+                       printk(KERN_INFO "SiS router pirq escape (%d)\n", pirq);
+                       return 0;
+       }
+       return (x & 0x80) ? 0 : (x & 0x0f);
+}
+
+static int pirq_sis_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq)
+{
+       u8 x;
+       int reg = pirq;
+
+       switch(pirq) {
+               case 0x01:
+               case 0x02:
+               case 0x03:
+               case 0x04:
+                       reg += 0x40;
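+                       /* fall through */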
+               case 0x41:
+               case 0x42:
+               case 0x43:
+               case 0x44:
+               case 0x62:
+                       x = (irq&0x0f) ? (irq&0x0f) : 0x80;
+                       if (reg != 0x62)
+                               break;
+                       /* always mark OHCI enabled, as nothing else knows about this */
+                       x |= 0x40;
+                       break;
+               case 0x61:
+               case 0x6a:
+               case 0x7e:
+                       printk(KERN_INFO "advanced SiS pirq mapping not yet implemented\n");
+                       return 0;
+               default:                        
+                       printk(KERN_INFO "SiS router pirq escape (%d)\n", pirq);
+                       return 0;
+       }
+       pci_write_config_byte(router, reg, x);
+
+       return 1;
+}
+
+/*
+ * VLSI: nibble offset 0x74 - educated guess due to routing table and
+ *       config space of VLSI 82C534 PCI-bridge/router (1004:0102)
+ *       Tested on HP OmniBook 800 covering PIRQ 1, 2, 4, 8 for onboard
+ *       devices, PIRQ 3 for non-pci(!) soundchip and (untested) PIRQ 6
+ *       for the busbridge to the docking station.
+ */
+
+static int pirq_vlsi_get(struct pci_dev *router, struct pci_dev *dev, int pirq)
+{
+       if (pirq > 8) {
+               printk(KERN_INFO "VLSI router pirq escape (%d)\n", pirq);
+               return 0;
+       }
+       return read_config_nybble(router, 0x74, pirq-1);
+}
+
+static int pirq_vlsi_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq)
+{
+       if (pirq > 8) {
+               printk(KERN_INFO "VLSI router pirq escape (%d)\n", pirq);
+               return 0;
+       }
+       write_config_nybble(router, 0x74, pirq-1, irq);
+       return 1;
+}
+
+/*
+ * ServerWorks: PCI interrupts mapped to system IRQ lines through Index
+ * and Redirect I/O registers (0x0c00 and 0x0c01).  The Index register
+ * format is (PCIIRQ## | 0x10), e.g.: PCIIRQ10=0x1a.  The Redirect
+ * register is a straight binary coding of desired PIC IRQ (low nibble).
+ *
+ * The 'link' value in the PIRQ table is already in the correct format
+ * for the Index register.  There are some special index values:
+ * 0x00 for ACPI (SCI), 0x01 for USB, 0x02 for IDE0, 0x04 for IDE1,
+ * and 0x03 for SMBus.
+ */
+static int pirq_serverworks_get(struct pci_dev *router, struct pci_dev *dev, int pirq)
+{
+       outb_p(pirq, 0xc00);
+       return inb(0xc01) & 0xf;
+}
+
+static int pirq_serverworks_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq)
+{
+       outb_p(pirq, 0xc00);
+       outb_p(irq, 0xc01);
+       return 1;
+}
+
+/* Support for AMD756 PCI IRQ Routing
+ * Jhon H. Caicedo <jhcaiced@osso.org.co>
+ * Jun/21/2001 0.2.0 Release, fixed to use "nybble" functions... (jhcaiced)
+ * Jun/19/2001 Alpha Release 0.1.0 (jhcaiced)
+ * The AMD756 pirq rules are nibble-based
+ * offset 0x56 0-3 PIRQA  4-7  PIRQB
+ * offset 0x57 0-3 PIRQC  4-7  PIRQD
+ */
+static int pirq_amd756_get(struct pci_dev *router, struct pci_dev *dev, int pirq)
+{
+       u8 irq;
+       irq = 0;
+       if (pirq <= 4)
+       {
+               irq = read_config_nybble(router, 0x56, pirq - 1);
+       }
+       printk(KERN_INFO "AMD756: dev %04x:%04x, router pirq : %d get irq : %2d\n",
+               dev->vendor, dev->device, pirq, irq);
+       return irq;
+}
+
+static int pirq_amd756_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq)
+{
+       printk(KERN_INFO "AMD756: dev %04x:%04x, router pirq : %d SET irq : %2d\n", 
+               dev->vendor, dev->device, pirq, irq);
+       if (pirq <= 4)
+       {
+               write_config_nybble(router, 0x56, pirq - 1, irq);
+       }
+       return 1;
+}
+
+#ifdef CONFIG_PCI_BIOS
+
+static int pirq_bios_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq)
+{
+       struct pci_dev *bridge;
+       int pin = pci_get_interrupt_pin(dev, &bridge);
+       return pcibios_set_irq_routing(bridge, pin, irq);
+}
+
+static struct irq_router pirq_bios_router =
+       { "BIOS", 0, 0, NULL, pirq_bios_set };
+
+#endif
+
+static struct irq_router pirq_routers[] = {
+       { "PIIX", PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82371FB_0, pirq_piix_get, pirq_piix_set },
+       { "PIIX", PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82371SB_0, pirq_piix_get, pirq_piix_set },
+       { "PIIX", PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82371AB_0, pirq_piix_get, pirq_piix_set },
+       { "PIIX", PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82371MX,   pirq_piix_get, pirq_piix_set },
+       { "PIIX", PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82443MX_0, pirq_piix_get, pirq_piix_set },
+       { "PIIX", PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801BA_0, pirq_piix_get, pirq_piix_set },
+
+       { "ALI", PCI_VENDOR_ID_AL, PCI_DEVICE_ID_AL_M1533, pirq_ali_get, pirq_ali_set },
+
+       { "VIA", PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_82C586_0, pirq_via_get, pirq_via_set },
+       { "VIA", PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_82C596, pirq_via_get, pirq_via_set },
+       { "VIA", PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_82C686, pirq_via_get, pirq_via_set },
+
+       { "OPTI", PCI_VENDOR_ID_OPTI, PCI_DEVICE_ID_OPTI_82C700, pirq_opti_get, pirq_opti_set },
+
+       { "NatSemi", PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5520, pirq_cyrix_get, pirq_cyrix_set },
+       { "SIS", PCI_VENDOR_ID_SI, PCI_DEVICE_ID_SI_503, pirq_sis_get, pirq_sis_set },
+       { "VLSI 82C534", PCI_VENDOR_ID_VLSI, PCI_DEVICE_ID_VLSI_82C534, pirq_vlsi_get, pirq_vlsi_set },
+       { "ServerWorks", PCI_VENDOR_ID_SERVERWORKS, PCI_DEVICE_ID_SERVERWORKS_OSB4,
+         pirq_serverworks_get, pirq_serverworks_set },
+       { "AMD756 VIPER", PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_VIPER_740B,
+               pirq_amd756_get, pirq_amd756_set },
+
+       { "default", 0, 0, NULL, NULL }
+};
+
+static struct irq_router *pirq_router;
+static struct pci_dev *pirq_router_dev;
+
+static void __init pirq_find_router(void)
+{
+       struct irq_routing_table *rt = pirq_table;
+       struct irq_router *r;
+
+#ifdef CONFIG_PCI_BIOS
+       if (!rt->signature) {
+               printk(KERN_INFO "PCI: Using BIOS for IRQ routing\n");
+               pirq_router = &pirq_bios_router;
+               return;
+       }
+#endif
+
+       DBG("PCI: Attempting to find IRQ router for %04x:%04x\n",
+           rt->rtr_vendor, rt->rtr_device);
+
+       /* fall back to default router if nothing else found */
+       pirq_router = &pirq_routers[ARRAY_SIZE(pirq_routers) - 1];
+
+       pirq_router_dev = pci_find_slot(rt->rtr_bus, rt->rtr_devfn);
+       if (!pirq_router_dev) {
+               DBG("PCI: Interrupt router not found at %02x:%02x\n", rt->rtr_bus, rt->rtr_devfn);
+               return;
+       }
+
+       for(r=pirq_routers; r->vendor; r++) {
+               /* Exact match against router table entry? Use it! */
+               if (r->vendor == rt->rtr_vendor && r->device == rt->rtr_device) {
+                       pirq_router = r;
+                       break;
+               }
+               /* Match against router device entry? Use it as a fallback */
+               if (r->vendor == pirq_router_dev->vendor && r->device == pirq_router_dev->device) {
+                       pirq_router = r;
+               }
+       }
+       printk(KERN_INFO "PCI: Using IRQ router %s [%04x/%04x] at %s\n",
+               pirq_router->name,
+               pirq_router_dev->vendor,
+               pirq_router_dev->device,
+               pirq_router_dev->slot_name);
+}
+
+static struct irq_info *pirq_get_info(struct pci_dev *dev)
+{
+       struct irq_routing_table *rt = pirq_table;
+       int entries = (rt->size - sizeof(struct irq_routing_table)) / sizeof(struct irq_info);
+       struct irq_info *info;
+
+       for (info = rt->slots; entries--; info++)
+               if (info->bus == dev->bus->number && PCI_SLOT(info->devfn) == PCI_SLOT(dev->devfn))
+                       return info;
+       return NULL;
+}
+
+static void pcibios_test_irq_handler(int irq, void *dev_id, struct pt_regs *regs)
+{
+}
+
+static int pcibios_lookup_irq(struct pci_dev *dev, int assign)
+{
+       u8 pin;
+       struct irq_info *info;
+       int i, pirq, newirq;
+       int irq = 0;
+       u32 mask;
+       struct irq_router *r = pirq_router;
+       struct pci_dev *dev2;
+       char *msg = NULL;
+
+       if (!pirq_table)
+               return 0;
+
+       /* Find IRQ routing entry */
+       pci_read_config_byte(dev, PCI_INTERRUPT_PIN, &pin);
+       if (!pin) {
+               DBG(" -> no interrupt pin\n");
+               return 0;
+       }
+       pin = pin - 1;
+       
+       DBG("IRQ for %s:%d", dev->slot_name, pin);
+       info = pirq_get_info(dev);
+       if (!info) {
+               DBG(" -> not found in routing table\n");
+               return 0;
+       }
+       pirq = info->irq[pin].link;
+       mask = info->irq[pin].bitmap;
+       if (!pirq) {
+               DBG(" -> not routed\n");
+               return 0;
+       }
+       DBG(" -> PIRQ %02x, mask %04x, excl %04x", pirq, mask, pirq_table->exclusive_irqs);
+       mask &= pcibios_irq_mask;
+
+       /*
+        * Find the best IRQ to assign: use the one
+        * reported by the device if possible.
+        */
+       newirq = dev->irq;
+       if (!newirq && assign) {
+               for (i = 0; i < 16; i++) {
+                       if (!(mask & (1 << i)))
+                               continue;
+                       if (pirq_penalty[i] < pirq_penalty[newirq] &&
+                           !request_irq(i, pcibios_test_irq_handler, SA_SHIRQ, "pci-test", dev)) {
+                               free_irq(i, dev);
+                               newirq = i;
+                       }
+               }
+       }
+       DBG(" -> newirq=%d", newirq);
+
+       /* Check if it is hardcoded */
+       if ((pirq & 0xf0) == 0xf0) {
+               irq = pirq & 0xf;
+               DBG(" -> hardcoded IRQ %d\n", irq);
+               msg = "Hardcoded";
+       } else if (r->get && (irq = r->get(pirq_router_dev, dev, pirq))) {
+               DBG(" -> got IRQ %d\n", irq);
+               msg = "Found";
+       } else if (newirq && r->set && (dev->class >> 8) != PCI_CLASS_DISPLAY_VGA) {
+               DBG(" -> assigning IRQ %d", newirq);
+               if (r->set(pirq_router_dev, dev, pirq, newirq)) {
+                       eisa_set_level_irq(newirq);
+                       DBG(" ... OK\n");
+                       msg = "Assigned";
+                       irq = newirq;
+               }
+       }
+
+       if (!irq) {
+               DBG(" ... failed\n");
+               if (newirq && mask == (1 << newirq)) {
+                       msg = "Guessed";
+                       irq = newirq;
+               } else
+                       return 0;
+       }
+       printk(KERN_INFO "PCI: %s IRQ %d for device %s\n", msg, irq, dev->slot_name);
+
+       /* Update IRQ for all devices with the same pirq value */
+       pci_for_each_dev(dev2) {
+               pci_read_config_byte(dev2, PCI_INTERRUPT_PIN, &pin);
+               if (!pin)
+                       continue;
+               pin--;
+               info = pirq_get_info(dev2);
+               if (!info)
+                       continue;
+               if (info->irq[pin].link == pirq) {
+                       /* We refuse to override the dev->irq information. Give a warning! */
+                       if (dev2->irq && dev2->irq != irq) {
+                               printk(KERN_INFO "IRQ routing conflict for %s, have irq %d, want irq %d\n",
+                                      dev2->slot_name, dev2->irq, irq);
+                               continue;
+                       }
+                       dev2->irq = irq;
+                       pirq_penalty[irq]++;
+                       if (dev != dev2)
+                               printk(KERN_INFO "PCI: Sharing IRQ %d with %s\n", irq, dev2->slot_name);
+               }
+       }
+       return 1;
+}
+
+void __init pcibios_irq_init(void)
+{
+       DBG("PCI: IRQ init\n");
+       pirq_table = pirq_find_routing_table();
+#ifdef CONFIG_PCI_BIOS
+       if (!pirq_table && (pci_probe & PCI_BIOS_IRQ_SCAN))
+               pirq_table = pcibios_get_irq_routing_table();
+#endif
+       if (pirq_table) {
+               pirq_peer_trick();
+               pirq_find_router();
+               if (pirq_table->exclusive_irqs) {
+                       int i;
+                       for (i=0; i<16; i++)
+                               if (!(pirq_table->exclusive_irqs & (1 << i)))
+                                       pirq_penalty[i] += 100;
+               }
+               /* If we're using the I/O APIC, avoid using the PCI IRQ routing table */
+               if (io_apic_assign_pci_irqs)
+                       pirq_table = NULL;
+       }
+}
+
+void __init pcibios_fixup_irqs(void)
+{
+       struct pci_dev *dev;
+       u8 pin;
+
+       DBG("PCI: IRQ fixup\n");
+       pci_for_each_dev(dev) {
+               /*
+                * If the BIOS has set an out-of-range IRQ number, just ignore it.
+                * Also keep track of which IRQs are already in use.
+                */
+               if (dev->irq >= 16) {
+                       DBG("%s: ignoring bogus IRQ %d\n", dev->slot_name, dev->irq);
+                       dev->irq = 0;
+               }
+               /* If the IRQ is already assigned to a PCI device, ignore its ISA use penalty */
+               if (pirq_penalty[dev->irq] >= 100 && pirq_penalty[dev->irq] < 100000)
+                       pirq_penalty[dev->irq] = 0;
+               pirq_penalty[dev->irq]++;
+       }
+
+       pci_for_each_dev(dev) {
+               pci_read_config_byte(dev, PCI_INTERRUPT_PIN, &pin);
+#ifdef CONFIG_X86_IO_APIC
+               /*
+                * Recalculate IRQ numbers if we use the I/O APIC.
+                */
+               if (io_apic_assign_pci_irqs)
+               {
+                       int irq;
+
+                       if (pin) {
+                               pin--;          /* interrupt pins are numbered starting from 1 */
+                               irq = IO_APIC_get_PCI_irq_vector(dev->bus->number, PCI_SLOT(dev->devfn), pin);
+       /*
+        * Busses behind bridges are typically not listed in the MP-table.
+        * In this case we have to look up the IRQ based on the parent bus,
+        * parent slot, and pin number. The SMP code detects such bridged
+        * busses itself so we should get into this branch reliably.
+        */
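+       /*
+        * For example, INTA (pin 0 after the decrement above) of a device in
+        * slot 3 behind such a bridge is looked up as pin (0 + 3) % 4 = 3,
+        * i.e. INTD, at the bridge's own slot.
+        */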
+                               if (irq < 0 && dev->bus->parent) { /* go back to the bridge */
+                                       struct pci_dev * bridge = dev->bus->self;
+
+                                       pin = (pin + PCI_SLOT(dev->devfn)) % 4;
+                                       irq = IO_APIC_get_PCI_irq_vector(bridge->bus->number, 
+                                                       PCI_SLOT(bridge->devfn), pin);
+                                       if (irq >= 0)
+                                               printk(KERN_WARNING "PCI: using PPB(B%d,I%d,P%d) to get irq %d\n", 
+                                                       bridge->bus->number, PCI_SLOT(bridge->devfn), pin, irq);
+                               }
+                               if (irq >= 0) {
+                                       printk(KERN_INFO "PCI->APIC IRQ transform: (B%d,I%d,P%d) -> %d\n",
+                                               dev->bus->number, PCI_SLOT(dev->devfn), pin, irq);
+                                       dev->irq = irq;
+                               }
+                       }
+               }
+#endif
+               /*
+                * Still no IRQ? Try to lookup one...
+                */
+               if (pin && !dev->irq)
+                       pcibios_lookup_irq(dev, 0);
+       }
+}
+
+void pcibios_penalize_isa_irq(int irq)
+{
+       /*
+        *  If any ISAPnP device reports an IRQ in its list of possible
+        *  IRQs, we try to avoid assigning it to PCI devices.
+        */
+       pirq_penalty[irq] += 100;
+}
+
+void pcibios_enable_irq(struct pci_dev *dev)
+{
+       u8 pin;
+
+       pci_read_config_byte(dev, PCI_INTERRUPT_PIN, &pin);
+       if (pin && !pcibios_lookup_irq(dev, 1) && !dev->irq) {
+               char *msg;
+
+               if (io_apic_assign_pci_irqs)
+                       msg = " Probably buggy MP table.";
+               else if (pci_probe & PCI_BIOS_IRQ_SCAN)
+                       msg = "";
+               else
+                       msg = " Please try using pci=biosirq.";
+               printk(KERN_WARNING "PCI: No IRQ known for interrupt pin %c of device %s.%s\n",
+                      'A' + pin - 1, dev->slot_name, msg);
+       }
+}
diff --git a/arch/x86_64/kernel/pci-pc.c b/arch/x86_64/kernel/pci-pc.c
new file mode 100644 (file)
index 0000000..36a9b46
--- /dev/null
@@ -0,0 +1,438 @@
+/*
+ *     Low-Level PCI Support for PC
+ *
+ *     (c) 1999--2000 Martin Mares <mj@ucw.cz>
+ *     2001 Andi Kleen. Cleanup for x86-64. Removed PCI-BIOS access and fixups
+ *     for hardware that is unlikely to exist on any Hammer platform.
+ * 
+ *     On x86-64 we don't have any access to the PCI-BIOS in long mode, so we
+ *     cannot sort the pci device table based on what the BIOS did. This might 
+ *     change the probing order of some devices compared to an i386 kernel.
+ *     May need to use ACPI to fix this.
+ */
+
+#include <linux/config.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/pci.h>
+#include <linux/init.h>
+#include <linux/ioport.h>
+
+#include <asm/segment.h>
+#include <asm/io.h>
+
+#include "pci-x86_64.h"
+
+unsigned int pci_probe = PCI_PROBE_BIOS | PCI_PROBE_CONF1 | PCI_PROBE_CONF2;
+
+int pcibios_last_bus = -1;
+struct pci_bus *pci_root_bus;
+struct pci_ops *pci_root_ops;
+
+/*
+ * Direct access to PCI hardware...
+ */
+
+#ifdef CONFIG_PCI_DIRECT
+
+/*
+ * Functions for accessing PCI configuration space with type 1 accesses
+ */
+
+#define CONFIG_CMD(dev, where)   (0x80000000 | (dev->bus->number << 16) | (dev->devfn << 8) | (where & ~3))
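+
+/*
+ * For example, a byte read of register 0x3d on bus 0, device 3, function 1
+ * (devfn 0x19) writes CONFIG_CMD = 0x8000193c to port 0xCF8 and then reads
+ * the byte at 0xCFC + (0x3d & 3) = 0xCFD; the low two bits of "where" pick
+ * the byte within the 32-bit window at 0xCFC.
+ */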
+
+static int pci_conf1_read_config_byte(struct pci_dev *dev, int where, u8 *value)
+{
+       outl(CONFIG_CMD(dev,where), 0xCF8);
+       *value = inb(0xCFC + (where&3));
+       return PCIBIOS_SUCCESSFUL;
+}
+
+static int pci_conf1_read_config_word(struct pci_dev *dev, int where, u16 *value)
+{
+       outl(CONFIG_CMD(dev,where), 0xCF8);    
+       *value = inw(0xCFC + (where&2));
+       return PCIBIOS_SUCCESSFUL;    
+}
+
+static int pci_conf1_read_config_dword(struct pci_dev *dev, int where, u32 *value)
+{
+       outl(CONFIG_CMD(dev,where), 0xCF8);
+       *value = inl(0xCFC);
+       return PCIBIOS_SUCCESSFUL;    
+}
+
+static int pci_conf1_write_config_byte(struct pci_dev *dev, int where, u8 value)
+{
+       outl(CONFIG_CMD(dev,where), 0xCF8);    
+       outb(value, 0xCFC + (where&3));
+       return PCIBIOS_SUCCESSFUL;
+}
+
+static int pci_conf1_write_config_word(struct pci_dev *dev, int where, u16 value)
+{
+       outl(CONFIG_CMD(dev,where), 0xCF8);
+       outw(value, 0xCFC + (where&2));
+       return PCIBIOS_SUCCESSFUL;
+}
+
+static int pci_conf1_write_config_dword(struct pci_dev *dev, int where, u32 value)
+{
+       outl(CONFIG_CMD(dev,where), 0xCF8);
+       outl(value, 0xCFC);
+       return PCIBIOS_SUCCESSFUL;
+}
+
+#undef CONFIG_CMD
+
+static struct pci_ops pci_direct_conf1 = {
+       pci_conf1_read_config_byte,
+       pci_conf1_read_config_word,
+       pci_conf1_read_config_dword,
+       pci_conf1_write_config_byte,
+       pci_conf1_write_config_word,
+       pci_conf1_write_config_dword
+};
+
+/*
+ * Functions for accessing PCI configuration space with type 2 accesses
+ */
+
+#define IOADDR(devfn, where)   ((0xC000 | ((devfn & 0x78) << 5)) + where)
+#define FUNC(devfn)            (((devfn & 7) << 1) | 0xf0)
+#define SET(dev)               if (dev->devfn & 0x80) return PCIBIOS_DEVICE_NOT_FOUND;         \
+                               outb(FUNC(dev->devfn), 0xCF8);                                  \
+                               outb(dev->bus->number, 0xCFA);
+
+static int pci_conf2_read_config_byte(struct pci_dev *dev, int where, u8 *value)
+{
+       SET(dev);
+       *value = inb(IOADDR(dev->devfn,where));
+       outb (0, 0xCF8);
+       return PCIBIOS_SUCCESSFUL;
+}
+
+static int pci_conf2_read_config_word(struct pci_dev *dev, int where, u16 *value)
+{
+       SET(dev);
+       *value = inw(IOADDR(dev->devfn,where));
+       outb (0, 0xCF8);
+       return PCIBIOS_SUCCESSFUL;
+}
+
+static int pci_conf2_read_config_dword(struct pci_dev *dev, int where, u32 *value)
+{
+       SET(dev);
+       *value = inl (IOADDR(dev->devfn,where));    
+       outb (0, 0xCF8);    
+       return PCIBIOS_SUCCESSFUL;
+}
+
+static int pci_conf2_write_config_byte(struct pci_dev *dev, int where, u8 value)
+{
+       SET(dev);
+       outb (value, IOADDR(dev->devfn,where));
+       outb (0, 0xCF8);    
+       return PCIBIOS_SUCCESSFUL;
+}
+
+static int pci_conf2_write_config_word(struct pci_dev *dev, int where, u16 value)
+{
+       SET(dev);
+       outw (value, IOADDR(dev->devfn,where));
+       outb (0, 0xCF8);    
+       return PCIBIOS_SUCCESSFUL;
+}
+
+static int pci_conf2_write_config_dword(struct pci_dev *dev, int where, u32 value)
+{
+       SET(dev);
+       outl (value, IOADDR(dev->devfn,where));    
+       outb (0, 0xCF8);    
+       return PCIBIOS_SUCCESSFUL;
+}
+
+#undef SET
+#undef IOADDR
+#undef FUNC
+
+static struct pci_ops pci_direct_conf2 = {
+       pci_conf2_read_config_byte,
+       pci_conf2_read_config_word,
+       pci_conf2_read_config_dword,
+       pci_conf2_write_config_byte,
+       pci_conf2_write_config_word,
+       pci_conf2_write_config_dword
+};
+
+/*
+ * Before we decide to use direct hardware access mechanisms, we try to do some
+ * trivial checks to ensure it at least _seems_ to be working -- we just test
+ * whether bus 00 contains a host bridge (this is similar to checking
+ * techniques used in XFree86, but ours should be more reliable since we
+ * attempt to make use of direct access hints provided by the PCI BIOS).
+ *
+ * This should be close to trivial, but it isn't, because there are buggy
+ * chipsets (yes, you guessed it, by Intel and Compaq) that have no class ID.
+ */
+static int __devinit pci_sanity_check(struct pci_ops *o)
+{
+       u16 x;
+       struct pci_bus bus;             /* Fake bus and device */
+       struct pci_dev dev;
+
+       if (pci_probe & PCI_NO_CHECKS)
+               return 1;
+       bus.number = 0;
+       dev.bus = &bus;
+       for(dev.devfn=0; dev.devfn < 0x100; dev.devfn++)
+               if ((!o->read_word(&dev, PCI_CLASS_DEVICE, &x) &&
+                    (x == PCI_CLASS_BRIDGE_HOST || x == PCI_CLASS_DISPLAY_VGA)) ||
+                   (!o->read_word(&dev, PCI_VENDOR_ID, &x) &&
+                    (x == PCI_VENDOR_ID_INTEL || x == PCI_VENDOR_ID_COMPAQ)))
+                       return 1;
+       DBG("PCI: Sanity check failed\n");
+       return 0;
+}
+
+static struct pci_ops * __devinit pci_check_direct(void)
+{
+       unsigned int tmp;
+       unsigned long flags;
+
+       __save_flags(flags); __cli();
+
+       /*
+        * Check if configuration type 1 works.
+        */
+       if (pci_probe & PCI_PROBE_CONF1) {
+               outb (0x01, 0xCFB);
+               tmp = inl (0xCF8);
+               outl (0x80000000, 0xCF8);
+               if (inl (0xCF8) == 0x80000000 &&
+                   pci_sanity_check(&pci_direct_conf1)) {
+                       outl (tmp, 0xCF8);
+                       __restore_flags(flags);
+                       printk("PCI: Using configuration type 1\n");
+                       request_region(0xCF8, 8, "PCI conf1");
+                       return &pci_direct_conf1;
+               }
+               outl (tmp, 0xCF8);
+       }
+
+       /*
+        * Check if configuration type 2 works.
+        */
+       if (pci_probe & PCI_PROBE_CONF2) {
+               outb (0x00, 0xCFB);
+               outb (0x00, 0xCF8);
+               outb (0x00, 0xCFA);
+               if (inb (0xCF8) == 0x00 && inb (0xCFA) == 0x00 &&
+                   pci_sanity_check(&pci_direct_conf2)) {
+                       __restore_flags(flags);
+                       printk("PCI: Using configuration type 2\n");
+                       request_region(0xCF8, 4, "PCI conf2");
+                       return &pci_direct_conf2;
+               }
+       }
+
+       __restore_flags(flags);
+       return NULL;
+}
+
+#endif
+
+
+/*
+ * Several buggy motherboards address only 16 devices and mirror
+ * them to the next 16 IDs. We try to detect this `feature' on all
+ * primary buses (those containing host bridges as they are
+ * expected to be unique) and remove the ghost devices.
+ */
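+/*
+ * For example, a device at 00:02.0 on such a board appears a second time at
+ * 00:12.0 (devfn + 128) with identical IDs and resources; the copies in the
+ * upper sixteen slots are the ones removed below.
+ */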
+
+static void __devinit pcibios_fixup_ghosts(struct pci_bus *b)
+{
+       struct list_head *ln, *mn;
+       struct pci_dev *d, *e;
+       int mirror = PCI_DEVFN(16,0);
+       int seen_host_bridge = 0;
+       int i;
+
+       DBG("PCI: Scanning for ghost devices on bus %d\n", b->number);
+       for (ln=b->devices.next; ln != &b->devices; ln=ln->next) {
+               d = pci_dev_b(ln);
+               if ((d->class >> 8) == PCI_CLASS_BRIDGE_HOST)
+                       seen_host_bridge++;
+               for (mn=ln->next; mn != &b->devices; mn=mn->next) {
+                       e = pci_dev_b(mn);
+                       if (e->devfn != d->devfn + mirror ||
+                           e->vendor != d->vendor ||
+                           e->device != d->device ||
+                           e->class != d->class)
+                               continue;
+                       for(i=0; i<PCI_NUM_RESOURCES; i++)
+                               if (e->resource[i].start != d->resource[i].start ||
+                                   e->resource[i].end != d->resource[i].end ||
+                                   e->resource[i].flags != d->resource[i].flags)
+                                       continue;
+                       break;
+               }
+               if (mn == &b->devices)
+                       return;
+       }
+       if (!seen_host_bridge)
+               return;
+       printk("PCI: Ignoring ghost devices on bus %02x\n", b->number);
+
+       ln = &b->devices;
+       while (ln->next != &b->devices) {
+               d = pci_dev_b(ln->next);
+               if (d->devfn >= mirror) {
+                       list_del(&d->global_list);
+                       list_del(&d->bus_list);
+                       kfree(d);
+               } else
+                       ln = ln->next;
+       }
+}
+
+/*
+ * Discover remaining PCI buses in case there are peer host bridges.
+ * We use the number of last PCI bus provided by the PCI BIOS.
+ */
+static void __devinit pcibios_fixup_peer_bridges(void)
+{
+       int n;
+       struct pci_bus bus;
+       struct pci_dev dev;
+       u16 l;
+
+       if (pcibios_last_bus <= 0 || pcibios_last_bus >= 0xff)
+               return;
+       DBG("PCI: Peer bridge fixup\n");
+       for (n=0; n <= pcibios_last_bus; n++) {
+               if (pci_bus_exists(&pci_root_buses, n))
+                       continue;
+               bus.number = n;
+               bus.ops = pci_root_ops;
+               dev.bus = &bus;
+               for(dev.devfn=0; dev.devfn<256; dev.devfn += 8)
+                       if (!pci_read_config_word(&dev, PCI_VENDOR_ID, &l) &&
+                           l != 0x0000 && l != 0xffff) {
+                               DBG("Found device at %02x:%02x [%04x]\n", n, dev.devfn, l);
+                               printk("PCI: Discovered peer bus %02x\n", n);
+                               pci_scan_bus(n, pci_root_ops, NULL);
+                               break;
+                       }
+       }
+}
+
+struct pci_fixup pcibios_fixups[] = {
+       /* Currently no fixups for Hammer systems. They may need to be
+          re-added as needed. */
+       { 0 }
+};
+
+/*
+ *  Called after each bus is probed, but before its children
+ *  are examined.
+ */
+
+void __devinit pcibios_fixup_bus(struct pci_bus *b)
+{
+       pcibios_fixup_ghosts(b);
+       pci_read_bridge_bases(b);
+}
+
+/*
+ * Initialization. Try all known PCI access methods. Note that we support
+ * using both PCI BIOS and direct access: in such cases, we use I/O ports
+ * to access config space, but we still keep BIOS order of cards to be
+ * compatible with 2.0.X. This should go away some day.
+ */
+
+void __devinit pcibios_init(void)
+{
+       struct pci_ops *bios = NULL;
+       struct pci_ops *dir = NULL;
+
+#ifdef CONFIG_PCI_DIRECT
+       if (pci_probe & (PCI_PROBE_CONF1 | PCI_PROBE_CONF2))
+               dir = pci_check_direct();
+#endif
+       if (dir)
+               pci_root_ops = dir;
+       else if (bios)
+               pci_root_ops = bios;
+       else {
+               printk("PCI: No PCI bus detected\n");
+               return;
+       }
+
+       printk("PCI: Probing PCI hardware\n");
+       pci_root_bus = pci_scan_bus(0, pci_root_ops, NULL);
+
+       pcibios_irq_init();
+       pcibios_fixup_peer_bridges();
+       pcibios_fixup_irqs();
+       pcibios_resource_survey();
+
+}
+
+char * __devinit pcibios_setup(char *str)
+{
+       if (!strcmp(str, "off")) {
+               pci_probe = 0;
+               return NULL;
+       }
+       else if (!strncmp(str, "bios", 4)) {
+               printk("PCI: No PCI bios access on x86-64. BIOS hint ignored.\n");
+               return NULL;
+       } else if (!strcmp(str, "nobios")) {
+               pci_probe &= ~PCI_PROBE_BIOS;
+               return NULL;
+       } else if (!strcmp(str, "nosort")) { /* Default */ 
+               pci_probe |= PCI_NO_SORT;
+               return NULL;
+       } 
+#ifdef CONFIG_PCI_DIRECT
+       else if (!strcmp(str, "conf1")) {
+               pci_probe = PCI_PROBE_CONF1 | PCI_NO_CHECKS;
+               return NULL;
+       }
+       else if (!strcmp(str, "conf2")) {
+               pci_probe = PCI_PROBE_CONF2 | PCI_NO_CHECKS;
+               return NULL;
+       }
+#endif
+       else if (!strcmp(str, "rom")) {
+               pci_probe |= PCI_ASSIGN_ROMS;
+               return NULL;
+       } else if (!strcmp(str, "assign-busses")) {
+               pci_probe |= PCI_ASSIGN_ALL_BUSSES;
+               return NULL;
+       } else if (!strncmp(str, "irqmask=", 8)) {
+               pcibios_irq_mask = simple_strtol(str+8, NULL, 0);
+               return NULL;
+       } else if (!strncmp(str, "lastbus=", 8)) {
+               pcibios_last_bus = simple_strtol(str+8, NULL, 0);
+               return NULL;
+       }
+       return str;
+}
+
+unsigned int pcibios_assign_all_busses(void)
+{
+       return (pci_probe & PCI_ASSIGN_ALL_BUSSES) ? 1 : 0;
+}
+
+int pcibios_enable_device(struct pci_dev *dev)
+{
+       int err;
+
+       if ((err = pcibios_enable_resources(dev)) < 0)
+               return err;
+       pcibios_enable_irq(dev);
+       return 0;
+}
diff --git a/arch/x86_64/kernel/pci-x86_64.c b/arch/x86_64/kernel/pci-x86_64.c
new file mode 100644 (file)
index 0000000..781e1c0
--- /dev/null
@@ -0,0 +1,384 @@
+/*
+ *     Low-Level PCI Access for x86-64 machines
+ *
+ * Copyright 1993, 1994 Drew Eckhardt
+ *      Visionary Computing
+ *      (Unix and Linux consulting and custom programming)
+ *      Drew@Colorado.EDU
+ *      +1 (303) 786-7975
+ *
+ * Drew's work was sponsored by:
+ *     iX Multiuser Multitasking Magazine
+ *     Hannover, Germany
+ *     hm@ix.de
+ *
+ * Copyright 1997--2000 Martin Mares <mj@ucw.cz>
+ *
+ * For more information, please consult the following manuals (look at
+ * http://www.pcisig.com/ for how to get them):
+ *
+ * PCI BIOS Specification
+ * PCI Local Bus Specification
+ * PCI to PCI Bridge Specification
+ * PCI System Design Guide
+ *
+ *
+ * CHANGELOG :
+ * Jun 17, 1994 : Modified to accommodate the broken pre-PCI BIOS SPECIFICATION
+ *     Revision 2.0 present on <thys@dennis.ee.up.ac.za>'s ASUS mainboard.
+ *
+ * Jan 5,  1995 : Modified to probe PCI hardware at boot time by Frederic
+ *     Potter, potter@cao-vlsi.ibp.fr
+ *
+ * Jan 10, 1995 : Modified to store the information about configured pci
+ *      devices into a list, which can be accessed via /proc/pci by
+ *      Curtis Varner, cvarner@cs.ucr.edu
+ *
+ * Jan 12, 1995 : CPU-PCI bridge optimization support by Frederic Potter.
+ *     Alpha version. Intel & UMC chipset support only.
+ *
+ * Apr 16, 1995 : Source merge with the DEC Alpha PCI support. Most of the code
+ *     moved to drivers/pci/pci.c.
+ *
+ * Dec 7, 1996  : Added support for direct configuration access of boards
+ *      with Intel compatible access schemes (tsbogend@alpha.franken.de)
+ *
+ * Feb 3, 1997  : Set internal functions to static, save/restore flags
+ *     avoid dead locks reading broken PCI BIOS, werner@suse.de 
+ *
+ * Apr 26, 1997 : Fixed case when there is BIOS32, but not PCI BIOS
+ *     (mj@atrey.karlin.mff.cuni.cz)
+ *
+ * May 7,  1997 : Added some missing cli()'s. [mj]
+ * 
+ * Jun 20, 1997 : Corrected problems in "conf1" type accesses.
+ *      (paubert@iram.es)
+ *
+ * Aug 2,  1997 : Split to PCI BIOS handling and direct PCI access parts
+ *     and cleaned it up...     Martin Mares <mj@atrey.karlin.mff.cuni.cz>
+ *
+ * Feb 6,  1998 : No longer using BIOS to find devices and device classes. [mj]
+ *
+ * May 1,  1998 : Support for peer host bridges. [mj]
+ *
+ * Jun 19, 1998 : Changed to use spinlocks, so that PCI configuration space
+ *     can be accessed from interrupts even on SMP systems. [mj]
+ *
+ * August  1998 : Better support for peer host bridges and more paranoid
+ *     checks for direct hardware access. Ugh, this file starts to look as
+ *     a large gallery of common hardware bug workarounds (watch the comments)
+ *     -- the PCI specs themselves are sane, but most implementors should be
+ *     hit hard with \hammer scaled \magstep5. [mj]
+ *
+ * Jan 23, 1999 : More improvements to peer host bridge logic. i450NX fixup. [mj]
+ *
+ * Feb 8,  1999 : Added UM8886BF I/O address fixup. [mj]
+ *
+ * August  1999 : New resource management and configuration access stuff. [mj]
+ *
+ * Sep 19, 1999 : Use PCI IRQ routing tables for detection of peer host bridges.
+ *               Based on ideas by Chris Frantz and David Hinds. [mj]
+ *
+ * Sep 28, 1999 : Handle unreported/unassigned IRQs. Thanks to Shuu Yamaguchi
+ *               for a lot of patience during testing. [mj]
+ *
+ * Oct  8, 1999 : Split to pci-i386.c, pci-pc.c and pci-visws.c. [mj]
+ */
+
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/init.h>
+#include <linux/ioport.h>
+#include <linux/errno.h>
+
+#include "pci-x86_64.h"
+
+void
+pcibios_update_resource(struct pci_dev *dev, struct resource *root,
+                       struct resource *res, int resource)
+{
+       u32 new, check;
+       int reg;
+
+       new = res->start | (res->flags & PCI_REGION_FLAG_MASK);
+       if (resource < 6) {
+               reg = PCI_BASE_ADDRESS_0 + 4*resource;
+       } else if (resource == PCI_ROM_RESOURCE) {
+               res->flags |= PCI_ROM_ADDRESS_ENABLE;
+               new |= PCI_ROM_ADDRESS_ENABLE;
+               reg = dev->rom_base_reg;
+       } else {
+               /* Somebody might have asked allocation of a non-standard resource */
+               return;
+       }
+       
+       pci_write_config_dword(dev, reg, new);
+       pci_read_config_dword(dev, reg, &check);
+       if ((new ^ check) & ((new & PCI_BASE_ADDRESS_SPACE_IO) ? PCI_BASE_ADDRESS_IO_MASK : PCI_BASE_ADDRESS_MEM_MASK)) {
+               printk(KERN_ERR "PCI: Error while updating region "
+                      "%s/%d (%08x != %08x)\n", dev->slot_name, resource,
+                      new, check);
+       }
+}
+
+/*
+ * We need to avoid collisions with `mirrored' VGA ports
+ * and other strange ISA hardware, so we always want the
+ * addresses to be allocated in the 0x000-0x0ff region
+ * modulo 0x400.
+ *
+ * Why? Because some silly external IO cards only decode
+ * the low 10 bits of the IO address. The 0x00-0xff region
+ * is reserved for motherboard devices that decode all 16
+ * bits, so it's ok to allocate at, say, 0x2800-0x28ff,
+ * but we want to try to avoid allocating at 0x2900-0x2bff
+ * which might be mirrored at 0x0100-0x03ff.
+ */
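+/*
+ * For instance, an I/O region the firmware left at 0x2900 has (start & 0x300)
+ * set and is rounded up below to (0x2900 + 0x3ff) & ~0x3ff = 0x2c00, while a
+ * region already at 0x2c00 is left alone.
+ */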
+void
+pcibios_align_resource(void *data, struct resource *res, unsigned long size)
+{
+       if (res->flags & IORESOURCE_IO) {
+               unsigned long start = res->start;
+
+               if (start & 0x300) {
+                       start = (start + 0x3ff) & ~0x3ff;
+                       res->start = start;
+               }
+       }
+}
+
+
+/*
+ *  Handle resources of PCI devices.  If the world were perfect, we could
+ *  just allocate all the resource regions and do nothing more.  It isn't.
+ *  On the other hand, we cannot just re-allocate all devices, as it would
+ *  require us to know lots of host bridge internals.  So we attempt to
+ *  keep as much of the original configuration as possible, but tweak it
+ *  when it's found to be wrong.
+ *
+ *  Known BIOS problems we have to work around:
+ *     - I/O or memory regions not configured
+ *     - regions configured, but not enabled in the command register
+ *     - bogus I/O addresses above 64K used
+ *     - expansion ROMs left enabled (this may sound harmless, but given
+ *       the fact the PCI specs explicitly allow address decoders to be
+ *       shared between expansion ROMs and other resource regions, it's
+ *       at least dangerous)
+ *
+ *  Our solution:
+ *     (1) Allocate resources for all buses behind PCI-to-PCI bridges.
+ *         This gives us fixed barriers on where we can allocate.
+ *     (2) Allocate resources for all enabled devices.  If there is
+ *         a collision, just mark the resource as unallocated. Also
+ *         disable expansion ROMs during this step.
+ *     (3) Try to allocate resources for disabled devices.  If the
+ *         resources were assigned correctly, everything goes well,
+ *         if they weren't, they won't disturb allocation of other
+ *         resources.
+ *     (4) Assign new addresses to resources which were either
+ *         not configured at all or misconfigured.  If explicitly
+ *         requested by the user, configure expansion ROM address
+ *         as well.
+ */
+
+static void __init pcibios_allocate_bus_resources(struct list_head *bus_list)
+{
+       struct list_head *ln;
+       struct pci_bus *bus;
+       struct pci_dev *dev;
+       int idx;
+       struct resource *r, *pr;
+
+       /* Depth-First Search on bus tree */
+       for (ln=bus_list->next; ln != bus_list; ln=ln->next) {
+               bus = pci_bus_b(ln);
+               if ((dev = bus->self)) {
+                       for (idx = PCI_BRIDGE_RESOURCES; idx < PCI_NUM_RESOURCES; idx++) {
+                               r = &dev->resource[idx];
+                               if (!r->start)
+                                       continue;
+                               pr = pci_find_parent_resource(dev, r);
+                               if (!pr || request_resource(pr, r) < 0)
+                                       printk(KERN_ERR "PCI: Cannot allocate resource region %d of bridge %s\n", idx, dev->slot_name);
+                       }
+               }
+               pcibios_allocate_bus_resources(&bus->children);
+       }
+}
+
+static void __init pcibios_allocate_resources(int pass)
+{
+       struct pci_dev *dev;
+       int idx, disabled;
+       u16 command;
+       struct resource *r, *pr;
+
+       pci_for_each_dev(dev) {
+               pci_read_config_word(dev, PCI_COMMAND, &command);
+               for(idx = 0; idx < 6; idx++) {
+                       r = &dev->resource[idx];
+                       if (r->parent)          /* Already allocated */
+                               continue;
+                       if (!r->start)          /* Address not assigned at all */
+                               continue;
+                       if (r->flags & IORESOURCE_IO)
+                               disabled = !(command & PCI_COMMAND_IO);
+                       else
+                               disabled = !(command & PCI_COMMAND_MEMORY);
+                       if (pass == disabled) {
+                               DBG("PCI: Resource %08lx-%08lx (f=%lx, d=%d, p=%d)\n",
+                                   r->start, r->end, r->flags, disabled, pass);
+                               pr = pci_find_parent_resource(dev, r);
+                               if (!pr || request_resource(pr, r) < 0) {
+                                       printk(KERN_ERR "PCI: Cannot allocate resource region %d of device %s\n", idx, dev->slot_name);
+                                       /* We'll assign a new address later */
+                                       r->end -= r->start;
+                                       r->start = 0;
+                               }
+                       }
+               }
+               if (!pass) {
+                       r = &dev->resource[PCI_ROM_RESOURCE];
+                       if (r->flags & PCI_ROM_ADDRESS_ENABLE) {
+                               /* Turn the ROM off, leave the resource region, but keep it unregistered. */
+                               u32 reg;
+                               DBG("PCI: Switching off ROM of %s\n", dev->slot_name);
+                               r->flags &= ~PCI_ROM_ADDRESS_ENABLE;
+                               pci_read_config_dword(dev, dev->rom_base_reg, &reg);
+                               pci_write_config_dword(dev, dev->rom_base_reg, reg & ~PCI_ROM_ADDRESS_ENABLE);
+                       }
+               }
+       }
+}
+
+static void __init pcibios_assign_resources(void)
+{
+       struct pci_dev *dev;
+       int idx;
+       struct resource *r;
+
+       pci_for_each_dev(dev) {
+               int class = dev->class >> 8;
+
+               /* Don't touch classless devices and host bridges */
+               if (!class || class == PCI_CLASS_BRIDGE_HOST)
+                       continue;
+
+               for(idx=0; idx<6; idx++) {
+                       r = &dev->resource[idx];
+
+                       /*
+                        *  Don't touch IDE controllers and I/O ports of video cards!
+                        */
+                       if ((class == PCI_CLASS_STORAGE_IDE && idx < 4) ||
+                           (class == PCI_CLASS_DISPLAY_VGA && (r->flags & IORESOURCE_IO)))
+                               continue;
+
+                       /*
+                        *  We shall assign a new address to this resource, either because
+                        *  the BIOS forgot to do so or because we have decided the old
+                        *  address was unusable for some reason.
+                        */
+                       if (!r->start && r->end)
+                               pci_assign_resource(dev, idx);
+               }
+
+               if (pci_probe & PCI_ASSIGN_ROMS) {
+                       r = &dev->resource[PCI_ROM_RESOURCE];
+                       r->end -= r->start;
+                       r->start = 0;
+                       if (r->end)
+                               pci_assign_resource(dev, PCI_ROM_RESOURCE);
+               }
+       }
+}
+
+void __init pcibios_resource_survey(void)
+{
+       DBG("PCI: Allocating resources\n");
+       pcibios_allocate_bus_resources(&pci_root_buses);
+       pcibios_allocate_resources(0);
+       pcibios_allocate_resources(1);
+       pcibios_assign_resources();
+}
+
+int pcibios_enable_resources(struct pci_dev *dev)
+{
+       u16 cmd, old_cmd;
+       int idx;
+       struct resource *r;
+
+       pci_read_config_word(dev, PCI_COMMAND, &cmd);
+       old_cmd = cmd;
+       for(idx=0; idx<6; idx++) {
+               r = &dev->resource[idx];
+               if (!r->start && r->end) {
+                       printk(KERN_ERR "PCI: Device %s not available because of resource collisions\n", dev->slot_name);
+                       return -EINVAL;
+               }
+               if (r->flags & IORESOURCE_IO)
+                       cmd |= PCI_COMMAND_IO;
+               if (r->flags & IORESOURCE_MEM)
+                       cmd |= PCI_COMMAND_MEMORY;
+       }
+       if (dev->resource[PCI_ROM_RESOURCE].start)
+               cmd |= PCI_COMMAND_MEMORY;
+       if (cmd != old_cmd) {
+               printk("PCI: Enabling device %s (%04x -> %04x)\n", dev->slot_name, old_cmd, cmd);
+               pci_write_config_word(dev, PCI_COMMAND, cmd);
+       }
+       return 0;
+}
+
+/*
+ *  If we set up a device for bus mastering, we need to check the latency
+ *  timer as certain crappy BIOSes forget to set it properly.
+ */
+unsigned int pcibios_max_latency = 255;
+
+void pcibios_set_master(struct pci_dev *dev)
+{
+       u8 lat;
+       pci_read_config_byte(dev, PCI_LATENCY_TIMER, &lat);
+       if (lat < 16)
+               lat = (64 <= pcibios_max_latency) ? 64 : pcibios_max_latency;
+       else if (lat > pcibios_max_latency)
+               lat = pcibios_max_latency;
+       else
+               return;
+       printk("PCI: Setting latency timer of device %s to %d\n", dev->slot_name, lat);
+       pci_write_config_byte(dev, PCI_LATENCY_TIMER, lat);
+}
+
+int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma,
+                       enum pci_mmap_state mmap_state, int write_combine)
+{
+       unsigned long prot;
+
+       /* I/O space cannot be accessed via normal processor loads and
+        * stores on this platform.
+        */
+       if (mmap_state == pci_mmap_io)
+               return -EINVAL;
+
+       /* Leave vm_pgoff as-is, the PCI space address is the physical
+        * address on this platform.
+        */
+       vma->vm_flags |= (VM_SHM | VM_LOCKED | VM_IO);
+
+       prot = pgprot_val(vma->vm_page_prot);
+       if (boot_cpu_data.x86 > 3)
+               prot |= _PAGE_PCD | _PAGE_PWT;
+       vma->vm_page_prot = __pgprot(prot);
+
+       /* Write-combine setting is ignored, it is changed via the mtrr
+        * interfaces on this platform.
+        */
+       if (remap_page_range(vma, vma->vm_start, vma->vm_pgoff << PAGE_SHIFT,
+                            vma->vm_end - vma->vm_start,
+                            vma->vm_page_prot))
+               return -EAGAIN;
+
+       return 0;
+}
diff --git a/arch/x86_64/kernel/pci-x86_64.h b/arch/x86_64/kernel/pci-x86_64.h
new file mode 100644 (file)
index 0000000..a78a051
--- /dev/null
@@ -0,0 +1,72 @@
+/*
+ *     Low-Level PCI Access for x86-64 machines.
+ *
+ *     (c) 1999 Martin Mares <mj@ucw.cz>
+ */
+
+#undef DEBUG
+
+#ifdef DEBUG
+#define DBG(x...) printk(x)
+#else
+#define DBG(x...)
+#endif
+
+#define PCI_PROBE_BIOS         0x0001
+#define PCI_PROBE_CONF1                0x0002
+#define PCI_PROBE_CONF2                0x0004
+#define PCI_NO_SORT            0x0100
+#define PCI_BIOS_SORT          0x0200
+#define PCI_NO_CHECKS          0x0400
+#define PCI_ASSIGN_ROMS                0x1000
+#define PCI_BIOS_IRQ_SCAN      0x2000
+#define PCI_ASSIGN_ALL_BUSSES  0x4000
+
+extern unsigned int pci_probe;
+
+/* pci-i386.c */
+
+extern unsigned int pcibios_max_latency;
+
+void pcibios_resource_survey(void);
+int pcibios_enable_resources(struct pci_dev *);
+
+/* pci-pc.c */
+
+extern int pcibios_last_bus;
+extern struct pci_bus *pci_root_bus;
+extern struct pci_ops *pci_root_ops;
+
+struct irq_routing_table *pcibios_get_irq_routing_table(void);
+int pcibios_set_irq_routing(struct pci_dev *dev, int pin, int irq);
+
+/* pci-irq.c */
+
+struct irq_info {
+       u8 bus, devfn;                  /* Bus, device and function */
+       struct {
+               u8 link;                /* IRQ line ID, chipset dependent, 0=not routed */
+               u16 bitmap;             /* Available IRQs */
+       } __attribute__((packed)) irq[4];
+       u8 slot;                        /* Slot number, 0=onboard */
+       u8 rfu;
+} __attribute__((packed));
+
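+/*
+ * This mirrors the BIOS "$PIR" interrupt routing table: pci-irq.c above
+ * locates it by scanning 0xf0000-0xfffff in 16-byte steps for the signature
+ * and by checking that the table's bytes sum to zero modulo 256.
+ */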
+struct irq_routing_table {
+       u32 signature;                  /* PIRQ_SIGNATURE should be here */
+       u16 version;                    /* PIRQ_VERSION */
+       u16 size;                       /* Table size in bytes */
+       u8 rtr_bus, rtr_devfn;          /* Where the interrupt router lies */
+       u16 exclusive_irqs;             /* IRQs devoted exclusively to PCI usage */
+       u16 rtr_vendor, rtr_device;     /* Vendor and device ID of interrupt router */
+       u32 miniport_data;              /* Crap */
+       u8 rfu[11];
+       u8 checksum;                    /* Modulo 256 checksum must give zero */
+       struct irq_info slots[0];
+} __attribute__((packed));
+
+extern unsigned int pcibios_irq_mask;
+
+void pcibios_irq_init(void);
+void pcibios_fixup_irqs(void);
+void pcibios_enable_irq(struct pci_dev *dev);
diff --git a/arch/x86_64/kernel/process.c b/arch/x86_64/kernel/process.c
new file mode 100644 (file)
index 0000000..f552372
--- /dev/null
@@ -0,0 +1,756 @@
+/*
+ *  linux/arch/x86-64/kernel/process.c
+ *
+ *  Copyright (C) 1995  Linus Torvalds
+ *
+ *  Pentium III FXSR, SSE support
+ *     Gareth Hughes <gareth@valinux.com>, May 2000
+ * 
+ *  X86-64 port
+ *     Andi Kleen.
+ * 
+ *  $Id: process.c,v 1.38 2002/01/15 10:08:03 ak Exp $
+ */
+
+/*
+ * This file handles the architecture-dependent parts of process handling..
+ */
+
+#define __KERNEL_SYSCALLS__
+#include <stdarg.h>
+
+#include <linux/errno.h>
+#include <linux/sched.h>
+#include <linux/fs.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/smp.h>
+#include <linux/smp_lock.h>
+#include <linux/stddef.h>
+#include <linux/unistd.h>
+#include <linux/ptrace.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+#include <linux/user.h>
+#include <linux/a.out.h>
+#include <linux/interrupt.h>
+#include <linux/config.h>
+#include <linux/delay.h>
+#include <linux/reboot.h>
+#include <linux/init.h>
+
+#include <asm/uaccess.h>
+#include <asm/pgtable.h>
+#include <asm/system.h>
+#include <asm/io.h>
+#include <asm/ldt.h>
+#include <asm/processor.h>
+#include <asm/i387.h>
+#include <asm/desc.h>
+#include <asm/pda.h>
+#include <asm/prctl.h>
+
+#include <linux/irq.h>
+#include <linux/err.h>
+
+asmlinkage extern void ret_from_fork(void);
+
+int hlt_counter;
+
+/*
+ * Power management idle function, if any.
+ */
+void (*pm_idle)(void);
+
+/*
+ * Power off function, if any
+ */
+void (*pm_power_off)(void);
+
+void disable_hlt(void)
+{
+       hlt_counter++;
+}
+
+void enable_hlt(void)
+{
+       hlt_counter--;
+}
+
+/*
+ * We use this if we don't have any better
+ * idle routine..
+ */
+static void default_idle(void)
+{
+       if (!hlt_counter) {
+               __cli();
+               if (!need_resched())
+                       safe_halt();
+               else
+                       __sti();
+       }
+}
+
+/*
+ * On SMP it's slightly faster (but much more power-consuming!)
+ * to poll the ->need_resched flag instead of waiting for the
+ * cross-CPU IPI to arrive. Use this option with caution.
+ */
+static void poll_idle (void)
+{
+       int oldval;
+
+       __sti();
+
+       /*
+        * Deal with another CPU just having chosen a thread to
+        * run here:
+        */
+       oldval = test_and_clear_thread_flag(TIF_NEED_RESCHED);
+
+       if (!oldval) {
+               set_thread_flag(TIF_POLLING_NRFLAG); 
+               asm volatile(
+                       "2:"
+                       "testl %0,%1;"
+                       "rep; nop;"
+                       "je 2b;"
+                       : :
+                       "i" (_TIF_NEED_RESCHED), 
+                       "m" (current_thread_info()->flags));
+       } else {
+               set_need_resched();
+       }
+}
+
+/*
+ * The idle thread. There's no useful work to be
+ * done, so just try to conserve power and have a
+ * low exit latency (ie sit in a loop waiting for
+ * somebody to say that they'd like to reschedule)
+ */
+void cpu_idle (void)
+{
+       /* endless idle loop with no priority at all */
+       while (1) {
+               void (*idle)(void) = pm_idle;
+               if (!idle)
+                       idle = default_idle;
+               while (!need_resched())
+                       idle();
+               schedule();
+               check_pgt_cache();
+       }
+}
+
+static int __init idle_setup (char *str)
+{
+       if (!strncmp(str, "poll", 4)) {
+               printk("using polling idle threads.\n");
+               pm_idle = poll_idle;
+       }
+
+       return 1;
+}
+
+__setup("idle=", idle_setup);
+
+static long no_idt[3];
+static int reboot_mode;
+int reboot_thru_bios;
+
+#ifdef CONFIG_SMP
+int reboot_smp = 0;
+static int reboot_cpu = -1;
+/* shamelessly grabbed from lib/vsprintf.c for readability */
+#define is_digit(c)    ((c) >= '0' && (c) <= '9')
+#endif
+static int __init reboot_setup(char *str)
+{
+       while(1) {
+               switch (*str) {
+               case 'w': /* "warm" reboot (no memory testing etc) */
+                       reboot_mode = 0x1234;
+                       break;
+               case 'c': /* "cold" reboot (with memory testing etc) */
+                       reboot_mode = 0x0;
+                       break;
+               case 'b': /* "bios" reboot by jumping through the BIOS */
+                       reboot_thru_bios = 1;
+                       break;
+               case 'h': /* "hard" reboot by toggling RESET and/or crashing the CPU */
+                       reboot_thru_bios = 0;
+                       break;
+#ifdef CONFIG_SMP
+               case 's': /* "smp" reboot by executing reset on BSP or other CPU*/
+                       reboot_smp = 1;
+                       if (is_digit(*(str+1))) {
+                               reboot_cpu = (int) (*(str+1) - '0');
+                               if (is_digit(*(str+2))) 
+                                       reboot_cpu = reboot_cpu*10 + (int)(*(str+2) - '0');
+                       }
+                               /* we will leave sorting out the final value 
+                               when we are ready to reboot, since we might not
+                               have set up boot_cpu_id or smp_num_cpu */
+                       break;
+#endif
+               }
+               if((str = strchr(str,',')) != NULL)
+                       str++;
+               else
+                       break;
+       }
+       return 1;
+}
+
+__setup("reboot=", reboot_setup);
+
+static inline void kb_wait(void)
+{
+       int i;
+
+       for (i=0; i<0x10000; i++)
+               if ((inb_p(0x64) & 0x02) == 0)
+                       break;
+}
+
+/*
+ * Switch to real mode and then execute the code
+ * specified by the code and length parameters.
+ * We assume that length will always be less than 100!
+ */
+void machine_real_restart(unsigned char *code, int length)
+{
+       cli();
+
+       /* This will have to be rewritten for sledgehammer. It would
+          help if sledgehammer had a simple option to reset itself.
+       */
+
+       panic( "real_restart is hard to do.\n" );
+       while(1);
+}
+
+void machine_restart(char * __unused)
+{
+#ifdef CONFIG_SMP
+       int cpuid;
+       
+       cpuid = GET_APIC_ID(apic_read(APIC_ID));
+
+       if (reboot_smp) {
+
+               /* check to see if reboot_cpu is valid;
+                  if it's not, default to the BSP */
+               if ((reboot_cpu == -1) ||  
+                     (reboot_cpu > (NR_CPUS -1))  || 
+                     !(phys_cpu_present_map & (1<<cpuid))) 
+                       reboot_cpu = boot_cpu_id;
+
+               reboot_smp = 0;  /* use this as a flag to only go through this once */
+               /* re-run this function on the other CPUs
+                  it will fall through this section since we have
+                  cleared reboot_smp, and do the reboot if it is the
+                  correct CPU, otherwise it halts. */
+               if (reboot_cpu != cpuid)
+                       smp_call_function((void *)machine_restart , NULL, 1, 0);
+       }
+
+       /* if reboot_cpu is still -1, then we want a traditional reboot,
+          and if we are not running on the reboot_cpu, halt */
+       if ((reboot_cpu != -1) && (cpuid != reboot_cpu)) {
+               for (;;)
+               __asm__ __volatile__ ("hlt");
+       }
+       /*
+        * Stop all CPUs and turn off local APICs and the IO-APIC, so
+        * other OSs see a clean IRQ state.
+        */
+       smp_send_stop();
+       disable_IO_APIC();
+#endif
+
+
+       if(!reboot_thru_bios) {
+               /* rebooting needs to touch the page at absolute addr 0 */
+               *((unsigned short *)__va(0x472)) = reboot_mode;
+               for (;;) {
+                       int i;
+                       for (i=0; i<100; i++) {
+                               kb_wait();
+                               udelay(50);
+                               outb(0xfe,0x64);         /* pulse reset low */
+                               udelay(50);
+                       }
+                       /* That didn't work - force a triple fault.. */
+                       __asm__ __volatile__("lidt %0": :"m" (no_idt));
+                       __asm__ __volatile__("int3");
+               }
+       }
+
+       printk("no bios restart currently\n"); 
+       for (;;); 
+}
+
+void machine_halt(void)
+{
+}
+
+void machine_power_off(void)
+{
+       if (pm_power_off)
+               pm_power_off();
+}
+
+/* Prints also some state that isn't saved in the pt_regs */ 
+void show_regs(struct pt_regs * regs)
+{
+       unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L, fs, gs;
+       unsigned int fsindex,gsindex;
+       unsigned int ds,cs,es; 
+
+       printk("\n");
+       printk("Pid: %d, comm: %.20s %s\n", current->pid, current->comm, print_tainted());
+       printk("RIP: %04lx:[<%016lx>]\n", regs->cs & 0xffff, regs->rip);
+       printk("RSP: %016lx  EFLAGS: %08lx\n", regs->rsp, regs->eflags);
+       printk("RAX: %016lx RBX: %016lx RCX: %016lx\n",
+              regs->rax, regs->rbx, regs->rcx);
+       printk("RDX: %016lx RSI: %016lx RDI: %016lx\n",
+              regs->rdx, regs->rsi, regs->rdi); 
+       printk("RBP: %016lx R08: %016lx R09: %016lx\n",
+              regs->rbp, regs->r8, regs->r9); 
+       printk("R10: %016lx R11: %016lx R12: %016lx\n",
+              regs->r10, regs->r11, regs->r12); 
+       printk("R13: %016lx R14: %016lx R15: %016lx\n",
+              regs->r13, regs->r14, regs->r15); 
+
+       asm("movl %%ds,%0" : "=r" (ds)); 
+       asm("movl %%es,%0" : "=r" (es)); 
+       asm("movl %%cs,%0" : "=r" (cs)); 
+       asm("movl %%fs,%0" : "=r" (fsindex));
+       asm("movl %%gs,%0" : "=r" (gsindex));
+
+       rdmsrl(0xc0000100, fs);
+       rdmsrl(0xc0000101, gs); 
+
+       asm("movq %%cr0, %0": "=r" (cr0));
+       asm("movq %%cr2, %0": "=r" (cr2));
+       asm("movq %%cr3, %0": "=r" (cr3));
+       asm("movq %%cr4, %0": "=r" (cr4));
+
+       printk("FS: %016lx(%04x) GS:%016lx(%04x)\n", fs,fsindex,gs,gsindex); 
+       printk("CS: %04x DS:%04x ES:%04x CR0: %016lx\n", cs, ds, es, cr0); 
+       printk("CR2: %016lx CR3: %016lx CR4: %016lx\n", cr2, cr3, cr4);
+}
+
+/*
+ * No need to lock the MM as we are the last user
+ */
+void release_segments(struct mm_struct *mm)
+{
+       void * ldt = mm->context.segments;
+
+       /*
+        * free the LDT
+        */
+       if (ldt) {
+               mm->context.segments = NULL;
+               clear_LDT();
+               vfree(ldt);
+       }
+}
+
+#define __STR(x) #x
+#define __STR2(x) __STR(x)
+
+/*
+ * Free current thread data structures etc..
+ */
+void exit_thread(void)
+{
+       /* nothing to do ... */
+}
+
+void flush_thread(void)
+{
+       struct task_struct *tsk = current;
+
+       memset(tsk->thread.debugreg, 0, sizeof(unsigned long)*8);
+       /*
+        * Forget coprocessor state..
+        */
+       clear_fpu(tsk);
+       tsk->used_math = 0;
+}
+
+void release_thread(struct task_struct *dead_task)
+{
+       if (dead_task->mm) {
+               void * ldt = dead_task->mm->context.segments;
+
+               // temporary debugging check
+               if (ldt) {
+                       printk("WARNING: dead process %8s still has LDT? <%p>\n",
+                                       dead_task->comm, ldt);
+                       BUG();
+               }
+       }
+}
+
+/*
+ * we do not have to muck with descriptors here, that is
+ * done in switch_mm() as needed.
+ */
+void copy_segments(struct task_struct *p, struct mm_struct *new_mm)
+{
+       struct mm_struct * old_mm;
+       void *old_ldt, *ldt;
+       ldt = NULL;
+       old_mm = current->mm;
+       if (old_mm && (old_ldt = old_mm->context.segments) != NULL) {
+               /*
+                * Completely new LDT, we initialize it from the parent:
+                */
+               ldt = vmalloc(LDT_ENTRIES*LDT_ENTRY_SIZE);
+               if (!ldt)
+                       printk(KERN_WARNING "ldt allocation failed\n");
+               else
+                       memcpy(ldt, old_ldt, LDT_ENTRIES*LDT_ENTRY_SIZE);
+       }
+       new_mm->context.segments = ldt;
+       new_mm->context.cpuvalid = 0UL;
+       return;
+}
+
+int copy_thread(int nr, unsigned long clone_flags, unsigned long rsp, 
+               unsigned long unused,
+       struct task_struct * p, struct pt_regs * regs)
+{
+       struct pt_regs * childregs;
+       struct task_struct *me = current;
+
+       childregs = ((struct pt_regs *) (THREAD_SIZE + (unsigned long) p->thread_info)) - 1;
+
+       *childregs = *regs;
+
+       childregs->rax = 0;
+       childregs->rsp = rsp;
+       if (rsp == ~0) {
+               childregs->rsp = (unsigned long)childregs;
+       }
+
+       p->thread.rsp = (unsigned long) childregs;
+       p->thread.rsp0 = (unsigned long) (childregs+1);
+       p->thread.userrsp = current->thread.userrsp; 
+
+       p->thread.rip = (unsigned long) ret_from_fork;
+
+       p->thread.fs = me->thread.fs;
+       p->thread.gs = me->thread.gs;
+
+       asm("movl %%gs,%0" : "=m" (p->thread.gsindex));
+       asm("movl %%fs,%0" : "=m" (p->thread.fsindex));
+       asm("movl %%es,%0" : "=m" (p->thread.es));
+       asm("movl %%ds,%0" : "=m" (p->thread.ds));
+
+       unlazy_fpu(current);    
+       p->thread.i387 = current->thread.i387;
+
+       return 0;
+}
+
+/*
+ * fill in the user structure for a core dump..
+ */
+void dump_thread(struct pt_regs * regs, struct user * dump)
+{
+       int i;
+
+/* changed the size calculations - should hopefully work better. lbt */
+       dump->magic = CMAGIC;
+       dump->start_code = 0;
+       dump->start_stack = regs->rsp & ~(PAGE_SIZE - 1);
+       dump->u_tsize = ((unsigned long) current->mm->end_code) >> PAGE_SHIFT;
+       dump->u_dsize = ((unsigned long) (current->mm->brk + (PAGE_SIZE-1))) >> PAGE_SHIFT;
+       dump->u_dsize -= dump->u_tsize;
+       dump->u_ssize = 0;
+       for (i = 0; i < 8; i++)
+               dump->u_debugreg[i] = current->thread.debugreg[i];  
+
+       if (dump->start_stack < TASK_SIZE)
+               dump->u_ssize = ((unsigned long) (TASK_SIZE - dump->start_stack)) >> PAGE_SHIFT;
+
+#define SAVE(reg) dump->regs.reg = regs->reg
+       SAVE(rax);
+       SAVE(rbx);
+       SAVE(rcx);
+       SAVE(rdx);
+       SAVE(rsi);
+       SAVE(rdi);
+       SAVE(rbp);
+       SAVE(r8);
+       SAVE(r9);
+       SAVE(r10);
+       SAVE(r11);
+       SAVE(r12);
+       SAVE(r13);
+       SAVE(r14);
+       SAVE(r15);
+       SAVE(orig_rax); 
+       SAVE(rip); 
+#undef SAVE
+
+       /* FIXME: Should use symbolic names for msr-s! */
+       rdmsrl(0xc0000100, dump->regs.fs_base);
+       rdmsrl(0xc0000101, dump->regs.kernel_gs_base); 
+
+       dump->u_fpvalid = dump_fpu (regs, &dump->i387);
+}
+
+/*
+ * This special macro can be used to load a debugging register
+ */
+#define loaddebug(thread,register) \
+               set_debug(thread->debugreg[register], register)
+
+/*
+ *     switch_to(x,y) should switch tasks from x to y.
+ *
+ * We fsave/fwait so that an exception goes off at the right time
+ * (as a call from the fsave or fwait in effect) rather than to
+ * the wrong process. 
+ * 
+ * This could still be optimized: 
+ * - fold all the options into a flag word and test it with a single test.
+ * - could test fs/gs bitsliced
+ */
+void __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
+{
+       struct thread_struct *prev = &prev_p->thread,
+                                *next = &next_p->thread;
+       struct tss_struct *tss = init_tss + smp_processor_id();
+
+       unlazy_fpu(prev_p);
+
+       /*
+        * Reload esp0, LDT and the page table pointer:
+        */
+       tss->rsp0 = next->rsp0;
+
+       /* 
+        * Switch DS and ES.
+        * XXX: check if this is safe on SMP
+        */
+       asm volatile("movl %%es,%0" : "=m" (prev->es)); 
+       if (unlikely(next->es != prev->es))
+               loadsegment(es, next->es); 
+       
+       asm volatile ("movl %%ds,%0" : "=m" (prev->ds)); 
+       if (unlikely(next->ds != prev->ds))
+               loadsegment(ds, next->ds);
+
+       /* 
+        * Switch FS and GS.
+        */
+       { 
+               unsigned int fsindex;
+
+               asm volatile("movl %%fs,%0" : "=g" (fsindex)); 
+               if (unlikely(fsindex != prev->fsindex))
+                       prev->fs = 0;                           
+               if (unlikely((fsindex | next->fsindex) || prev->fs))
+                       loadsegment(fs, next->fsindex);
+               /* Should use a shortcut via a GDT entry if next->fs is 32bit */
+               if (fsindex != prev->fsindex || next->fs != prev->fs)
+                       wrmsrl(MSR_FS_BASE, next->fs); 
+               prev->fsindex = fsindex;
+       }
+
+       {
+               unsigned int gsindex;
+
+               asm volatile("movl %%gs,%0" : "=g" (gsindex)); 
+               if (unlikely(gsindex != prev->gsindex)) 
+                       prev->gs = 0;                           
+               if (unlikely((gsindex | next->gsindex) || prev->gs)) { 
+                       unsigned long flags; 
+                       /* could load gs in syscall exit after swapgs instead */ 
+                       int nr = smp_processor_id(); 
+                       __save_flags(flags); 
+                       __cli(); 
+                       loadsegment(gs, next->gsindex); 
+                       wrmsrl(MSR_GS_BASE, cpu_pda+nr); 
+                       __restore_flags(flags); 
+               }
+               if (gsindex != prev->gsindex || (prev->gs | next->gs))
+                       wrmsrl(MSR_KERNEL_GS_BASE, next->gs); 
+               prev->gsindex = gsindex;
+       }
+
+       /* 
+        * Switch the PDA context.
+        */
+       prev->userrsp = read_pda(oldrsp); 
+       write_pda(oldrsp, next->userrsp); 
+       write_pda(pcurrent, next_p); 
+       write_pda(kernelstack, 
+                 (unsigned long)next_p->thread_info + THREAD_SIZE - PDA_STACKOFFSET);
+
+       /*
+        * Now maybe reload the debug registers
+        */
+       if (unlikely(next->debugreg[7])) {
+               loaddebug(next, 0);
+               loaddebug(next, 1);
+               loaddebug(next, 2);
+               loaddebug(next, 3);
+               /* no 4 and 5 */
+               loaddebug(next, 6);
+               loaddebug(next, 7);
+       }
+
+
+       /* 
+        * Handle the IO bitmap 
+        */ 
+       if (unlikely(prev->ioperm | next->ioperm)) {
+               if (next->ioperm) {
+                       /*
+                        * 4 cachelines copy ... not good, but not that
+                        * bad either. Anyone got something better?
+                        * This only affects processes which use ioperm().
+                        * [Putting the TSSs into 4k-tlb mapped regions
+                        * and playing VM tricks to switch the IO bitmap
+                        * is not really acceptable.]
+                        * On x86-64 we could put multiple bitmaps into 
+                        * the GDT and just switch offsets
+                        * This would require ugly special cases on overflow
+                        * though -AK 
+                        */
+                       memcpy(tss->io_bitmap, next->io_bitmap,
+                                IO_BITMAP_SIZE*sizeof(u32));
+                       tss->io_map_base = IO_BITMAP_OFFSET;
+               } else {
+                       /*
+                        * a bitmap offset pointing outside of the TSS limit
+                        * causes a nicely controllable SIGSEGV if a process
+                        * tries to use a port IO instruction. The first
+                        * sys_ioperm() call sets up the bitmap properly.
+                        */
+                       tss->io_map_base = INVALID_IO_BITMAP_OFFSET;
+               }
+       }
+}
+
+/*
+ * sys_execve() executes a new program.
+ */
+asmlinkage 
+long sys_execve(char *name, char **argv,char **envp, struct pt_regs regs)
+{
+       long error;
+       char * filename;
+
+       filename = getname(name);
+       error = PTR_ERR(filename);
+       if (IS_ERR(filename)) 
+               return error;
+       error = do_execve(filename, argv, envp, &regs); 
+       if (error == 0)
+               current->ptrace &= ~PT_DTRACE;
+       putname(filename);
+       return error;
+}
+
+void set_personality_64bit(void)
+{
+       /* inherit personality from parent */
+
+       /* Make sure to be in 64bit mode */
+       clear_thread_flag(TIF_IA32); 
+}
+
+asmlinkage long sys_fork(struct pt_regs regs)
+{
+       return do_fork(SIGCHLD, regs.rsp, &regs, 0);
+}
+
+asmlinkage long sys_clone(unsigned long clone_flags, unsigned long newsp, struct pt_regs regs)
+{
+       if (!newsp)
+               newsp = regs.rsp;
+       return do_fork(clone_flags, newsp, &regs, 0);
+}
+
+/*
+ * This is trivial, and on the face of it looks like it
+ * could equally well be done in user mode.
+ *
+ * Not so, for quite unobvious reasons - register pressure.
+ * In user mode vfork() cannot have a stack frame, and if
+ * done by calling the "clone()" system call directly, you
+ * do not have enough call-clobbered registers to hold all
+ * the information you need.
+ */
+asmlinkage long sys_vfork(struct pt_regs regs)
+{
+       return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs.rsp, &regs, 0);
+}
+
+/*
+ * These bracket the sleeping functions..
+ */
+extern void scheduling_functions_start_here(void);
+extern void scheduling_functions_end_here(void);
+#define first_sched    ((unsigned long) scheduling_functions_start_here)
+#define last_sched     ((unsigned long) scheduling_functions_end_here)
+
+/* 
+ * Do a traceback through the scheduler to find where a process sleeps.
+ * 
+ * Currently sched.c is compiled with -fno-omit-frame-pointer, so we
+ * just go through the stack frames. 
+ */
+unsigned long get_wchan(struct task_struct *p)
+{
+       return -1;
+}
+#undef last_sched
+#undef first_sched
+
+asmlinkage int sys_arch_prctl(int code, unsigned long addr)
+{ 
+       int ret = 0; 
+       unsigned long tmp; 
+       switch (code) { 
+       case ARCH_SET_GS:
+               asm volatile("movw %%gs,%0" : "=g" (current->thread.gsindex)); 
+               current->thread.gs = addr;
+               ret = checking_wrmsrl(MSR_KERNEL_GS_BASE, addr); 
+               break;
+       case ARCH_SET_FS:
+               asm volatile("movw %%fs,%0" : "=g" (current->thread.fsindex)); 
+               current->thread.fs = addr;
+               ret = checking_wrmsrl(MSR_FS_BASE, addr); 
+               break;
+
+               /* Returned value may not be correct when the user changed fs/gs */ 
+       case ARCH_GET_FS:
+               rdmsrl(MSR_FS_BASE, tmp);
+               ret = put_user(tmp, (unsigned long *)addr); 
+               break; 
+
+       case ARCH_GET_GS: 
+               rdmsrl(MSR_KERNEL_GS_BASE, tmp); 
+               ret = put_user(tmp, (unsigned long *)addr); 
+               break;
+
+       default:
+               ret = -EINVAL;
+               break;
+       } 
+       return ret;     
+} 
+
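The sys_arch_prctl() entry point above is the interface user code would use to read or set the 64-bit FS/GS bases. A minimal user-space sketch follows; the raw syscall() invocation, the SYS_arch_prctl constant and the asm/prctl.h header are assumptions about the eventual userland plumbing rather than something this patch provides.

#include <stdio.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <asm/prctl.h>          /* ARCH_GET_FS etc.; assumed to be exported to userland */

int main(void)
{
        unsigned long fsbase = 0;

        /* Hypothetical invocation: no libc wrapper is assumed, so the raw
         * syscall() entry point is used with SYS_arch_prctl. */
        if (syscall(SYS_arch_prctl, ARCH_GET_FS, &fsbase) == -1) {
                perror("arch_prctl");
                return 1;
        }
        printf("FS base: %#lx\n", fsbase);
        return 0;
}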
diff --git a/arch/x86_64/kernel/ptrace.c b/arch/x86_64/kernel/ptrace.c
new file mode 100644 (file)
index 0000000..4ef88c5
--- /dev/null
@@ -0,0 +1,435 @@
+/* ptrace.c */
+/* By Ross Biro 1/23/92 */
+/*
+ * Pentium III FXSR, SSE support
+ *     Gareth Hughes <gareth@valinux.com>, May 2000
+ */
+
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/smp.h>
+#include <linux/smp_lock.h>
+#include <linux/errno.h>
+#include <linux/ptrace.h>
+#include <linux/user.h>
+
+#include <asm/uaccess.h>
+#include <asm/pgtable.h>
+#include <asm/system.h>
+#include <asm/processor.h>
+#include <asm/i387.h>
+#include <asm/debugreg.h>
+
+/*
+ * Does not yet catch signals sent when the child dies;
+ * that belongs in exit.c or in signal.c.
+ */
+
+/* determines which flags the user has access to. */
+/* 1 = access 0 = no access */
+#define FLAG_MASK 0x44dd5UL
+
+/* sets the trap flag. */
+#define TRAP_FLAG 0x100UL
+
+/*
+ * eflags and offset of eflags on child stack..
+ */
+#define EFLAGS offsetof(struct pt_regs, eflags)
+#define EFL_OFFSET ((int)(EFLAGS-sizeof(struct pt_regs)))
+
+/*
+ * this routine will get a word off the process's privileged stack.
+ * the offset is how far from the base addr as stored in the TSS.  
+ * this routine assumes that all the privileged stacks are in our
+ * data space.
+ */   
+static inline unsigned long get_stack_long(struct task_struct *task, int offset)
+{
+       unsigned char *stack;
+
+       stack = (unsigned char *)task->thread.rsp0;
+       stack += offset;
+       return (*((unsigned long *)stack));
+}
+
+/*
+ * this routine will put a word on the process's privileged stack.
+ * the offset is how far from the base addr as stored in the TSS.  
+ * this routine assumes that all the privileged stacks are in our
+ * data space.
+ */
+static inline long put_stack_long(struct task_struct *task, int offset,
+       unsigned long data)
+{
+       unsigned char * stack;
+
+       stack = (unsigned char *) task->thread.rsp0;
+       stack += offset;
+       *(unsigned long *) stack = data;
+       return 0;
+}
+
+/*
+ * Called by kernel/ptrace.c when detaching..
+ *
+ * Make sure the single step bit is not set.
+ */
+void ptrace_disable(struct task_struct *child)
+{ 
+       long tmp;
+
+       tmp = get_stack_long(child, EFL_OFFSET) & ~TRAP_FLAG;
+       put_stack_long(child, EFL_OFFSET, tmp);
+}
+
+static int putreg(struct task_struct *child,
+       unsigned long regno, unsigned long value)
+{
+       unsigned long tmp; 
+       switch (regno >> 2) {
+               // XXX: add 64bit setting. 
+               case FS:
+                       if (value && (value & 3) != 3)
+                               return -EIO;
+                       child->thread.fs = value;
+                       return 0;
+               case GS:
+                       if (value && (value & 3) != 3)
+                               return -EIO;
+                       child->thread.gs = value;
+                       return 0;
+               case EFLAGS:
+                       value &= FLAG_MASK;
+                       tmp = get_stack_long(child, EFL_OFFSET); 
+                       tmp &= ~FLAG_MASK; 
+                       value |= tmp;
+                       break;
+       }
+       /* assumption about sizes... */
+       if (regno > GS*4)
+               regno -= 2*4;
+       /* This has to be changed to put_stack_64() */
+       /* Hmm, with 32 bit applications being around... this will be
+          rather funny */
+       put_stack_long(child, regno - sizeof(struct pt_regs), value);
+       return 0;
+}
+
+static unsigned long getreg(struct task_struct *child,
+       unsigned long regno)
+{
+       switch (regno >> 3) {
+               case FS:
+                       return child->thread.fs;
+               case GS:
+                       return child->thread.gs;
+               default:
+                       regno = regno - sizeof(struct pt_regs);
+                       return get_stack_long(child, regno);
+       }
+
+}
+
+asmlinkage long sys_ptrace(long request, long pid, long addr, long data)
+{
+       struct task_struct *child;
+       struct user * dummy = NULL;
+       long i, ret;
+
+       /* This lock_kernel fixes a subtle race with suid exec */
+       lock_kernel();
+       ret = -EPERM;
+       if (request == PTRACE_TRACEME) {
+               /* are we already being traced? */
+               if (current->ptrace & PT_PTRACED)
+                       goto out;
+               /* set the ptrace bit in the process flags. */
+               current->ptrace |= PT_PTRACED;
+               ret = 0;
+               goto out;
+       }
+       ret = -ESRCH;
+       read_lock(&tasklist_lock);
+       child = find_task_by_pid(pid);
+       if (child)
+               get_task_struct(child);
+       read_unlock(&tasklist_lock);
+       if (!child)
+               goto out;
+
+       ret = -EPERM;
+       if (pid == 1)           /* you may not mess with init */
+               goto out_tsk;
+
+       if (request == PTRACE_ATTACH) {
+               ret = ptrace_attach(child);
+               goto out_tsk;
+       }
+       ret = -ESRCH;
+       if (!(child->ptrace & PT_PTRACED))
+               goto out_tsk;
+       if (child->state != TASK_STOPPED) {
+               if (request != PTRACE_KILL)
+                       goto out_tsk;
+       }
+       if (child->p_pptr != current)
+               goto out_tsk;
+       switch (request) {
+       /* when I and D space are separate, these will need to be fixed. */
+       case PTRACE_PEEKTEXT: /* read word at location addr. */ 
+       case PTRACE_PEEKDATA: {
+               unsigned long tmp;
+               int copied;
+
+               copied = access_process_vm(child, addr, &tmp, sizeof(tmp), 0);
+               ret = -EIO;
+               if (copied != sizeof(tmp))
+                       break;
+               ret = put_user(tmp,(unsigned long *) data);
+               break;
+       }
+
+       /* read the word at location addr in the USER area. */
+       case PTRACE_PEEKUSR: {
+               unsigned long tmp;
+
+               ret = -EIO;
+               if ((addr & 3) || addr < 0 || 
+                   addr > sizeof(struct user) - 3)
+                       break;
+
+               tmp = 0;  /* Default return condition */
+               if(addr < 20*sizeof(long))
+                       tmp = getreg(child, addr);
+               if(addr >= (long) &dummy->u_debugreg[0] &&
+                  addr <= (long) &dummy->u_debugreg[7]){
+                       addr -= (long) &dummy->u_debugreg[0];
+                       addr = addr >> 2;
+                       tmp = child->thread.debugreg[addr];
+               }
+               ret = put_user(tmp,(unsigned long *) data);
+               break;
+       }
+
+       /* when I and D space are separate, this will have to be fixed. */
+       case PTRACE_POKETEXT: /* write the word at location addr. */
+       case PTRACE_POKEDATA:
+               ret = 0;
+               if (access_process_vm(child, addr, &data, sizeof(data), 1) == sizeof(data))
+                       break;
+               ret = -EIO;
+               break;
+
+       case PTRACE_POKEUSR: /* write the word at location addr in the USER area */
+               ret = -EIO;
+               if ((addr & 3) || addr < 0 || 
+                   addr > sizeof(struct user) - 3)
+                       break;
+
+               if (addr < 20*sizeof(long)) {
+                       ret = putreg(child, addr, data);
+                       break;
+               }
+               /* We need to be very careful here.  We implicitly
+                  want to modify a portion of the task_struct, and we
+                  have to be selective about what portions we allow someone
+                  to modify. */
+
+                 ret = -EIO;
+                 if(addr >= (long) &dummy->u_debugreg[0] &&
+                    addr <= (long) &dummy->u_debugreg[7]){
+
+                         if(addr == (long) &dummy->u_debugreg[4]) break;
+                         if(addr == (long) &dummy->u_debugreg[5]) break;
+                         if(addr < (long) &dummy->u_debugreg[4] &&
+                            ((unsigned long) data) >= TASK_SIZE-3) break;
+                         
+                         if(addr == (long) &dummy->u_debugreg[7]) {
+                                 data &= ~DR_CONTROL_RESERVED;
+                                 for(i=0; i<4; i++)
+                                         if ((0x5454 >> ((data >> (16 + 4*i)) & 0xf)) & 1)
+                                                 goto out_tsk;
+                         }
+
+                         addr -= (long) &dummy->u_debugreg;
+                         addr = addr >> 2;
+                         child->thread.debugreg[addr] = data;
+                         ret = 0;
+                 }
+                 break;
+
+       case PTRACE_SYSCALL: /* continue and stop at next (return from) syscall */
+       case PTRACE_CONT: { /* restart after signal. */
+               long tmp;
+
+               ret = -EIO;
+               if ((unsigned long) data > _NSIG)
+                       break;
+               if (request == PTRACE_SYSCALL) {
+                       set_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
+               }
+               else {
+                       clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
+               }
+               child->exit_code = data;
+       /* make sure the single step bit is not set. */
+               tmp = get_stack_long(child, EFL_OFFSET);
+               tmp &= ~TRAP_FLAG;
+               put_stack_long(child, EFL_OFFSET,tmp);
+               wake_up_process(child);
+               ret = 0;
+               break;
+       }
+
+/*
+ * make the child exit.  Best I can do is send it a sigkill. 
+ * perhaps it should be put in the status that it wants to 
+ * exit.
+ */
+       case PTRACE_KILL: {
+               long tmp;
+
+               ret = 0;
+               if (child->state == TASK_ZOMBIE)        /* already dead */
+                       break;
+               child->exit_code = SIGKILL;
+               /* make sure the single step bit is not set. */
+               tmp = get_stack_long(child, EFL_OFFSET) & ~TRAP_FLAG;
+               put_stack_long(child, EFL_OFFSET, tmp);
+               wake_up_process(child);
+               break;
+       }
+
+       case PTRACE_SINGLESTEP: {  /* set the trap flag. */
+               long tmp;
+
+               ret = -EIO;
+               if ((unsigned long) data > _NSIG)
+                       break;
+               clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
+               if ((child->ptrace & PT_DTRACE) == 0) {
+                       /* Spurious delayed TF traps may occur */
+                       child->ptrace |= PT_DTRACE;
+               }
+               tmp = get_stack_long(child, EFL_OFFSET) | TRAP_FLAG;
+               put_stack_long(child, EFL_OFFSET, tmp);
+               child->exit_code = data;
+               /* give it a chance to run. */
+               wake_up_process(child);
+               ret = 0;
+               break;
+       }
+
+       case PTRACE_DETACH:
+               /* detach a process that was attached. */
+               ret = ptrace_detach(child, data);
+               break;
+
+       case PTRACE_GETREGS: { /* Get all gp regs from the child. */
+               if (!access_ok(VERIFY_WRITE, (unsigned *)data, FRAME_SIZE)) {
+                       ret = -EIO;
+                       break;
+               }
+               for ( i = 0; i < FRAME_SIZE; i += sizeof(long) ) {
+                       __put_user(getreg(child, i),(unsigned long *) data);
+                       data += sizeof(long);
+               }
+               ret = 0;
+               break;
+       }
+
+       case PTRACE_SETREGS: { /* Set all gp regs in the child. */
+               unsigned long tmp;
+               if (!access_ok(VERIFY_READ, (unsigned *)data, FRAME_SIZE)) {
+                       ret = -EIO;
+                       break;
+               }
+               for ( i = 0; i < FRAME_SIZE; i += sizeof(long) ) {
+                       __get_user(tmp, (unsigned long *) data);
+                       putreg(child, i, tmp);
+                       data += sizeof(long);
+               }
+               ret = 0;
+               break;
+       }
+
+       case PTRACE_GETFPREGS: { /* Get the child extended FPU state. */
+               if (!access_ok(VERIFY_WRITE, (unsigned *)data,
+                              sizeof(struct user_i387_struct))) {
+                       ret = -EIO;
+                       break;
+               }
+               if ( !child->used_math ) {
+                       /* Simulate an empty FPU. */
+                       set_fpu_cwd(child, 0x037f);
+                       set_fpu_swd(child, 0x0000);
+                       set_fpu_twd(child, 0xffff);
+                       set_fpu_mxcsr(child, 0x1f80);
+               }
+               ret = get_fpregs((struct user_i387_struct *)data, child);
+               break;
+       }
+
+       case PTRACE_SETFPREGS: { /* Set the child extended FPU state. */
+               if (!access_ok(VERIFY_READ, (unsigned *)data,
+                              sizeof(struct user_i387_struct))) {
+                       ret = -EIO;
+                       break;
+               }
+               child->used_math = 1;
+               ret = set_fpregs(child, (struct user_i387_struct *)data);
+               break;
+       }
+
+       case PTRACE_SETOPTIONS: {
+               if (data & PTRACE_O_TRACESYSGOOD)
+                       child->ptrace |= PT_TRACESYSGOOD;
+               else
+                       child->ptrace &= ~PT_TRACESYSGOOD;
+               ret = 0;
+               break;
+       }
+
+       default:
+               ret = -EIO;
+               break;
+       }
+out_tsk:
+       put_task_struct(child);
+out:
+       unlock_kernel();
+       return ret;
+}
+
+asmlinkage void syscall_trace(struct pt_regs *regs)
+{
+
+#if 0
+       printk("trace %s rip %lx rsp %lx rax %d origrax %d caller %lx tiflags %x ptrace %x\n",
+              current->comm,
+              regs->rip, regs->rsp, regs->rax, regs->orig_rax, __builtin_return_address(0),
+              current_thread_info()->flags, current->ptrace); 
+#endif
+
+       if (!test_thread_flag(TIF_SYSCALL_TRACE))
+               return; 
+       if (!(current->ptrace & PT_PTRACED))
+               return;
+       
+       current->exit_code = SIGTRAP | ((current->ptrace & PT_TRACESYSGOOD)
+                                       ? 0x80 : 0);
+       current->state = TASK_STOPPED;
+       notify_parent(current, SIGCHLD);
+       schedule();
+       /*
+        * this isn't the same as continuing with a signal, but it will do
+        * for normal use.  strace only continues with a signal if the
+        * stopping signal is not SIGTRAP.  -brl
+        */
+       if (current->exit_code) {
+               send_sig(current->exit_code, current, 1);
+               current->exit_code = 0;
+       }
+}
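The PEEKUSR path above exposes the debug registers through the u_debugreg window of struct user. A hedged tracer-side sketch of that usage follows; it is generic ptrace(2) client code written for illustration, not part of this commit, and PTRACE_PEEKUSER is the glibc spelling of the kernel's PTRACE_PEEKUSR.

#include <stdio.h>
#include <stdlib.h>
#include <stddef.h>
#include <sys/ptrace.h>
#include <sys/user.h>
#include <sys/wait.h>
#include <unistd.h>

/* Hedged sketch: attach to a pid given on the command line and read debug
 * register 7 through the same u_debugreg range that sys_ptrace() checks. */
int main(int argc, char **argv)
{
        pid_t pid;
        long dr7;

        if (argc < 2)
                return 1;
        pid = (pid_t)atoi(argv[1]);

        if (ptrace(PTRACE_ATTACH, pid, NULL, NULL) == -1) {
                perror("PTRACE_ATTACH");
                return 1;
        }
        waitpid(pid, NULL, 0);          /* wait for the tracee to stop */

        dr7 = ptrace(PTRACE_PEEKUSER, pid,
                     offsetof(struct user, u_debugreg) + 7 * sizeof(long), NULL);
        printf("dr7 = %#lx\n", dr7);

        ptrace(PTRACE_DETACH, pid, NULL, NULL);
        return 0;
}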
diff --git a/arch/x86_64/kernel/semaphore.c b/arch/x86_64/kernel/semaphore.c
new file mode 100644 (file)
index 0000000..e324b44
--- /dev/null
@@ -0,0 +1,225 @@
+/*
+ * x86_64 semaphore implementation.
+ *
+ * (C) Copyright 1999 Linus Torvalds
+ *
+ * Portions Copyright 1999 Red Hat, Inc.
+ *
+ *     This program is free software; you can redistribute it and/or
+ *     modify it under the terms of the GNU General Public License
+ *     as published by the Free Software Foundation; either version
+ *     2 of the License, or (at your option) any later version.
+ *
+ * rw semaphores implemented November 1999 by Benjamin LaHaise <bcrl@redhat.com>
+ */
+#include <linux/config.h>
+#include <linux/sched.h>
+#include <linux/err.h>
+
+#include <asm/semaphore.h>
+
+/*
+ * Semaphores are implemented using a two-way counter:
+ * The "count" variable is decremented for each process
+ * that tries to acquire the semaphore, while the "sleeping"
+ * variable is a count of such acquires.
+ *
+ * Notably, the inline "up()" and "down()" functions can
+ * efficiently test if they need to do any extra work (up
+ * needs to do something only if count was negative before
+ * the increment operation).
+ *
+ * "sleeping" and the contention routine ordering is
+ * protected by the semaphore spinlock.
+ *
+ * Note that these functions are only called when there is
+ * contention on the lock, and as such all this is the
+ * "non-critical" part of the whole semaphore business. The
+ * critical part is the inline stuff in <asm/semaphore.h>
+ * where we want to avoid any extra jumps and calls.
+ */
+
+/*
+ * Logic:
+ *  - only on a boundary condition do we need to care. When we go
+ *    from a negative count to a non-negative, we wake people up.
+ *  - when we go from a non-negative count to a negative, we must
+ *    (a) synchronize with the "sleeper" count and (b) make sure
+ *    that we're on the wakeup list before we synchronize so that
+ *    we cannot lose wakeup events.
+ */
+
+void __up(struct semaphore *sem)
+{
+       wake_up(&sem->wait);
+}
+
+static spinlock_t semaphore_lock = SPIN_LOCK_UNLOCKED;
+
+void __down(struct semaphore * sem)
+{
+       struct task_struct *tsk = current;
+       DECLARE_WAITQUEUE(wait, tsk);
+       tsk->state = TASK_UNINTERRUPTIBLE;
+       add_wait_queue_exclusive(&sem->wait, &wait);
+
+       spin_lock_irq(&semaphore_lock);
+       sem->sleepers++;
+       for (;;) {
+               int sleepers = sem->sleepers;
+
+               /*
+                * Add "everybody else" into it. They aren't
+                * playing, because we own the spinlock.
+                */
+               if (!atomic_add_negative(sleepers - 1, &sem->count)) {
+                       sem->sleepers = 0;
+                       break;
+               }
+               sem->sleepers = 1;      /* us - see -1 above */
+               spin_unlock_irq(&semaphore_lock);
+
+               schedule();
+               tsk->state = TASK_UNINTERRUPTIBLE;
+               spin_lock_irq(&semaphore_lock);
+       }
+       spin_unlock_irq(&semaphore_lock);
+       remove_wait_queue(&sem->wait, &wait);
+       tsk->state = TASK_RUNNING;
+       wake_up(&sem->wait);
+}
+
+int __down_interruptible(struct semaphore * sem)
+{
+       int retval = 0;
+       struct task_struct *tsk = current;
+       DECLARE_WAITQUEUE(wait, tsk);
+       tsk->state = TASK_INTERRUPTIBLE;
+       add_wait_queue_exclusive(&sem->wait, &wait);
+
+       spin_lock_irq(&semaphore_lock);
+       sem->sleepers ++;
+       for (;;) {
+               int sleepers = sem->sleepers;
+
+               /*
+                * With signals pending, this turns into
+                * the trylock failure case - we won't be
+                * sleeping, and we can't get the lock as
+                * it has contention. Just correct the count
+                * and exit.
+                */
+               if (signal_pending(current)) {
+                       retval = -EINTR;
+                       sem->sleepers = 0;
+                       atomic_add(sleepers, &sem->count);
+                       break;
+               }
+
+               /*
+                * Add "everybody else" into it. They aren't
+                * playing, because we own the spinlock. The
+                * "-1" is because we're still hoping to get
+                * the lock.
+                */
+               if (!atomic_add_negative(sleepers - 1, &sem->count)) {
+                       sem->sleepers = 0;
+                       break;
+               }
+               sem->sleepers = 1;      /* us - see -1 above */
+               spin_unlock_irq(&semaphore_lock);
+
+               schedule();
+               tsk->state = TASK_INTERRUPTIBLE;
+               spin_lock_irq(&semaphore_lock);
+       }
+       spin_unlock_irq(&semaphore_lock);
+       tsk->state = TASK_RUNNING;
+       remove_wait_queue(&sem->wait, &wait);
+       wake_up(&sem->wait);
+       return retval;
+}
+
+/*
+ * Trylock failed - make sure we correct for
+ * having decremented the count.
+ *
+ * We could have done the trylock with a
+ * single "cmpxchg" without failure cases,
+ * but then it wouldn't work on a 386.
+ */
+int __down_trylock(struct semaphore * sem)
+{
+       int sleepers;
+       unsigned long flags;
+
+       spin_lock_irqsave(&semaphore_lock, flags);
+       sleepers = sem->sleepers + 1;
+       sem->sleepers = 0;
+
+       /*
+        * Add "everybody else" and us into it. They aren't
+        * playing, because we own the spinlock.
+        */
+       if (!atomic_add_negative(sleepers, &sem->count))
+               wake_up(&sem->wait);
+
+       spin_unlock_irqrestore(&semaphore_lock, flags);
+       return 1;
+}
+
+
+/*
+ * The semaphore operations have a special calling sequence that
+ * allow us to do a simpler in-line version of them. These routines
+ * need to convert that sequence back into the C sequence when
+ * there is contention on the semaphore.
+ *
+ * %rcx contains the semaphore pointer on entry. Save all the callee
+ * clobbered registers.  It would be better if the compiler had a way 
+ * to specify that for the callee.
+ */
+
+
+#define PUSH_CLOBBER "pushq %rdi ; pushq %rsi ; pushq %rdx ; pushq %rcx ;" \
+ "pushq %rbx ; pushq %r8 ; push %r9\n\t" 
+#define POP_CLOBBER "popq %r9 ; popq %r8 ; popq %rbx ; popq %rcx ; " \
+       "popq %rdx ; popq %rsi ; popq %rdi\n\t"     
+
+#define SEM_ENTRY(label, name) asm( \
+       ".p2align\n\t.globl " #label "\n\t" \
+       #label ":\n\t" PUSH_CLOBBER "call " #name "\n\t" POP_CLOBBER "ret" )
+
+SEM_ENTRY(__down_failed, __down);
+SEM_ENTRY(__down_failed_interruptible, __down_interruptible); 
+SEM_ENTRY(__down_failed_trylock, __down_trylock); 
+SEM_ENTRY(__up_wakeup, __up);
+
+
+#if defined(CONFIG_SMP)
+asm(
+".p2align"
+"\n.globl      __write_lock_failed"
+"\n__write_lock_failed:"
+"\n    " LOCK "addl    $" RW_LOCK_BIAS_STR ",(%rax)"
+"\n1:  rep; nop; cmpl  $" RW_LOCK_BIAS_STR ",(%rax)"
+"\n    jne     1b"
+
+"\n    " LOCK "subl    $" RW_LOCK_BIAS_STR ",(%rax)"
+"\n    jnz     __write_lock_failed"
+"\n    ret"
+
+
+"\n.p2align"
+"\n.globl      __read_lock_failed"
+"\n__read_lock_failed:"
+"\n    lock ; incl     (%rax)"
+"\n1:  rep; nop; cmpl  $1,(%rax)"
+"\n    js      1b"
+
+"\n    lock ; decl     (%rax)"
+"\n    js      __read_lock_failed"
+"\n    ret"
+);
+#endif
+
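The __down*/__up routines above are only the contended slow paths; ordinary callers go through the inline down()/up() pair from <asm/semaphore.h>. A hedged caller-side sketch of that pattern follows; the DECLARE_MUTEX helper and the -ERESTARTSYS convention are assumptions about typical callers of this era, not code from this file.

#include <linux/errno.h>
#include <asm/semaphore.h>      /* DECLARE_MUTEX, down_interruptible, up (assumed 2.5-era API) */

/* Hedged caller-side sketch: how kernel code of this vintage would typically
 * take and release one of these semaphores. */
static DECLARE_MUTEX(example_sem);              /* counting semaphore initialised to 1 */

static int example_op(void)
{
        /* Falls into __down_interruptible() above only on contention. */
        if (down_interruptible(&example_sem))
                return -ERESTARTSYS;            /* woken by a signal */

        /* ... critical section ... */

        up(&example_sem);                       /* may call __up() to wake a sleeper */
        return 0;
}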
diff --git a/arch/x86_64/kernel/setup.c b/arch/x86_64/kernel/setup.c
new file mode 100644 (file)
index 0000000..fedfaa5
--- /dev/null
@@ -0,0 +1,1128 @@
+/*
+ *  linux/arch/x86-64/kernel/setup.c
+ *
+ *  Copyright (C) 1995  Linus Torvalds
+ *
+ *  Nov 2001 Dave Jones <davej@suse.de>
+ *  Forked from i386 setup code.
+ *
+ *  $Id$
+ */
+
+/*
+ * This file handles the architecture-dependent parts of initialization
+ */
+
+#include <linux/errno.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/stddef.h>
+#include <linux/unistd.h>
+#include <linux/ptrace.h>
+#include <linux/slab.h>
+#include <linux/user.h>
+#include <linux/a.out.h>
+#include <linux/tty.h>
+#include <linux/ioport.h>
+#include <linux/delay.h>
+#include <linux/config.h>
+#include <linux/init.h>
+#include <linux/apm_bios.h>
+#ifdef CONFIG_BLK_DEV_RAM
+#include <linux/blk.h>
+#endif
+#include <linux/highmem.h>
+#include <linux/bootmem.h>
+#include <asm/processor.h>
+#include <linux/console.h>
+#include <linux/seq_file.h>
+#include <asm/mtrr.h>
+#include <asm/uaccess.h>
+#include <asm/system.h>
+#include <asm/io.h>
+#include <asm/smp.h>
+#include <asm/msr.h>
+#include <asm/desc.h>
+#include <asm/e820.h>
+#include <asm/dma.h>
+#include <asm/mpspec.h>
+#include <asm/mmu_context.h>
+#include <asm/bootsetup.h>
+
+/*
+ * Machine setup..
+ */
+
+extern void mcheck_init(struct cpuinfo_x86 *c);
+
+char ignore_irq13;             /* set if exception 16 works */
+struct cpuinfo_x86 boot_cpu_data = { 0, 0, 0, 0, -1, 1, 0, 0, -1 };
+
+unsigned long mmu_cr4_features;
+
+/* For PCI or other memory-mapped resources */
+unsigned long pci_mem_start = 0x10000000;
+
+/*
+ * Setup options
+ */
+struct drive_info_struct { char dummy[32]; } drive_info;
+struct screen_info screen_info;
+struct apm_info apm_info;
+struct sys_desc_table_struct {
+       unsigned short length;
+       unsigned char table[0];
+};
+
+struct e820map e820;
+
+unsigned char aux_device_present;
+
+extern int root_mountflags;
+extern char _text, _etext, _edata, _end;
+extern unsigned long cpu_khz;
+
+static int disable_x86_fxsr __initdata = 0;
+
+static char command_line[COMMAND_LINE_SIZE];
+       char saved_command_line[COMMAND_LINE_SIZE];
+
+struct resource standard_io_resources[] = {
+       { "dma1", 0x00, 0x1f, IORESOURCE_BUSY },
+       { "pic1", 0x20, 0x3f, IORESOURCE_BUSY },
+       { "timer", 0x40, 0x5f, IORESOURCE_BUSY },
+       { "keyboard", 0x60, 0x6f, IORESOURCE_BUSY },
+       { "dma page reg", 0x80, 0x8f, IORESOURCE_BUSY },
+       { "pic2", 0xa0, 0xbf, IORESOURCE_BUSY },
+       { "dma2", 0xc0, 0xdf, IORESOURCE_BUSY },
+       { "fpu", 0xf0, 0xff, IORESOURCE_BUSY }
+};
+
+#define STANDARD_IO_RESOURCES (sizeof(standard_io_resources)/sizeof(struct resource))
+
+static struct resource code_resource = { "Kernel code", 0x100000, 0 };
+static struct resource data_resource = { "Kernel data", 0, 0 };
+static struct resource vram_resource = { "Video RAM area", 0xa0000, 0xbffff, IORESOURCE_BUSY };
+
+/* System ROM resources */
+#define MAXROMS 6
+static struct resource rom_resources[MAXROMS] = {
+       { "System ROM", 0xF0000, 0xFFFFF, IORESOURCE_BUSY },
+       { "Video ROM", 0xc0000, 0xc7fff, IORESOURCE_BUSY }
+};
+
+#define romsignature(x) (*(unsigned short *)(x) == 0xaa55)
+
+static void __init probe_roms(void)
+{
+       int roms = 1;
+       unsigned long base;
+       unsigned char *romstart;
+
+       request_resource(&iomem_resource, rom_resources+0);
+
+       /* Video ROM is standard at C000:0000 - C7FF:0000, check signature */
+       for (base = 0xC0000; base < 0xE0000; base += 2048) {
+               romstart = isa_bus_to_virt(base);
+               if (!romsignature(romstart))
+                       continue;
+               request_resource(&iomem_resource, rom_resources + roms);
+               roms++;
+               break;
+       }
+
+       /* Extension roms at C800:0000 - DFFF:0000 */
+       for (base = 0xC8000; base < 0xE0000; base += 2048) {
+               unsigned long length;
+
+               romstart = isa_bus_to_virt(base);
+               if (!romsignature(romstart))
+                       continue;
+               length = romstart[2] * 512;
+               if (length) {
+                       unsigned int i;
+                       unsigned char chksum;
+
+                       chksum = 0;
+                       for (i = 0; i < length; i++)
+                               chksum += romstart[i];
+
+                       /* Good checksum? */
+                       if (!chksum) {
+                               rom_resources[roms].start = base;
+                               rom_resources[roms].end = base + length - 1;
+                               rom_resources[roms].name = "Extension ROM";
+                               rom_resources[roms].flags = IORESOURCE_BUSY;
+
+                               request_resource(&iomem_resource, rom_resources + roms);
+                               roms++;
+                               if (roms >= MAXROMS)
+                                       return;
+                       }
+               }
+       }
+
+       /* Final check for motherboard extension rom at E000:0000 */
+       base = 0xE0000;
+       romstart = isa_bus_to_virt(base);
+
+       if (romsignature(romstart)) {
+               rom_resources[roms].start = base;
+               rom_resources[roms].end = base + 65535;
+               rom_resources[roms].name = "Extension ROM";
+               rom_resources[roms].flags = IORESOURCE_BUSY;
+
+               request_resource(&iomem_resource, rom_resources + roms);
+       }
+}
+
+void __init add_memory_region(unsigned long long start,
+                                  unsigned long long size, int type)
+{
+       int x = e820.nr_map;
+
+       if (x == E820MAX) {
+           printk(KERN_ERR "Ooops! Too many entries in the memory map!\n");
+           return;
+       }
+
+       e820.map[x].addr = start;
+       e820.map[x].size = size;
+       e820.map[x].type = type;
+       e820.nr_map++;
+} /* add_memory_region */
+
+#define E820_DEBUG     1
+
+static void __init print_memory_map(char *who)
+{
+       int i;
+
+       for (i = 0; i < e820.nr_map; i++) {
+               printk(" %s: %016Lx - %016Lx ", who,
+                       (unsigned long long)e820.map[i].addr,
+                       (unsigned long long)(e820.map[i].addr + e820.map[i].size));
+               switch (e820.map[i].type) {
+               case E820_RAM:  printk("(usable)\n");
+                               break;
+               case E820_RESERVED:
+                               printk("(reserved)\n");
+                               break;
+               case E820_ACPI:
+                               printk("(ACPI data)\n");
+                               break;
+               case E820_NVS:
+                               printk("(ACPI NVS)\n");
+                               break;
+               default:        printk("type %lu\n", (unsigned long)e820.map[i].type);
+                               break;
+               }
+       }
+}
+
+/*
+ * Sanitize the BIOS e820 map.
+ *
+ * Some e820 responses include overlapping entries.  The following 
+ * replaces the original e820 map with a new one, removing overlaps.
+ *
+ */
+static int __init sanitize_e820_map(struct e820entry * biosmap, char * pnr_map)
+{
+       struct change_member {
+               struct e820entry *pbios; /* pointer to original bios entry */
+               unsigned long long addr; /* address for this change point */
+       };
+       struct change_member change_point_list[2*E820MAX];
+       struct change_member *change_point[2*E820MAX];
+       struct e820entry *overlap_list[E820MAX];
+       struct e820entry new_bios[E820MAX];
+       struct change_member *change_tmp;
+       unsigned long current_type, last_type;
+       unsigned long long last_addr;
+       int chgidx, still_changing;
+       int overlap_entries;
+       int new_bios_entry;
+       int old_nr, new_nr;
+       int i;
+
+       /*
+               Visually we're performing the following (1,2,3,4 = memory types)...
+
+               Sample memory map (w/overlaps):
+                  ____22__________________
+                  ______________________4_
+                  ____1111________________
+                  _44_____________________
+                  11111111________________
+                  ____________________33__
+                  ___________44___________
+                  __________33333_________
+                  ______________22________
+                  ___________________2222_
+                  _________111111111______
+                  _____________________11_
+                  _________________4______
+
+               Sanitized equivalent (no overlap):
+                  1_______________________
+                  _44_____________________
+                  ___1____________________
+                  ____22__________________
+                  ______11________________
+                  _________1______________
+                  __________3_____________
+                  ___________44___________
+                  _____________33_________
+                  _______________2________
+                  ________________1_______
+                  _________________4______
+                  ___________________2____
+                  ____________________33__
+                  ______________________4_
+       */
+
+       /* if there's only one memory region, don't bother */
+       if (*pnr_map < 2)
+               return -1;
+
+       old_nr = *pnr_map;
+
+       /* bail out if we find any unreasonable addresses in bios map */
+       for (i=0; i<old_nr; i++)
+               if (biosmap[i].addr + biosmap[i].size < biosmap[i].addr)
+                       return -1;
+
+       /* create pointers for initial change-point information (for sorting) */
+       for (i=0; i < 2*old_nr; i++)
+               change_point[i] = &change_point_list[i];
+
+       /* record all known change-points (starting and ending addresses) */
+       chgidx = 0;
+       for (i=0; i < old_nr; i++)      {
+               change_point[chgidx]->addr = biosmap[i].addr;
+               change_point[chgidx++]->pbios = &biosmap[i];
+               change_point[chgidx]->addr = biosmap[i].addr + biosmap[i].size;
+               change_point[chgidx++]->pbios = &biosmap[i];
+       }
+
+       /* sort change-point list by memory addresses (low -> high) */
+       still_changing = 1;
+       while (still_changing)  {
+               still_changing = 0;
+               for (i=1; i < 2*old_nr; i++)  {
+                       /* if <current_addr> > <last_addr>, swap */
+                       /* or, if current=<start_addr> & last=<end_addr>, swap */
+                       if ((change_point[i]->addr < change_point[i-1]->addr) ||
+                               ((change_point[i]->addr == change_point[i-1]->addr) &&
+                                (change_point[i]->addr == change_point[i]->pbios->addr) &&
+                                (change_point[i-1]->addr != change_point[i-1]->pbios->addr))
+                          )
+                       {
+                               change_tmp = change_point[i];
+                               change_point[i] = change_point[i-1];
+                               change_point[i-1] = change_tmp;
+                               still_changing=1;
+                       }
+               }
+       }
+
+       /* create a new bios memory map, removing overlaps */
+       overlap_entries=0;       /* number of entries in the overlap table */
+       new_bios_entry=0;        /* index for creating new bios map entries */
+       last_type = 0;           /* start with undefined memory type */
+       last_addr = 0;           /* start with 0 as last starting address */
+       /* loop through change-points, determining the effect on the new bios map */
+       for (chgidx=0; chgidx < 2*old_nr; chgidx++)
+       {
+               /* keep track of all overlapping bios entries */
+               if (change_point[chgidx]->addr == change_point[chgidx]->pbios->addr)
+               {
+                       /* add map entry to overlap list (> 1 entry implies an overlap) */
+                       overlap_list[overlap_entries++]=change_point[chgidx]->pbios;
+               }
+               else
+               {
+                       /* remove entry from list (order independent, so swap with last) */
+                       for (i=0; i<overlap_entries; i++)
+                       {
+                               if (overlap_list[i] == change_point[chgidx]->pbios)
+                                       overlap_list[i] = overlap_list[overlap_entries-1];
+                       }
+                       overlap_entries--;
+               }
+               /* if there are overlapping entries, decide which "type" to use */
+               /* (larger value takes precedence -- 1=usable, 2,3,4,4+=unusable) */
+               current_type = 0;
+               for (i=0; i<overlap_entries; i++)
+                       if (overlap_list[i]->type > current_type)
+                               current_type = overlap_list[i]->type;
+               /* continue building up new bios map based on this information */
+               if (current_type != last_type)  {
+                       if (last_type != 0)      {
+                               new_bios[new_bios_entry].size =
+                                       change_point[chgidx]->addr - last_addr;
+                               /* move forward only if the new size was non-zero */
+                               if (new_bios[new_bios_entry].size != 0)
+                                       if (++new_bios_entry >= E820MAX)
+                                               break;  /* no more space left for new bios entries */
+                       }
+                       if (current_type != 0)  {
+                               new_bios[new_bios_entry].addr = change_point[chgidx]->addr;
+                               new_bios[new_bios_entry].type = current_type;
+                               last_addr=change_point[chgidx]->addr;
+                       }
+                       last_type = current_type;
+               }
+       }
+       new_nr = new_bios_entry;   /* retain count for new bios entries */
+
+       /* copy new bios mapping into original location */
+       memcpy(biosmap, new_bios, new_nr*sizeof(struct e820entry));
+       *pnr_map = new_nr;
+
+       return 0;
+}
+
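As an illustration of what sanitize_e820_map() above does (not part of the patch; the addresses and sizes are made up, with type 1 = usable RAM and type 2 = reserved as in the comment above):

    /* hypothetical BIOS map (overlapping):
     *   0x00000000 size 0x000a0000 type 1   (RAM,      0 - 640k)
     *   0x00090000 size 0x00070000 type 2   (reserved, 576k - 1M)
     *   0x00100000 size 0x07f00000 type 1   (RAM,      1M - 128M)
     * after sanitize_e820_map():
     *   0x00000000 size 0x00090000 type 1
     *   0x00090000 size 0x00070000 type 2
     *   0x00100000 size 0x07f00000 type 1
     * in the 576k - 640k overlap the larger type value (2, reserved) wins,
     * so the first RAM entry is trimmed back to end at 0x90000.
     */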
+/*
+ * Copy the BIOS e820 map into a safe place.
+ *
+ * Sanity-check it while we're at it..
+ *
+ * If we're lucky and live on a modern system, the setup code
+ * will have given us a memory map that we can use to properly
+ * set up memory.  If we aren't, we'll fake a memory map.
+ *
+ * We check to see that the memory map contains at least 2 elements
+ * before we'll use it, because the detection code in setup.S may
+ * not be perfect and most every PC known to man has two memory
+ * regions: one from 0 to 640k, and one from 1mb up.  (The IBM
+ * thinkpad 560x, for example, does not cooperate with the memory
+ * detection code.)
+ */
+static int __init copy_e820_map(struct e820entry * biosmap, int nr_map)
+{
+       /* Only one memory region (or negative)? Ignore it */
+       if (nr_map < 2)
+               return -1;
+
+       do {
+               unsigned long long start = biosmap->addr;
+               unsigned long long size = biosmap->size;
+               unsigned long long end = start + size;
+               unsigned long type = biosmap->type;
+
+               /* Overflow in 64 bits? Ignore the memory map. */
+               if (start > end)
+                       return -1;
+
+               /*
+                * Some BIOSes claim RAM in the 640k - 1M region.
+                * Not right. Fix it up.
+                */
+               if (type == E820_RAM) {
+                       if (start < 0x100000ULL && end > 0xA0000ULL) {
+                               if (start < 0xA0000ULL)
+                                       add_memory_region(start, 0xA0000ULL-start, type);
+                               if (end <= 0x100000ULL)
+                                       continue;
+                               start = 0x100000ULL;
+                               size = end - start;
+                       }
+               }
+               add_memory_region(start, size, type);
+       } while (biosmap++,--nr_map);
+       return 0;
+}
+
+/*
+ * Do NOT EVER look at the BIOS memory size location.
+ * It does not work on many machines.
+ */
+#define LOWMEMSIZE()   (0x9f000)
+
+void __init setup_memory_region(void)
+{
+       char *who = "BIOS-e820";
+
+       /*
+        * Try to copy the BIOS-supplied E820-map.
+        *
+        * Otherwise fake a memory map; one section from 0k->640k,
+        * the next section from 1mb->appropriate_mem_k
+        */
+       sanitize_e820_map(E820_MAP, &E820_MAP_NR);
+       if (copy_e820_map(E820_MAP, E820_MAP_NR) < 0) {
+               unsigned long mem_size;
+
+               /* compare results from other methods and take the greater */
+               if (ALT_MEM_K < EXT_MEM_K) {
+                       mem_size = EXT_MEM_K;
+                       who = "BIOS-88";
+               } else {
+                       mem_size = ALT_MEM_K;
+                       who = "BIOS-e801";
+               }
+
+               e820.nr_map = 0;
+               add_memory_region(0, LOWMEMSIZE(), E820_RAM);
+               add_memory_region(HIGH_MEMORY, mem_size << 10, E820_RAM);
+       }
+       printk(KERN_INFO "BIOS-provided physical RAM map:\n");
+       print_memory_map(who);
+} /* setup_memory_region */
+
+
+static inline void parse_mem_cmdline (char ** cmdline_p)
+{
+       char c = ' ', *to = command_line, *from = COMMAND_LINE;
+       int len = 0;
+       int usermem = 0;
+
+       /* Save unparsed command line copy for /proc/cmdline */
+       memcpy(saved_command_line, COMMAND_LINE, COMMAND_LINE_SIZE);
+       saved_command_line[COMMAND_LINE_SIZE-1] = '\0';
+
+       for (;;) {
+               /*
+                * "mem=nopentium" disables the 4MB page tables.
+                * "mem=XXX[kKmM]" defines a memory region from HIGH_MEM
+                * to <mem>, overriding the bios size.
+                * "mem=XXX[KkmM]@XXX[KkmM]" defines a memory region from
+                * <start> to <start>+<mem>, overriding the bios size.
+                */
+               if (c == ' ' && !memcmp(from, "mem=", 4)) {
+                       if (to != command_line)
+                               to--;
+                       if (!memcmp(from+4, "nopentium", 9)) {
+                               from += 9+4;
+                               clear_bit(X86_FEATURE_PSE, &boot_cpu_data.x86_capability);
+                       } else if (!memcmp(from+4, "exactmap", 8)) {
+                               from += 8+4;
+                               e820.nr_map = 0;
+                               usermem = 1;
+                       } else {
+                               /* If the user specifies memory size, we
+                                * blow away any automatically generated
+                                * size
+                                */
+                               unsigned long long start_at, mem_size;
+                               if (usermem == 0) {
+                                       /* first time in: zap the whitelist
+                                        * and reinitialize it with the
+                                        * standard low-memory region.
+                                        */
+                                       e820.nr_map = 0;
+                                       usermem = 1;
+                                       add_memory_region(0, LOWMEMSIZE(), E820_RAM);
+                               }
+                               mem_size = memparse(from+4, &from);
+                               if (*from == '@')
+                                       start_at = memparse(from+1, &from);
+                               else {
+                                       start_at = HIGH_MEMORY;
+                                       mem_size -= HIGH_MEMORY;
+                                       usermem=0;
+                               }
+                               add_memory_region(start_at, mem_size, E820_RAM);
+                       }
+               }
+               c = *(from++);
+               if (!c)
+                       break;
+               if (COMMAND_LINE_SIZE <= ++len)
+                       break;
+               *(to++) = c;
+       }
+       *to = '\0';
+       *cmdline_p = command_line;
+       if (usermem) {
+               printk(KERN_INFO "user-defined physical RAM map:\n");
+               print_memory_map("user");
+       }
+}
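Some hypothetical command lines to illustrate the mem= handling above (sizes invented, behaviour read straight from parse_mem_cmdline()):

    mem=nopentium               clear the PSE bit (no 4MB pages), keep the BIOS map
    mem=512M                    map becomes 0 - 636k of RAM plus 1MB - 512MB of RAM
    mem=96M@2048M               zap the map, re-add 0 - 636k, add 96MB of RAM at 2GB
    mem=exactmap mem=640K@0 mem=255M@1M
                                build the whole map by hand from an empty one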
+
+void __init setup_arch(char **cmdline_p)
+{
+       unsigned long bootmap_size, low_mem_size;
+       unsigned long start_pfn, max_pfn, max_low_pfn;
+       int i;
+
+       ROOT_DEV = to_kdev_t(ORIG_ROOT_DEV);
+       drive_info = DRIVE_INFO;
+       screen_info = SCREEN_INFO;
+       apm_info.bios = APM_BIOS_INFO;
+       aux_device_present = AUX_DEVICE_INFO;
+
+#ifdef CONFIG_BLK_DEV_RAM
+       rd_image_start = RAMDISK_FLAGS & RAMDISK_IMAGE_START_MASK;
+       rd_prompt = ((RAMDISK_FLAGS & RAMDISK_PROMPT_FLAG) != 0);
+       rd_doload = ((RAMDISK_FLAGS & RAMDISK_LOAD_FLAG) != 0);
+#endif
+       setup_memory_region();
+
+       if (!MOUNT_ROOT_RDONLY)
+               root_mountflags &= ~MS_RDONLY;
+       init_mm.start_code = (unsigned long) &_text;
+       init_mm.end_code = (unsigned long) &_etext;
+       init_mm.end_data = (unsigned long) &_edata;
+       init_mm.brk = (unsigned long) &_end;
+
+       code_resource.start = virt_to_phys(&_text);
+       code_resource.end = virt_to_phys(&_etext)-1;
+       data_resource.start = virt_to_phys(&_etext);
+       data_resource.end = virt_to_phys(&_edata)-1;
+
+       parse_mem_cmdline(cmdline_p);
+
+#define PFN_UP(x)      (((x) + PAGE_SIZE-1) >> PAGE_SHIFT)
+#define PFN_DOWN(x)    ((x) >> PAGE_SHIFT)
+#define PFN_PHYS(x)    ((x) << PAGE_SHIFT)
+
+#define VMALLOC_RESERVE        (unsigned long)(4096 << 20)
+#define MAXMEM         (unsigned long)(-PAGE_OFFSET-VMALLOC_RESERVE)
+#define MAXMEM_PFN     PFN_DOWN(MAXMEM)
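A quick illustration of the page-frame macros just defined, assuming the usual 4096-byte pages (PAGE_SHIFT == 12); this is not part of the patch:

    PFN_UP(0x100001)   == 0x101      /* round a byte address up to the next frame */
    PFN_DOWN(0x100fff) == 0x100      /* round down to the containing frame */
    PFN_PHYS(0x101)    == 0x101000   /* frame number back to a byte address */

so the start_pfn computed below is the first whole page frame above the kernel image (_end).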
+
+       /*
+        * partially used pages are not usable - thus
+        * we are rounding upwards:
+        */
+       start_pfn = PFN_UP(__pa(&_end));
+
+       /*
+        * Find the highest page frame number we have available
+        */
+       max_pfn = 0;
+       for (i = 0; i < e820.nr_map; i++) {
+               unsigned long start, end;
+               /* RAM? */
+               if (e820.map[i].type != E820_RAM)
+                       continue;
+               start = PFN_UP(e820.map[i].addr);
+               end = PFN_DOWN(e820.map[i].addr + e820.map[i].size);
+               if (start >= end)
+                       continue;
+               if (end > max_pfn)
+                       max_pfn = end;
+       }
+
+       /*
+        * Determine low and high memory ranges:
+        */
+       max_low_pfn = max_pfn;
+       if (max_low_pfn > MAXMEM_PFN) {
+               max_low_pfn = MAXMEM_PFN;
+       }
+
+
+       /*
+        * Initialize the boot-time allocator (with low memory only):
+        */
+       bootmap_size = init_bootmem(start_pfn, max_low_pfn);
+
+       /*
+        * Register fully available low RAM pages with the bootmem allocator.
+        */
+       for (i = 0; i < e820.nr_map; i++) {
+               unsigned long curr_pfn, last_pfn, size;
+               /*
+                * Reserve usable low memory
+                */
+               if (e820.map[i].type != E820_RAM)
+                       continue;
+               /*
+                * We are rounding up the start address of usable memory:
+                */
+               curr_pfn = PFN_UP(e820.map[i].addr);
+               if (curr_pfn >= max_low_pfn)
+                       continue;
+               /*
+                * ... and at the end of the usable range downwards:
+                */
+               last_pfn = PFN_DOWN(e820.map[i].addr + e820.map[i].size);
+
+               if (last_pfn > max_low_pfn)
+                       last_pfn = max_low_pfn;
+
+               /*
+                * .. finally, did all the rounding and playing
+                * around just make the area go away?
+                */
+               if (last_pfn <= curr_pfn)
+                       continue;
+
+               size = last_pfn - curr_pfn;
+               free_bootmem(PFN_PHYS(curr_pfn), PFN_PHYS(size));
+       }
+       /*
+        * Reserve the bootmem bitmap itself as well. We do this in two
+        * steps (first step was init_bootmem()) because this catches
+        * the (very unlikely) case of us accidentally initializing the
+        * bootmem allocator with an invalid RAM area.
+        */
+       reserve_bootmem(HIGH_MEMORY, (PFN_PHYS(start_pfn) +
+                        bootmap_size + PAGE_SIZE-1) - (HIGH_MEMORY));
+
+       /*
+        * reserve physical page 0 - it's a special BIOS page on many boxes,
+        * enabling clean reboots, SMP operation, laptop functions.
+        */
+       reserve_bootmem(0, PAGE_SIZE);
+
+#ifdef CONFIG_SMP
+       /*
+        * But first pinch a few for the stack/trampoline stuff
+        * FIXME: Don't need the extra page at 4K, but need to fix
+        * trampoline before removing it. (see the GDT stuff)
+        */
+       reserve_bootmem(PAGE_SIZE, PAGE_SIZE);
+#endif
+
+#ifdef CONFIG_X86_LOCAL_APIC
+       /*
+        * Find and reserve possible boot-time SMP configuration:
+        */
+       find_smp_config();
+#endif
+#ifdef CONFIG_BLK_DEV_INITRD
+       if (LOADER_TYPE && INITRD_START) {
+               if (INITRD_START + INITRD_SIZE <= (max_low_pfn << PAGE_SHIFT)) {
+                       reserve_bootmem(INITRD_START, INITRD_SIZE);
+                       initrd_start =
+                               INITRD_START ? INITRD_START + PAGE_OFFSET : 0;
+                       initrd_end = initrd_start+INITRD_SIZE;
+               }
+               else {
+                       printk(KERN_ERR "initrd extends beyond end of memory "
+                           "(0x%08lx > 0x%08lx)\ndisabling initrd\n",
+                           (unsigned long)(INITRD_START + INITRD_SIZE),
+                           (unsigned long)(max_low_pfn << PAGE_SHIFT));
+                       initrd_start = 0;
+               }
+       }
+#endif
+
+       /*
+        * NOTE: before this point _nobody_ is allowed to allocate
+        * any memory using the bootmem allocator.
+        */
+
+#ifdef CONFIG_SMP
+       smp_alloc_memory(); /* AP processor realmode stacks in low memory*/
+#endif
+       paging_init();
+#ifdef CONFIG_X86_LOCAL_APIC
+       /*
+        * get boot-time SMP configuration:
+        */
+       if (smp_found_config)
+               get_smp_config();
+       init_apic_mappings();
+#endif
+
+
+       /*
+        * Request address space for all standard RAM and ROM resources
+        * and also for regions reported as reserved by the e820.
+        */
+       probe_roms();
+       for (i = 0; i < e820.nr_map; i++) {
+               struct resource *res;
+               if (e820.map[i].addr + e820.map[i].size > 0x100000000ULL)
+                       continue;
+               res = alloc_bootmem_low(sizeof(struct resource));
+               switch (e820.map[i].type) {
+               case E820_RAM:  res->name = "System RAM"; break;
+               case E820_ACPI: res->name = "ACPI Tables"; break;
+               case E820_NVS:  res->name = "ACPI Non-volatile Storage"; break;
+               default:        res->name = "reserved";
+               }
+               res->start = e820.map[i].addr;
+               res->end = res->start + e820.map[i].size - 1;
+               res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
+               request_resource(&iomem_resource, res);
+               if (e820.map[i].type == E820_RAM) {
+                       /*
+                        *  We don't know which RAM region contains kernel data,
+                        *  so we try it repeatedly and let the resource manager
+                        *  test it.
+                        */
+                       request_resource(res, &code_resource);
+                       request_resource(res, &data_resource);
+               }
+       }
+       request_resource(&iomem_resource, &vram_resource);
+
+       /* request I/O space for devices used on all i[345]86 PCs */
+       for (i = 0; i < STANDARD_IO_RESOURCES; i++)
+               request_resource(&ioport_resource, standard_io_resources+i);
+
+       /* Tell the PCI layer not to allocate too close to the RAM area.. */
+       low_mem_size = ((max_low_pfn << PAGE_SHIFT) + 0xfffff) & ~0xfffff;
+       if (low_mem_size > pci_mem_start)
+               pci_mem_start = low_mem_size;
+
+#ifdef CONFIG_VT
+#if defined(CONFIG_VGA_CONSOLE)
+       conswitchp = &vga_con;
+#elif defined(CONFIG_DUMMY_CONSOLE)
+       conswitchp = &dummy_con;
+#endif
+#endif
+}
+
+#ifndef CONFIG_X86_TSC
+static int tsc_disable __initdata = 0;
+
+static int __init tsc_setup(char *str)
+{
+       tsc_disable = 1;
+       return 1;
+}
+
+__setup("notsc", tsc_setup);
+#endif
+
+static int __init get_model_name(struct cpuinfo_x86 *c)
+{
+       unsigned int *v;
+
+       if (cpuid_eax(0x80000000) < 0x80000004)
+               return 0;
+
+       v = (unsigned int *) c->x86_model_id;
+       cpuid(0x80000002, &v[0], &v[1], &v[2], &v[3]);
+       cpuid(0x80000003, &v[4], &v[5], &v[6], &v[7]);
+       cpuid(0x80000004, &v[8], &v[9], &v[10], &v[11]);
+       c->x86_model_id[48] = 0;
+       return 1;
+}
+
+
+static void __init display_cacheinfo(struct cpuinfo_x86 *c)
+{
+       unsigned int n, dummy, ecx, edx;
+
+       n = cpuid_eax(0x80000000);
+
+       if (n >= 0x80000005) {
+               cpuid(0x80000005, &dummy, &dummy, &ecx, &edx);
+               printk(KERN_INFO "CPU: L1 I Cache: %dK (%d bytes/line), D cache %dK (%d bytes/line)\n",
+                       edx>>24, edx&0xFF, ecx>>24, ecx&0xFF);
+               c->x86_cache_size=(ecx>>24)+(edx>>24);  
+       }
+
+       if (n < 0x80000006)
+               return;
+
+       ecx = cpuid_ecx(0x80000006);
+       c->x86_cache_size = ecx >> 16;
+
+       printk(KERN_INFO "CPU: L2 Cache: %dK (%d bytes/line)\n",
+               c->x86_cache_size, ecx & 0xFF);
+}
+
+
+static int __init init_amd(struct cpuinfo_x86 *c)
+{
+       int r;
+
+       /* Bit 31 in normal CPUID used for nonstandard 3DNow ID;
+          3DNow is IDd by bit 31 in extended CPUID (1*32+31) anyway */
+       clear_bit(0*32+31, &c->x86_capability);
+       
+       r = get_model_name(c);
+       display_cacheinfo(c);
+       return r;
+}
+
+
+void __init get_cpu_vendor(struct cpuinfo_x86 *c)
+{
+       char *v = c->x86_vendor_id;
+
+       if (!strcmp(v, "AuthenticAMD"))
+               c->x86_vendor = X86_VENDOR_AMD;
+       else
+               c->x86_vendor = X86_VENDOR_UNKNOWN;
+}
+
+struct cpu_model_info {
+       int vendor;
+       int family;
+       char *model_names[16];
+};
+
+int __init x86_fxsr_setup(char * s)
+{
+       disable_x86_fxsr = 1;
+       return 1;
+}
+__setup("nofxsr", x86_fxsr_setup);
+
+
+
+/*
+ * This does the hard work of actually picking apart the CPU stuff...
+ */
+void __init identify_cpu(struct cpuinfo_x86 *c)
+{
+       int junk, i;
+       u32 xlvl, tfms;
+
+       c->loops_per_jiffy = loops_per_jiffy;
+       c->x86_cache_size = -1;
+       c->x86_vendor = X86_VENDOR_UNKNOWN;
+       c->x86_model = c->x86_mask = 0; /* So far unknown... */
+       c->x86_vendor_id[0] = '\0'; /* Unset */
+       c->x86_model_id[0] = '\0';  /* Unset */
+       memset(&c->x86_capability, 0, sizeof c->x86_capability);
+
+       /* Get vendor name */
+       cpuid(0x00000000, &c->cpuid_level,
+             (int *)&c->x86_vendor_id[0],
+             (int *)&c->x86_vendor_id[8],
+             (int *)&c->x86_vendor_id[4]);
+               
+       get_cpu_vendor(c);
+       /* Initialize the standard set of capabilities */
+       /* Note that the vendor-specific code below might override */
+
+       /* Intel-defined flags: level 0x00000001 */
+       if ( c->cpuid_level >= 0x00000001 ) {
+               cpuid(0x00000001, &tfms, &junk, &junk,
+                     &c->x86_capability[0]);
+               c->x86 = (tfms >> 8) & 15;
+               c->x86_model = (tfms >> 4) & 15;
+               c->x86_mask = tfms & 15;
+       } else {
+               /* Have CPUID level 0 only - unheard of */
+               c->x86 = 4;
+       }
+
+       /* AMD-defined flags: level 0x80000001 */
+       xlvl = cpuid_eax(0x80000000);
+       if ( (xlvl & 0xffff0000) == 0x80000000 ) {
+               if ( xlvl >= 0x80000001 )
+                       c->x86_capability[1] = cpuid_edx(0x80000001);
+               if ( xlvl >= 0x80000004 )
+                       get_model_name(c); /* Default name */
+       }
+
+       /* Transmeta-defined flags: level 0x80860001 */
+       xlvl = cpuid_eax(0x80860000);
+       if ( (xlvl & 0xffff0000) == 0x80860000 ) {
+               if (  xlvl >= 0x80860001 )
+                       c->x86_capability[2] = cpuid_edx(0x80860001);
+       }
+
+
+       printk(KERN_DEBUG "CPU: Before vendor init, caps: %08x %08x %08x, vendor = %d\n",
+              c->x86_capability[0],
+              c->x86_capability[1],
+              c->x86_capability[2],
+              c->x86_vendor);
+
+       /*
+        * Vendor-specific initialization.  In this section we
+        * canonicalize the feature flags, meaning if there are
+        * features a certain CPU supports which CPUID doesn't
+        * tell us, CPUID claiming incorrect flags, or other bugs,
+        * we handle them here.
+        *
+        * At the end of this section, c->x86_capability better
+        * indicate the features this CPU genuinely supports!
+        */
+       switch ( c->x86_vendor ) {
+
+               case X86_VENDOR_AMD:
+                       init_amd(c);
+                       break;
+
+               case X86_VENDOR_UNKNOWN:
+               default:
+                       /* Not much we can do here... */
+                       break;
+       }
+       
+       printk(KERN_DEBUG "CPU: After vendor init, caps: %08x %08x %08x %08x\n",
+              c->x86_capability[0],
+              c->x86_capability[1],
+              c->x86_capability[2],
+              c->x86_capability[3]);
+
+       /*
+        * The vendor-specific functions might have changed features.  Now
+        * we do "generic changes."
+        */
+
+       /* TSC disabled? */
+#ifndef CONFIG_X86_TSC
+       if ( tsc_disable )
+               clear_bit(X86_FEATURE_TSC, &c->x86_capability);
+#endif
+
+       /* FXSR disabled? */
+       if (disable_x86_fxsr) {
+               clear_bit(X86_FEATURE_FXSR, &c->x86_capability);
+               clear_bit(X86_FEATURE_XMM, &c->x86_capability);
+       }
+
+       /* Now the feature flags better reflect actual CPU features! */
+
+       printk(KERN_DEBUG "CPU:     After generic, caps: %08x %08x %08x %08x\n",
+              c->x86_capability[0],
+              c->x86_capability[1],
+              c->x86_capability[2],
+              c->x86_capability[3]);
+
+       /*
+        * On SMP, boot_cpu_data holds the common feature set between
+        * all CPUs; so make sure that we indicate which features are
+        * common between the CPUs.  The first time this routine gets
+        * executed, c == &boot_cpu_data.
+        */
+       if ( c != &boot_cpu_data ) {
+               /* AND the already accumulated flags with these */
+               for ( i = 0 ; i < NCAPINTS ; i++ )
+                       boot_cpu_data.x86_capability[i] &= c->x86_capability[i];
+       }
+
+       printk(KERN_DEBUG "CPU:             Common caps: %08x %08x %08x %08x\n",
+              boot_cpu_data.x86_capability[0],
+              boot_cpu_data.x86_capability[1],
+              boot_cpu_data.x86_capability[2],
+              boot_cpu_data.x86_capability[3]);
+}
+
+void __init print_cpu_info(struct cpuinfo_x86 *c)
+{
+       if (c->x86_model_id[0])
+               printk("AMD %s", c->x86_model_id);
+
+       if (c->x86_mask || c->cpuid_level >= 0) 
+               printk(" stepping %02x\n", c->x86_mask);
+       else
+               printk("\n");
+}
+
+/*
+ *     Get CPU information for use by the procfs.
+ */
+
+static int show_cpuinfo(struct seq_file *m, void *v)
+{
+       struct cpuinfo_x86 *c = v;
+       int index = c - cpu_data;
+
+       /* 
+        * These flag bits must match the definitions in <asm/cpufeature.h>.
+        * NULL means this bit is undefined or reserved; either way it doesn't
+        * have meaning as far as Linux is concerned.  Note that it's important
+        * to realize there is a difference between this table and CPUID -- if
+        * applications want to get the raw CPUID data, they should access
+        * /dev/cpu/<cpu_nr>/cpuid instead.
+        */
+       static char *x86_cap_flags[] = {
+               /* Intel-defined */
+               "fpu", "vme", "de", "pse", "tsc", "msr", "pae", "mce",
+               "cx8", "apic", NULL, "sep", "mtrr", "pge", "mca", "cmov",
+               "pat", "pse36", "pn", "clflush", NULL, "dts", "acpi", "mmx",
+               "fxsr", "sse", "sse2", "ss", NULL, "tm", "ia64", NULL,
+
+               /* AMD-defined */
+               NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+               NULL, NULL, NULL, "syscall", NULL, NULL, NULL, NULL,
+               NULL, NULL, NULL, NULL, NULL, NULL, "mmxext", NULL,
+               NULL, NULL, NULL, NULL, NULL, "lm", "3dnowext", "3dnow",
+
+               /* Transmeta-defined */
+               "recovery", "longrun", NULL, "lrti", NULL, NULL, NULL, NULL,
+               NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+               NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+               NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+
+               /* Other (Linux-defined) */
+               "cxmmx", "k6_mtrr", "cyrix_arr", "centaur_mcr", NULL, NULL, NULL, NULL,
+               NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+               NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+               NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+       };
+
+#ifdef CONFIG_SMP
+       if (!(cpu_online_map & (1<<(c-cpu_data))))
+               return 0;
+#endif
+
+       seq_printf(m,"processor\t: %d\n"
+                    "vendor_id\t: %s\n"
+                    "cpu family\t: %d\n"
+                    "model\t\t: %d\n"
+                    "model name\t: %s\n",
+                  index,
+                    c->x86_vendor_id[0] ? c->x86_vendor_id : "unknown",
+                    c->x86,
+                    c->x86_model,
+                    c->x86_model_id[0] ? c->x86_model_id : "unknown");
+       
+       if (c->x86_mask || c->cpuid_level >= 0)
+               seq_printf(m, "stepping\t: %d\n", c->x86_mask);
+       else
+               seq_printf(m, "stepping\t: unknown\n");
+       
+       if ( test_bit(X86_FEATURE_TSC, &c->x86_capability) ) {
+               seq_printf(m, "cpu MHz\t\t: %lu.%03lu\n",
+                            cpu_khz / 1000, (cpu_khz % 1000));
+       }
+
+       /* Cache size */
+       if (c->x86_cache_size >= 0) 
+               seq_printf(m, "cache size\t: %d KB\n", c->x86_cache_size);
+       
+       seq_printf(m,
+               "fpu\t\t: yes\n"
+               "fpu_exception\t: yes\n"
+               "cpuid level\t: %d\n"
+               "wp\t\t: yes\n"
+               "flags\t\t:",
+                  c->cpuid_level);
+
+       { 
+               int i; 
+               for ( i = 0 ; i < 32*NCAPINTS ; i++ )
+                       if ( test_bit(i, &c->x86_capability) &&
+                            x86_cap_flags[i] != NULL )
+                               seq_printf(m, " %s", x86_cap_flags[i]);
+       }
+               
+       seq_printf(m, "\nbogomips\t: %lu.%02lu\n\n",
+                  c->loops_per_jiffy/(500000/HZ),
+                  (c->loops_per_jiffy/(5000/HZ)) % 100);
+
+       return 0;
+}
+
+static void *c_start(struct seq_file *m, loff_t *pos)
+{
+       return *pos < NR_CPUS ? cpu_data + *pos : NULL;
+}
+
+static void *c_next(struct seq_file *m, void *v, loff_t *pos)
+{
+       ++*pos;
+       return c_start(m, pos);
+}
+
+static void c_stop(struct seq_file *m, void *v)
+{
+}
+
+struct seq_operations cpuinfo_op = {
+       start:  c_start,
+       next:   c_next,
+       stop:   c_stop,
+       show:   show_cpuinfo,
+};
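For reference, output in roughly the shape show_cpuinfo() above produces; every value in this sample is hypothetical:

    processor       : 0
    vendor_id       : AuthenticAMD
    cpu family      : 15
    model           : 5
    model name      : AMD Hammer (hypothetical)
    stepping        : 0
    cpu MHz         : 1200.000
    cache size      : 1024 KB
    fpu             : yes
    fpu_exception   : yes
    cpuid level     : 1
    wp              : yes
    flags           : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 syscall mmxext lm 3dnowext 3dnow
    bogomips        : 2400.00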
diff --git a/arch/x86_64/kernel/setup64.c b/arch/x86_64/kernel/setup64.c
new file mode 100644 (file)
index 0000000..82e96be
--- /dev/null
@@ -0,0 +1,142 @@
+/* 
+ * X86-64 specific setup part.
+ * Copyright (C) 1995  Linus Torvalds
+ * Copyright 2001 2002 SuSE Labs / Andi Kleen.
+ * See setup.c for older changelog.
+ */ 
+#include <linux/config.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <asm/pda.h>
+#include <asm/processor.h>
+#include <asm/desc.h>
+#include <asm/bitops.h>
+#include <asm/atomic.h>
+#include <asm/mmu_context.h>
+
+char x86_boot_params[2048] __initdata = {0,};
+
+static unsigned long cpu_initialized __initdata = 0;
+
+struct x8664_pda cpu_pda[NR_CPUS] __cacheline_aligned; 
+
+extern void system_call(void); 
+extern void ia32_cstar_target(void); 
+
+struct desc_ptr gdt_descr = { 0 /* filled in */, (unsigned long) gdt_table }; 
+struct desc_ptr idt_descr = { 256 * 16, (unsigned long) idt_table }; 
+
+void pda_init(int cpu)
+{ 
+       cpu_pda[cpu].me = &cpu_pda[cpu]; 
+       cpu_pda[cpu].cpunumber = cpu; 
+       cpu_pda[cpu].irqcount = -1;
+       cpu_pda[cpu].irqstackptr = cpu_pda[cpu].irqstack + sizeof(cpu_pda[0].irqstack);
+       /* others are initialized in smpboot.c */
+       if (cpu == 0) {
+               cpu_pda[cpu].pcurrent = &init_task;
+               cpu_pda[cpu].kernelstack = 
+                       (unsigned long)&init_thread_union+THREAD_SIZE-PDA_STACKOFFSET;
+       }
+       asm volatile("movl %0,%%gs ; movl %0,%%fs" :: "r" (0)); 
+       wrmsrl(MSR_GS_BASE, cpu_pda + cpu);
+} 
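A sketch of why pda_init() points MSR_GS_BASE at this CPU's pda: per-CPU fields can then be reached with a single %gs-relative access instead of indexing cpu_pda[] by CPU number. A minimal example under that assumption (the field choice is arbitrary and the asm is only a sketch of the pda accessor style):

    unsigned long rsp0;
    /* read cpu_pda[this_cpu].kernelstack via the %gs base set up above */
    asm("movq %%gs:%c1,%0"
        : "=r" (rsp0)
        : "i" (offsetof(struct x8664_pda, kernelstack)));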
+
+/*
+ * cpu_init() initializes state that is per-CPU. Some data is already
+ * initialized (naturally) in the bootstrap process, such as the GDT
+ * and IDT. We reload them nevertheless, this function acts as a
+ * 'CPU state barrier', nothing should get across.
+ */
+void __init cpu_init (void)
+{
+#ifdef CONFIG_SMP
+       int nr = current_thread_info()->cpu;
+#else
+       int nr = smp_processor_id();
+#endif
+       struct tss_struct * t = &init_tss[nr];
+       unsigned long v; 
+
+       /* CPU 0 is initialised in head64.c */
+       if (nr != 0) 
+               pda_init(nr);  
+
+       if (test_and_set_bit(nr, &cpu_initialized)) {
+               printk("CPU#%d already initialized!\n", nr);
+               for (;;) __sti();
+       }
+       printk("Initializing CPU#%d\n", nr);
+
+       if (cpu_has_vme || cpu_has_tsc || cpu_has_de)
+               clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE);
+
+       gdt_descr.size = (__u8*) gdt_end - (__u8*)gdt_table; 
+
+       __asm__ __volatile__("lgdt %0": "=m" (gdt_descr));
+       __asm__ __volatile__("lidt %0": "=m" (idt_descr));
+
+       /*
+        * Delete NT
+        */
+
+       __asm__ volatile("pushfq ; popq %%rax ; btr $14,%%rax ; pushq %%rax ; popfq" :: : "eax");
+
+       /* 
+        * LSTAR and STAR live in a somewhat strange symbiosis.
+        * They both write to the same internal register. STAR allows setting CS/DS,
+        * but only a 32-bit target. LSTAR sets the 64-bit rip.
+        */ 
+       wrmsrl(MSR_STAR,  ((u64)__USER32_CS)<<48  | ((u64)__KERNEL_CS)<<32); 
+       wrmsrl(MSR_LSTAR, system_call); 
+
+#ifdef CONFIG_IA32_EMULATION                   
+       wrmsrl(MSR_CSTAR, ia32_cstar_target); 
+#endif
+
+       rdmsrl(MSR_EFER, v); 
+       wrmsrl(MSR_EFER, v|1); 
+       
+       /* Flags to clear on syscall */
+       wrmsrl(MSR_SYSCALL_MASK, EF_TF|EF_DF|EF_IE); 
+
+
+       wrmsrl(MSR_FS_BASE, 0);
+       wrmsrl(MSR_KERNEL_GS_BASE, 0);
+       barrier(); 
+
+       /*
+        * set up and load the per-CPU TSS and LDT
+        */
+       atomic_inc(&init_mm.mm_count);
+       current->active_mm = &init_mm;
+       if(current->mm)
+               BUG();
+       enter_lazy_tlb(&init_mm, current, nr);
+
+       set_tssldt_descriptor((__u8 *)tss_start + (nr*16), (unsigned long) t, 
+                             DESC_TSS, 
+                             offsetof(struct tss_struct, io_bitmap)); 
+       load_TR(nr);
+       load_LDT(&init_mm);
+
+       /*
+        * Clear all 6 debug registers:
+        */
+
+       set_debug(0UL, 0);
+       set_debug(0UL, 1);
+       set_debug(0UL, 2);
+       set_debug(0UL, 3);
+       set_debug(0UL, 6);
+       set_debug(0UL, 7);
+
+       /*
+        * Force FPU initialization:
+        */
+       clear_thread_flag(TIF_USEDFPU); 
+       current->used_math = 0;
+       stts();
+}
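A short summary of the syscall MSR setup done in cpu_init() above; this is the architecturally defined SYSCALL/SYSRET behaviour, not something added by the patch:

    /*
     * MSR_STAR  bits 47:32  selector loaded into CS on SYSCALL (__KERNEL_CS
     *                       here); SS becomes that selector + 8.
     *           bits 63:48  base selector used by SYSRET (__USER32_CS here):
     *                       a 32-bit SYSRET loads CS from it directly, a
     *                       64-bit SYSRET loads CS from it + 16, SS from it + 8.
     * MSR_LSTAR             rip to enter on a 64-bit SYSCALL (system_call above).
     * MSR_CSTAR             rip for SYSCALL from compatibility mode
     *                       (ia32_cstar_target).
     * MSR_SYSCALL_MASK      rflags bits cleared on entry (TF, DF, IF here).
     * EFER bit 0 (SCE)      must be set before SYSCALL/SYSRET work at all,
     *                       which is what the rdmsrl/wrmsrl pair on MSR_EFER does.
     */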
diff --git a/arch/x86_64/kernel/signal.c b/arch/x86_64/kernel/signal.c
new file mode 100644 (file)
index 0000000..2167225
--- /dev/null
@@ -0,0 +1,591 @@
+/*
+ *  linux/arch/x86_64/kernel/signal.c
+ *
+ *  Copyright (C) 1991, 1992  Linus Torvalds
+ *  Copyright (C) 2000, 2001, 2002 Andi Kleen SuSE Labs
+ *
+ *  1997-11-28  Modified for POSIX.1b signals by Richard Henderson
+ *  2000-06-20  Pentium III FXSR, SSE support by Gareth Hughes
+ *  2000-2002   x86-64 support by Andi Kleen
+ * 
+ *  $Id: signal.c,v 1.18 2001/10/17 22:30:37 ak Exp $
+ */
+
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/smp.h>
+#include <linux/smp_lock.h>
+#include <linux/kernel.h>
+#include <linux/signal.h>
+#include <linux/errno.h>
+#include <linux/wait.h>
+#include <linux/ptrace.h>
+#include <linux/unistd.h>
+#include <linux/stddef.h>
+#include <linux/tty.h>
+#include <linux/personality.h>
+#include <linux/binfmts.h>
+#include <asm/ucontext.h>
+#include <asm/uaccess.h>
+#include <asm/i387.h>
+
+#define DEBUG_SIG 0
+
+#define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP)))
+
+void ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
+               sigset_t *set, struct pt_regs * regs); 
+void ia32_setup_frame(int sig, struct k_sigaction *ka,
+            sigset_t *set, struct pt_regs * regs); 
+
+int copy_siginfo_to_user(siginfo_t *to, siginfo_t *from)
+{
+       if (!access_ok (VERIFY_WRITE, to, sizeof(siginfo_t)))
+               return -EFAULT;
+       if (from->si_code < 0)
+               return __copy_to_user(to, from, sizeof(siginfo_t));
+       else {
+               int err;
+
+               /* If you change siginfo_t structure, please be sure
+                  this code is fixed accordingly.
+                  It should never copy any pad contained in the structure
+                  to avoid security leaks, but must copy the generic
+                  3 ints plus the relevant union member.  */
+               err = __put_user(from->si_signo, &to->si_signo);
+               err |= __put_user(from->si_errno, &to->si_errno);
+               err |= __put_user((short)from->si_code, &to->si_code);
+               /* First 32bits of unions are always present.  */
+               err |= __put_user(from->si_pid, &to->si_pid);
+               switch (from->si_code >> 16) {
+               case __SI_FAULT >> 16:
+                       break;
+               case __SI_CHLD >> 16:
+                       err |= __put_user(from->si_utime, &to->si_utime);
+                       err |= __put_user(from->si_stime, &to->si_stime);
+                       err |= __put_user(from->si_status, &to->si_status);
+               default:
+                       err |= __put_user(from->si_uid, &to->si_uid);
+                       break;
+               /* case __SI_RT: This is not generated by the kernel as of now.  */
+               }
+               return err;
+       }
+}
+
+asmlinkage long
+sys_rt_sigsuspend(sigset_t *unewset, size_t sigsetsize, struct pt_regs regs)
+{
+       sigset_t saveset, newset;
+
+       /* XXX: Don't preclude handling different sized sigset_t's.  */
+       if (sigsetsize != sizeof(sigset_t))
+               return -EINVAL;
+
+       if (copy_from_user(&newset, unewset, sizeof(newset)))
+               return -EFAULT;
+       sigdelsetmask(&newset, ~_BLOCKABLE);
+
+       spin_lock_irq(&current->sigmask_lock);
+       saveset = current->blocked;
+       current->blocked = newset;
+       recalc_sigpending(current);
+       spin_unlock_irq(&current->sigmask_lock);
+#if DEBUG_SIG
+       printk("rt_sigsuspend savset(%lx) newset(%lx) regs(%p) rip(%lx)\n",
+               saveset, newset, &regs, regs.rip);
+#endif 
+       regs.rax = -EINTR;
+       while (1) {
+               current->state = TASK_INTERRUPTIBLE;
+               schedule();
+               if (do_signal(&regs, &saveset))
+                       return -EINTR;
+       }
+}
+
+asmlinkage long
+sys_sigaltstack(const stack_t *uss, stack_t *uoss, struct pt_regs regs)
+{
+       return do_sigaltstack(uss, uoss, regs.rsp);
+}
+
+
+/*
+ * Do a signal return; undo the signal stack.
+ */
+
+struct rt_sigframe
+{
+       char *pretcode;
+       struct ucontext uc;
+       struct siginfo info;
+       struct _fpstate fpstate;
+       char retcode[8];
+};
+
+static int
+restore_sigcontext(struct pt_regs *regs, struct sigcontext *sc, unsigned long *prax)
+{
+       unsigned int err = 0;
+
+#define COPY(x)                err |= __get_user(regs->x, &sc->x)
+
+#define COPY_SEG(seg)                                                  \
+       { unsigned short tmp;                                           \
+         err |= __get_user(tmp, &sc->seg);                             \
+         regs->x##seg = tmp; }
+
+#define COPY_SEG_STRICT(seg)                                           \
+       { unsigned short tmp;                                           \
+         err |= __get_user(tmp, &sc->seg);                             \
+         regs->x##seg = tmp|3; }
+
+#define GET_SEG(seg)                                                   \
+       { unsigned short tmp;                                           \
+         err |= __get_user(tmp, &sc->seg);                             \
+         loadsegment(seg,tmp); }
+
+       /* XXX: rdmsr for 64bits */ 
+       GET_SEG(gs);
+       GET_SEG(fs);
+       COPY(rdi); COPY(rsi); COPY(rbp); COPY(rsp); COPY(rbx);
+       COPY(rdx); COPY(rcx); COPY(rip);
+       COPY(r8);
+       COPY(r9);
+       COPY(r10);
+       COPY(r11);
+       COPY(r12);
+       COPY(r13);
+       COPY(r14);
+       COPY(r15);
+
+       
+       {
+               unsigned int tmpflags;
+               err |= __get_user(tmpflags, &sc->eflags);
+               regs->eflags = (regs->eflags & ~0x40DD5) | (tmpflags & 0x40DD5);
+               regs->orig_rax = -1;            /* disable syscall checks */
+       }
+
+       {
+               struct _fpstate * buf;
+               err |= __get_user(buf, &sc->fpstate);
+               if (buf) {
+                       if (verify_area(VERIFY_READ, buf, sizeof(*buf)))
+                               goto badframe;
+                       err |= restore_i387(buf);
+               }
+       }
+
+       err |= __get_user(*prax, &sc->rax);
+       return err;
+
+badframe:
+       return 1;
+}
+
+asmlinkage long sys_rt_sigreturn(struct pt_regs regs)
+{
+       struct rt_sigframe *frame = (struct rt_sigframe *)(regs.rsp - 8);
+       sigset_t set;
+       stack_t st;
+       long eax;
+
+       if (verify_area(VERIFY_READ, frame, sizeof(*frame)))
+               goto badframe;
+       if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set)))
+               goto badframe;
+
+       sigdelsetmask(&set, ~_BLOCKABLE);
+       spin_lock_irq(&current->sigmask_lock);
+       current->blocked = set;
+       recalc_sigpending(current);
+       spin_unlock_irq(&current->sigmask_lock);
+       
+       if (restore_sigcontext(&regs, &frame->uc.uc_mcontext, &eax))
+               goto badframe;
+
+#if DEBUG_SIG
+       printk("%d sigreturn rip:%lx rsp:%lx frame:%p rax:%lx\n",current->pid,regs.rip,regs.rsp,frame,eax);
+#endif
+
+       if (__copy_from_user(&st, &frame->uc.uc_stack, sizeof(st)))
+               goto badframe;
+       /* It is more difficult to avoid calling this function than to
+          call it and ignore errors.  */
+       do_sigaltstack(&st, NULL, regs.rsp);
+
+       return eax;
+
+badframe:
+#if DEBUG_SIG
+       printk("%d bad frame %p\n",current->pid,frame);
+#endif
+       force_sig(SIGSEGV, current);
+       return 0;
+}      
+
+/*
+ * Set up a signal frame.
+ */
+
+static int
+setup_sigcontext(struct sigcontext *sc, struct _fpstate *fpstate,
+                struct pt_regs *regs, unsigned long mask)
+{
+       int tmp, err = 0;
+
+       tmp = 0;
+       __asm__("movl %%gs,%0" : "=r"(tmp): "0"(tmp));
+       err |= __put_user(tmp, (unsigned int *)&sc->gs);
+       __asm__("movl %%fs,%0" : "=r"(tmp): "0"(tmp));
+       err |= __put_user(tmp, (unsigned int *)&sc->fs);
+
+       err |= __put_user(regs->rdi, &sc->rdi);
+       err |= __put_user(regs->rsi, &sc->rsi);
+       err |= __put_user(regs->rbp, &sc->rbp);
+       err |= __put_user(regs->rsp, &sc->rsp);
+       err |= __put_user(regs->rbx, &sc->rbx);
+       err |= __put_user(regs->rdx, &sc->rdx);
+       err |= __put_user(regs->rcx, &sc->rcx);
+       err |= __put_user(regs->rax, &sc->rax);
+       err |= __put_user(regs->r8, &sc->r8);
+       err |= __put_user(regs->r9, &sc->r9);
+       err |= __put_user(regs->r10, &sc->r10);
+       err |= __put_user(regs->r11, &sc->r11);
+       err |= __put_user(regs->r12, &sc->r12);
+       err |= __put_user(regs->r13, &sc->r13);
+       err |= __put_user(regs->r14, &sc->r14);
+       err |= __put_user(regs->r15, &sc->r15);
+       err |= __put_user(current->thread.trap_no, &sc->trapno);
+       err |= __put_user(current->thread.error_code, &sc->err);
+       err |= __put_user(regs->rip, &sc->rip);
+       err |= __put_user(regs->eflags, &sc->eflags);
+       err |= __put_user(regs->rsp, &sc->rsp_at_signal);
+
+       tmp = save_i387(fpstate);
+       if (tmp < 0)
+         err = 1;
+       else
+         err |= __put_user(tmp ? fpstate : NULL, &sc->fpstate);
+
+       /* non-iBCS2 extensions.. */
+       err |= __put_user(mask, &sc->oldmask);
+       err |= __put_user(current->thread.cr2, &sc->cr2);
+
+       return err;
+}
+
+/*
+ * Determine which stack to use..
+ */
+static inline void *
+get_sigframe(struct k_sigaction *ka, struct pt_regs * regs, size_t frame_size)
+{
+       unsigned long rsp;
+
+       /* Default to using normal stack - redzone*/
+       rsp = regs->rsp - 128;
+
+       /* This is the X/Open sanctioned signal stack switching.  */
+       if (ka->sa.sa_flags & SA_ONSTACK) {
+               if (sas_ss_flags(rsp) == 0)
+                       rsp = current->sas_ss_sp + current->sas_ss_size;
+       }
+
+       return (void *)((rsp - frame_size) & -16UL);
+}
+
+static void setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
+                          sigset_t *set, struct pt_regs * regs)
+{
+       struct thread_info *ti; 
+       struct rt_sigframe *frame;
+       int err = 0;
+
+       frame = get_sigframe(ka, regs, sizeof(*frame));
+
+       if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
+               goto give_sigsegv;
+
+       if (ka->sa.sa_flags & SA_SIGINFO) { 
+               err |= copy_siginfo_to_user(&frame->info, info);
+               if (err)
+                       goto give_sigsegv;
+       }
+               
+       /* Create the ucontext.  */
+       err |= __put_user(0, &frame->uc.uc_flags);
+       err |= __put_user(0, &frame->uc.uc_link);
+       err |= __put_user(current->sas_ss_sp, &frame->uc.uc_stack.ss_sp);
+       err |= __put_user(sas_ss_flags(regs->rsp),
+                         &frame->uc.uc_stack.ss_flags);
+       err |= __put_user(current->sas_ss_size, &frame->uc.uc_stack.ss_size);
+       err |= setup_sigcontext(&frame->uc.uc_mcontext, &frame->fpstate,
+                               regs, set->sig[0]);
+       err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
+       if (err)
+               goto give_sigsegv;
+
+
+       /* Set up to return from userspace.  If provided, use a stub
+          already in userspace.  */
+       /* x86-64 should always use SA_RESTORER. */
+       if (ka->sa.sa_flags & SA_RESTORER) {
+               err |= __put_user(ka->sa.sa_restorer, &frame->pretcode);
+       } else {
+               printk("%s forgot to set SA_RESTORER for signal %d.\n", current->comm, sig); 
+               goto give_sigsegv; 
+       }
+
+       if (err)
+               goto give_sigsegv;
+
+#if DEBUG_SIG
+       printk("%d old rip %lx old rsp %lx old rax %lx\n", current->pid,regs->rip,regs->rsp,regs->rax);
+#endif
+
+       ti = current_thread_info(); 
+
+       /* Set up registers for signal handler */
+       regs->rdi = (ti->exec_domain
+                    && ti->exec_domain->signal_invmap
+                    && sig < 32
+                    ? ti->exec_domain->signal_invmap[sig]
+                    : sig);
+       regs->rax = 0;  /* In case the signal handler was declared without prototypes */ 
+
+
+       /* This also works for non SA_SIGINFO handlers because they expect the
+          next argument after the signal number on the stack. */
+       regs->rsi = (unsigned long)&frame->info; 
+       regs->rdx = (unsigned long)&frame->uc; 
+       regs->rsp = (unsigned long) frame;
+       regs->rip = (unsigned long) ka->sa.sa_handler;
+
+       set_fs(USER_DS);
+       // XXX: cs
+       regs->eflags &= ~TF_MASK;
+
+#if DEBUG_SIG
+       printk("SIG deliver (%s:%d): sp=%p pc=%p ra=%p\n",
+               current->comm, current->pid, frame, regs->rip, frame->pretcode);
+#endif
+
+       return;
+
+give_sigsegv:
+       if (sig == SIGSEGV)
+               ka->sa.sa_handler = SIG_DFL;
+       force_sig(SIGSEGV, current);
+}
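To connect the register setup in setup_rt_frame() with what userspace sees: the x86-64 calling convention passes the first three integer arguments in rdi, rsi and rdx, so the handler is entered as if it had been called normally, and its eventual return pops frame->pretcode as the return address. A minimal sketch of the userspace side (names are illustrative; the restorer shown is the usual libc-style stub, not something this patch installs):

    /* handler installed with SA_SIGINFO; sig/info/ucontext arrive in rdi/rsi/rdx */
    void handler(int sig, siginfo_t *info, void *ucontext)
    {
    }

    /* the SA_RESTORER stub that frame->pretcode is expected to point at */
    restorer:
            movq $__NR_rt_sigreturn, %rax
            syscall

Note that nothing is written into the retcode[] member here; a caller that omits SA_RESTORER gets the printk and SIGSEGV above.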
+
+/*
+ * OK, we're invoking a handler
+ */    
+
+static void
+handle_signal(unsigned long sig, struct k_sigaction *ka,
+             siginfo_t *info, sigset_t *oldset, struct pt_regs * regs)
+{
+#if DEBUG_SIG
+       printk("handle_signal pid:%d sig:%lu rip:%lx rsp:%lx regs=%p\n", current->pid, sig, 
+               regs->rip, regs->rsp, regs);
+#endif
+
+       /* Are we from a system call? */
+       if (regs->orig_rax >= 0) {
+               /* If so, check system call restarting.. */
+               switch (regs->rax) {
+                       case -ERESTARTNOHAND:
+                               regs->rax = -EINTR;
+                               break;
+
+                       case -ERESTARTSYS:
+                               if (!(ka->sa.sa_flags & SA_RESTART)) {
+                                       regs->rax = -EINTR;
+                                       break;
+                               }
+                       /* fallthrough */
+                       case -ERESTARTNOINTR:
+                               regs->rax = regs->orig_rax;
+                               regs->rip -= 2;
+               }
+       }
+
+#ifdef CONFIG_IA32_EMULATION
+       if (test_thread_flag(TIF_IA32)) {
+               if (ka->sa.sa_flags & SA_SIGINFO)
+                       ia32_setup_rt_frame(sig, ka, info, oldset, regs);
+               else
+                       ia32_setup_frame(sig, ka, oldset, regs);
+       } else 
+#endif
+       setup_rt_frame(sig, ka, info, oldset, regs);
+
+       if (ka->sa.sa_flags & SA_ONESHOT)
+               ka->sa.sa_handler = SIG_DFL;
+
+       if (!(ka->sa.sa_flags & SA_NODEFER)) {
+               spin_lock_irq(&current->sigmask_lock);
+               sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask);
+               sigaddset(&current->blocked,sig);
+               recalc_sigpending(current);
+               spin_unlock_irq(&current->sigmask_lock);
+       }
+}
+
+/*
+ * Note that 'init' is a special process: it doesn't get signals it doesn't
+ * want to handle. Thus you cannot kill init even with a SIGKILL even by
+ * mistake.
+ */
+int do_signal(struct pt_regs *regs, sigset_t *oldset)
+{
+       siginfo_t info;
+       struct k_sigaction *ka;
+
+       /*
+        * We want the common case to go fast, which
+        * is why we may in certain cases get here from
+        * kernel mode. Just return without doing anything
+        * if so.
+        */
+       if ((regs->cs & 3) != 3) {
+               return 1;
+       }       
+
+       if (!oldset)
+               oldset = &current->blocked;
+
+       for (;;) {
+               unsigned long signr;
+
+               spin_lock_irq(&current->sigmask_lock);
+               signr = dequeue_signal(&current->blocked, &info);
+               spin_unlock_irq(&current->sigmask_lock);
+
+               if (!signr) { 
+                       break;
+               }
+
+               if ((current->ptrace & PT_PTRACED) && signr != SIGKILL) {
+                       /* Let the debugger run.  */
+                       current->exit_code = signr;
+                       current->state = TASK_STOPPED;
+                       notify_parent(current, SIGCHLD);
+                       schedule();
+
+                       /* We're back.  Did the debugger cancel the sig?  */
+                       if (!(signr = current->exit_code))
+                               continue;
+                       current->exit_code = 0;
+
+                       /* The debugger continued.  Ignore SIGSTOP.  */
+                       if (signr == SIGSTOP)
+                               continue;
+
+                       /* Update the siginfo structure.  Is this good?  */
+                       if (signr != info.si_signo) {
+                               info.si_signo = signr;
+                               info.si_errno = 0;
+                               info.si_code = SI_USER;
+                               info.si_pid = current->p_pptr->pid;
+                               info.si_uid = current->p_pptr->uid;
+                       }
+
+                       /* If the (new) signal is now blocked, requeue it.  */
+                       if (sigismember(&current->blocked, signr)) {
+                               send_sig_info(signr, &info, current);
+                               continue;
+                       }
+               }
+
+               ka = &current->sig->action[signr-1];
+               if (ka->sa.sa_handler == SIG_IGN) {
+                       if (signr != SIGCHLD)
+                               continue;
+                       /* Check for SIGCHLD: it's special.  */
+                       while (sys_wait4(-1, NULL, WNOHANG, NULL) > 0)
+                               /* nothing */;
+                       continue;
+               }
+
+               if (ka->sa.sa_handler == SIG_DFL) {
+                       int exit_code = signr;
+
+                       /* Init gets no signals it doesn't want.  */
+                       if (current->pid == 1)                        
+                               continue;
+
+                       switch (signr) {
+                       case SIGCONT: case SIGCHLD: case SIGWINCH:
+                               continue;
+
+                       case SIGTSTP: case SIGTTIN: case SIGTTOU:
+                               if (is_orphaned_pgrp(current->pgrp))
+                                       continue;
+                               /* FALLTHRU */
+
+                       case SIGSTOP: {
+                               struct signal_struct *sig;
+                               current->state = TASK_STOPPED;
+                               current->exit_code = signr;
+                               sig = current->p_pptr->sig;
+                               if (sig && !(sig->action[SIGCHLD-1].sa.sa_flags & SA_NOCLDSTOP))
+                                       notify_parent(current, SIGCHLD);
+                               schedule();
+                               continue;
+                       }
+
+                       case SIGQUIT: case SIGILL: case SIGTRAP:
+                       case SIGABRT: case SIGFPE: case SIGSEGV:
+                       case SIGBUS: case SIGSYS: case SIGXCPU: case SIGXFSZ:
+                               if (do_coredump(signr, regs))
+                                       exit_code |= 0x80;
+                               /* FALLTHRU */
+
+                       default:
+                               sig_exit(signr, exit_code, &info);
+                               /* NOTREACHED */
+                       }
+               }
+
+               /* Reenable any watchpoints before delivering the
+                * signal to user space. The processor register will
+                * have been cleared if the watchpoint triggered
+                * inside the kernel.
+                */
+               __asm__("movq %0,%%db7" : : "r" (current->thread.debugreg[7]));
+
+               /* Whee!  Actually deliver the signal.  */
+               handle_signal(signr, ka, &info, oldset, regs);
+               return 1;
+       }
+
+       /* Did we come from a system call? */
+       if (regs->orig_rax >= 0) {
+               /* Restart the system call - no handlers present */
+               if (regs->rax == -ERESTARTNOHAND ||
+                   regs->rax == -ERESTARTSYS ||
+                   regs->rax == -ERESTARTNOINTR) {
+                       regs->rax = regs->orig_rax;
+                       regs->rcx -= 2;
+               }
+       }
+       return 0;
+}
+
+void do_notify_resume(struct pt_regs *regs, sigset_t *oldset, __u32 thread_info_flags)
+{
+#if DEBUG_SIG
+       printk("do_notify_resume flags:%x rip:%lx rsp:%lx caller:%lx pending:%lx\n",
+              thread_info_flags, regs->rip, regs->rsp, __builtin_return_address(0),signal_pending(current)); 
+#endif
+              
+
+       /* deal with pending signal delivery */
+       if (thread_info_flags & _TIF_SIGPENDING)
+               do_signal(regs,oldset);
+}
diff --git a/arch/x86_64/kernel/smp.c b/arch/x86_64/kernel/smp.c
new file mode 100644 (file)
index 0000000..62c8e7d
--- /dev/null
@@ -0,0 +1,584 @@
+/*
+ *     Intel SMP support routines.
+ *
+ *     (c) 1995 Alan Cox, Building #3 <alan@redhat.com>
+ *     (c) 1998-99, 2000 Ingo Molnar <mingo@redhat.com>
+ *
+ *     This code is released under the GNU General Public License version 2 or
+ *     later.
+ */
+
+#include <linux/init.h>
+
+#include <linux/mm.h>
+#include <linux/irq.h>
+#include <linux/delay.h>
+#include <linux/spinlock.h>
+#include <linux/smp_lock.h>
+#include <linux/kernel_stat.h>
+#include <linux/mc146818rtc.h>
+
+#include <asm/mtrr.h>
+#include <asm/pgalloc.h>
+
+/*
+ *     Some notes on x86 processor bugs affecting SMP operation:
+ *
+ *     Pentium, Pentium Pro, II, III (and all CPUs) have bugs.
+ *     The Linux implications for SMP are handled as follows:
+ *
+ *     Pentium III / [Xeon]
+ *             None of the E1AP-E3AP errata are visible to the user.
+ *
+ *     E1AP.   see PII A1AP
+ *     E2AP.   see PII A2AP
+ *     E3AP.   see PII A3AP
+ *
+ *     Pentium II / [Xeon]
+ *             None of the A1AP-A3AP errata are visible to the user.
+ *
+ *     A1AP.   see PPro 1AP
+ *     A2AP.   see PPro 2AP
+ *     A3AP.   see PPro 7AP
+ *
+ *     Pentium Pro
+ *             None of 1AP-9AP errata are visible to the normal user,
+ *     except occasional delivery of 'spurious interrupt' as trap #15.
+ *     This is very rare and a non-problem.
+ *
+ *     1AP.    Linux maps APIC as non-cacheable
+ *     2AP.    worked around in hardware
+ *     3AP.    fixed in C0 and above steppings microcode update.
+ *             Linux does not use excessive STARTUP_IPIs.
+ *     4AP.    worked around in hardware
+ *     5AP.    symmetric IO mode (normal Linux operation) not affected.
+ *             'noapic' mode has vector 0xf filled out properly.
+ *     6AP.    'noapic' mode might be affected - fixed in later steppings
+ *     7AP.    We do not assume writes to the LVT deasserting IRQs
+ *     8AP.    We do not enable low power mode (deep sleep) during MP bootup
+ *     9AP.    We do not use mixed mode
+ *
+ *     Pentium
+ *             There is a marginal case where REP MOVS on 100MHz SMP
+ *     machines with B stepping processors can fail. XXX should provide
+ *     an L1cache=Writethrough or L1cache=off option.
+ *
+ *             B stepping CPUs may hang. There are hardware workarounds
+ *     for this. We warn about it in case your board doesn't have the
+ *     workarounds. Basically that's so I can tell anyone with a B stepping
+ *     CPU and SMP problems "tough".
+ *
+ *     Specific items [From Pentium Processor Specification Update]
+ *
+ *     1AP.    Linux doesn't use remote read
+ *     2AP.    Linux doesn't trust APIC errors
+ *     3AP.    We work around this
+ *     4AP.    Linux never generated 3 interrupts of the same priority
+ *             to cause a lost local interrupt.
+ *     5AP.    Remote read is never used
+ *     6AP.    not affected - worked around in hardware
+ *     7AP.    not affected - worked around in hardware
+ *     8AP.    worked around in hardware - we get explicit CS errors if not
+ *     9AP.    only 'noapic' mode affected. Might generate spurious
+ *             interrupts, we log only the first one and count the
+ *             rest silently.
+ *     10AP.   not affected - worked around in hardware
+ *     11AP.   Linux reads the APIC between writes to avoid this, as per
+ *             the documentation. Make sure you preserve this as it affects
+ *             the C stepping chips too.
+ *     12AP.   not affected - worked around in hardware
+ *     13AP.   not affected - worked around in hardware
+ *     14AP.   we always deassert INIT during bootup
+ *     15AP.   not affected - worked around in hardware
+ *     16AP.   not affected - worked around in hardware
+ *     17AP.   not affected - worked around in hardware
+ *     18AP.   not affected - worked around in hardware
+ *     19AP.   not affected - worked around in BIOS
+ *
+ *     If this sounds worrying, believe me: these bugs are either ___RARE___
+ *     or are signal timing bugs worked around in hardware, and there's
+ *     nothing of note from the C stepping upwards.
+ */
+
+/* The 'big kernel lock' */
+spinlock_t kernel_flag __cacheline_aligned_in_smp = SPIN_LOCK_UNLOCKED;
+
+struct tlb_state cpu_tlbstate[NR_CPUS] __cacheline_aligned = {[0 ... NR_CPUS-1] = { &init_mm, 0, }};
+
+/*
+ * the following functions deal with sending IPIs between CPUs.
+ *
+ * We use 'broadcast', CPU->CPU IPIs and self-IPIs too.
+ */
+
+static inline int __prepare_ICR (unsigned int shortcut, int vector)
+{
+       return APIC_DM_FIXED | shortcut | vector | APIC_DEST_LOGICAL;
+}
+
+static inline int __prepare_ICR2 (unsigned int mask)
+{
+       return SET_APIC_DEST_FIELD(mask);
+}
+
+static inline void __send_IPI_shortcut(unsigned int shortcut, int vector)
+{
+       /*
+        * Subtle. In the case of the 'never do double writes' workaround
+        * we have to lock out interrupts to be safe.  As we don't care
+        * about the value read, we use an atomic rmw access to avoid costly
+        * cli/sti.  Otherwise we use an even cheaper single atomic write
+        * to the APIC.
+        */
+       unsigned int cfg;
+
+       /*
+        * Wait for idle.
+        */
+       apic_wait_icr_idle();
+
+       /*
+        * No need to touch the target chip field
+        */
+       cfg = __prepare_ICR(shortcut, vector);
+
+       /*
+        * Send the IPI. The write to APIC_ICR fires this off.
+        */
+       apic_write_around(APIC_ICR, cfg);
+}
+
+static inline void send_IPI_allbutself(int vector)
+{
+       /*
+        * If there are no other CPUs in the system then
+        * we get an APIC send error if we try to broadcast.
+        * Thus we have to avoid sending IPIs in this case.
+        */
+       if (smp_num_cpus > 1)
+               __send_IPI_shortcut(APIC_DEST_ALLBUT, vector);
+}
+
+static inline void send_IPI_all(int vector)
+{
+       __send_IPI_shortcut(APIC_DEST_ALLINC, vector);
+}
+
+void send_IPI_self(int vector)
+{
+       __send_IPI_shortcut(APIC_DEST_SELF, vector);
+}
+
+static inline void send_IPI_mask(int mask, int vector)
+{
+       unsigned long cfg;
+       unsigned long flags;
+
+       __save_flags(flags);
+       __cli();
+
+       /*
+        * Wait for idle.
+        */
+       apic_wait_icr_idle();
+
+       /*
+        * prepare target chip field
+        */
+       cfg = __prepare_ICR2(mask);
+       apic_write_around(APIC_ICR2, cfg);
+
+       /*
+        * program the ICR 
+        */
+       cfg = __prepare_ICR(0, vector);
+       
+       /*
+        * Send the IPI. The write to APIC_ICR fires this off.
+        */
+       apic_write_around(APIC_ICR, cfg);
+       __restore_flags(flags);
+}
+
+/*
+ *     Smarter SMP flushing macros. 
+ *             c/o Linus Torvalds.
+ *
+ *     These mean you can really definitely utterly forget about
+ *     writing to user space from interrupts. (It's not allowed anyway.)
+ *
+ *     Optimizations Manfred Spraul <manfred@colorfullife.com>
+ */
+
+static volatile unsigned long flush_cpumask;
+static struct mm_struct * flush_mm;
+static unsigned long flush_va;
+static spinlock_t tlbstate_lock = SPIN_LOCK_UNLOCKED;
+#define FLUSH_ALL      0xffffffff
+
+/*
+ * We cannot call mmdrop() because we are in interrupt context;
+ * instead we update mm->cpu_vm_mask.
+ */
+static inline void leave_mm (unsigned long cpu)
+{
+       if (cpu_tlbstate[cpu].state == TLBSTATE_OK)
+               BUG();
+       clear_bit(cpu, &cpu_tlbstate[cpu].active_mm->cpu_vm_mask);
+}
+
+/*
+ *
+ * The flush IPI assumes that a thread switch happens in this order:
+ * [cpu0: the cpu that switches]
+ * 1) switch_mm() either 1a) or 1b)
+ * 1a) thread switch to a different mm
+ * 1a1) clear_bit(cpu, &old_mm->cpu_vm_mask);
+ *     Stop ipi delivery for the old mm. This is not synchronized with
+ *     the other cpus, but smp_invalidate_interrupt ignores flush ipis
+ *     for the wrong mm, and in the worst case we perform a superfluous
+ *     tlb flush.
+ * 1a2) set cpu_tlbstate to TLBSTATE_OK
+ *     Now the smp_invalidate_interrupt won't call leave_mm if cpu0
+ *     was in lazy tlb mode.
+ * 1a3) update cpu_tlbstate[].active_mm
+ *     Now cpu0 accepts tlb flushes for the new mm.
+ * 1a4) set_bit(cpu, &new_mm->cpu_vm_mask);
+ *     Now the other cpus will send tlb flush ipis.
+ * 1a5) change cr3.
+ * 1b) thread switch without mm change
+ *     cpu_tlbstate[].active_mm is correct, cpu0 already handles
+ *     flush ipis.
+ * 1b1) set cpu_tlbstate to TLBSTATE_OK
+ * 1b2) test_and_set the cpu bit in cpu_vm_mask.
+ *     Atomically set the bit [other cpus will start sending flush ipis],
+ *     and test the bit.
+ * 1b3) if the bit was 0: leave_mm was called, flush the tlb.
+ * 2) switch %%esp, ie current
+ *
+ * The interrupt must handle 2 special cases:
+ * - cr3 is changed before %%esp, ie. it cannot use current->{active_,}mm.
+ * - the cpu performs speculative tlb reads, i.e. even if the cpu only
+ *   runs in kernel space, the cpu could load tlb entries for user space
+ *   pages.
+ *
+ * The good news is that cpu_tlbstate is local to each cpu, no
+ * write/read ordering problems.
+ */
+
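+/*
+ * As a rough illustration of the 1a) ordering above, the mm-switch side
+ * (which lives in switch_mm(), not in this file) can be pictured as:
+ *
+ *     clear_bit(cpu, &prev->cpu_vm_mask);        <- 1a1: stop old-mm flush ipis
+ *     cpu_tlbstate[cpu].state = TLBSTATE_OK;     <- 1a2
+ *     cpu_tlbstate[cpu].active_mm = next;        <- 1a3
+ *     set_bit(cpu, &next->cpu_vm_mask);          <- 1a4: start new-mm flush ipis
+ *     load the new cr3                           <- 1a5
+ *
+ * Only the relative order matters for the interrupt handler below; the
+ * statements (and the prev/next names) are a sketch, not a quote of
+ * switch_mm().
+ */
+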
+/*
+ * TLB flush IPI:
+ *
+ * 1) Flush the tlb entries if the cpu uses the mm that's being flushed.
+ * 2) Leave the mm if we are in the lazy tlb mode.
+ */
+
+asmlinkage void smp_invalidate_interrupt (void)
+{
+       unsigned long cpu = smp_processor_id();
+
+       if (!test_bit(cpu, &flush_cpumask))
+               return;
+               /* 
+                * This was a BUG(), but until someone can quote me the
+                * line from the Intel manual that guarantees an IPI to
+                * multiple CPUs is retried _only_ on the erroring CPUs,
+                * it's staying as a return.
+                *
+                * BUG();
+                */
+                
+       if (flush_mm == cpu_tlbstate[cpu].active_mm) {
+               if (cpu_tlbstate[cpu].state == TLBSTATE_OK) {
+                       if (flush_va == FLUSH_ALL)
+                               local_flush_tlb();
+                       else
+                               __flush_tlb_one(flush_va);
+               } else
+                       leave_mm(cpu);
+       }
+       ack_APIC_irq();
+       clear_bit(cpu, &flush_cpumask);
+}
+
+static void flush_tlb_others (unsigned long cpumask, struct mm_struct *mm,
+                                               unsigned long va)
+{
+       /*
+        * A couple of (to be removed) sanity checks:
+        *
+        * - we do not send IPIs to not-yet booted CPUs.
+        * - current CPU must not be in mask
+        * - mask must exist :)
+        */
+       if (!cpumask)
+               BUG();
+       if ((cpumask & cpu_online_map) != cpumask)
+               BUG();
+       if (cpumask & (1 << smp_processor_id()))
+               BUG();
+       if (!mm)
+               BUG();
+
+       /*
+        * I'm not happy about this global shared spinlock in the
+        * MM hot path, but we'll see how contended it is.
+        * Temporarily this turns IRQs off, so that lockups are
+        * detected by the NMI watchdog.
+        */
+       spin_lock(&tlbstate_lock);
+       
+       flush_mm = mm;
+       flush_va = va;
+       atomic_set_mask(cpumask, &flush_cpumask);
+       /*
+        * We have to send the IPI only to
+        * CPUs affected.
+        */
+       send_IPI_mask(cpumask, INVALIDATE_TLB_VECTOR);
+
+       while (flush_cpumask)
+               /* nothing. lockup detection does not belong here */;
+
+       flush_mm = NULL;
+       flush_va = 0;
+       spin_unlock(&tlbstate_lock);
+}
+       
+void flush_tlb_current_task(void)
+{
+       struct mm_struct *mm = current->mm;
+       unsigned long cpu_mask = mm->cpu_vm_mask & ~(1 << smp_processor_id());
+
+       local_flush_tlb();
+       if (cpu_mask)
+               flush_tlb_others(cpu_mask, mm, FLUSH_ALL);
+}
+
+void flush_tlb_mm (struct mm_struct * mm)
+{
+       unsigned long cpu_mask = mm->cpu_vm_mask & ~(1 << smp_processor_id());
+
+       if (current->active_mm == mm) {
+               if (current->mm)
+                       local_flush_tlb();
+               else
+                       leave_mm(smp_processor_id());
+       }
+       if (cpu_mask)
+               flush_tlb_others(cpu_mask, mm, FLUSH_ALL);
+}
+
+void flush_tlb_page(struct vm_area_struct * vma, unsigned long va)
+{
+       struct mm_struct *mm = vma->vm_mm;
+       unsigned long cpu_mask = mm->cpu_vm_mask & ~(1 << smp_processor_id());
+
+       if (current->active_mm == mm) {
+               if(current->mm)
+                       __flush_tlb_one(va);
+                else
+                       leave_mm(smp_processor_id());
+       }
+
+       if (cpu_mask)
+               flush_tlb_others(cpu_mask, mm, va);
+}
+
+static inline void do_flush_tlb_all_local(void)
+{
+       unsigned long cpu = smp_processor_id();
+
+       __flush_tlb_all();
+       if (cpu_tlbstate[cpu].state == TLBSTATE_LAZY)
+               leave_mm(cpu);
+}
+
+static void flush_tlb_all_ipi(void* info)
+{
+       do_flush_tlb_all_local();
+}
+
+void flush_tlb_all(void)
+{
+       smp_call_function (flush_tlb_all_ipi,0,1,1);
+
+       do_flush_tlb_all_local();
+}
+
+static spinlock_t migration_lock = SPIN_LOCK_UNLOCKED;
+static task_t *new_task;
+
+/*
+ * This function sends a 'task migration' IPI to another CPU.
+ * Must be called from syscall contexts, with interrupts *enabled*.
+ */
+void smp_migrate_task(int cpu, task_t *p)
+{
+       /*
+        * The target CPU will unlock the migration spinlock:
+        */
+       _raw_spin_lock(&migration_lock);
+       new_task = p;
+       send_IPI_mask(1 << cpu, TASK_MIGRATION_VECTOR);
+}
+
+/*
+ * Task migration callback.
+ */
+asmlinkage void smp_task_migration_interrupt(void)
+{
+       task_t *p;
+
+       ack_APIC_irq();
+       p = new_task;
+       _raw_spin_unlock(&migration_lock);
+       sched_task_migrated(p);
+}
+/*
+ * this function sends a 'reschedule' IPI to another CPU.
+ * it goes straight through and wastes no time serializing
+ * anything. Worst case is that we lose a reschedule ...
+ */
+void smp_send_reschedule(int cpu)
+{
+       send_IPI_mask(1 << cpu, RESCHEDULE_VECTOR);
+}
+
+/*
+ * this function sends a reschedule IPI to all (other) CPUs.
+ * This should only be used if some 'global' task became runnable,
+ * such as an RT task that must be handled now. The first CPU
+ * that manages to grab the task will run it.
+ */
+void smp_send_reschedule_all(void)
+{
+       send_IPI_allbutself(RESCHEDULE_VECTOR);
+}
+
+/*
+ * Structure and data for smp_call_function(). This is designed to minimise
+ * static memory requirements. It also looks cleaner.
+ */
+static spinlock_t call_lock = SPIN_LOCK_UNLOCKED;
+
+struct call_data_struct {
+       void (*func) (void *info);
+       void *info;
+       atomic_t started;
+       atomic_t finished;
+       int wait;
+};
+
+static struct call_data_struct * call_data;
+
+/*
+ * this function sends a 'generic call function' IPI to all other CPUs
+ * in the system.
+ */
+
+int smp_call_function (void (*func) (void *info), void *info, int nonatomic,
+                       int wait)
+/*
+ * [SUMMARY] Run a function on all other CPUs.
+ * <func> The function to run. This must be fast and non-blocking.
+ * <info> An arbitrary pointer to pass to the function.
+ * <nonatomic> currently unused.
+ * <wait> If true, wait (atomically) until function has completed on other CPUs.
+ * [RETURNS] 0 on success, else a negative status code. Does not return until
+ * remote CPUs are nearly ready to execute <<func>> or have already executed it.
+ *
+ * You must not call this function with disabled interrupts or from a
+ * hardware interrupt handler; you may call it from a bottom half handler.
+ */
+{
+       struct call_data_struct data;
+       int cpus = smp_num_cpus-1;
+
+       if (!cpus)
+               return 0;
+
+       data.func = func;
+       data.info = info;
+       atomic_set(&data.started, 0);
+       data.wait = wait;
+       if (wait)
+               atomic_set(&data.finished, 0);
+
+       spin_lock_bh(&call_lock);
+       call_data = &data;
+       wmb();
+       /* Send a message to all other CPUs and wait for them to respond */
+       send_IPI_allbutself(CALL_FUNCTION_VECTOR);
+
+       /* Wait for response */
+       while (atomic_read(&data.started) != cpus)
+               barrier();
+
+       if (wait)
+               while (atomic_read(&data.finished) != cpus)
+                       barrier();
+       spin_unlock_bh(&call_lock);
+
+       return 0;
+}
+
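+/*
+ * A sketch of typical smp_call_function() usage; count_me()/hits are
+ * made-up names for illustration only (the real callers in this file are
+ * flush_tlb_all() above and smp_send_stop() below):
+ *
+ *     static atomic_t hits = ATOMIC_INIT(0);
+ *
+ *     static void count_me(void *unused)
+ *     {
+ *             atomic_inc(&hits);              // must be fast and non-blocking
+ *     }
+ *
+ *     smp_call_function(count_me, NULL, 1, 1);        // wait == 1
+ *     count_me(NULL);                                 // the calling CPU too
+ *
+ * and, as documented above, never call it with interrupts disabled or from
+ * a hardware interrupt handler.
+ */
+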
+static void stop_this_cpu (void * dummy)
+{
+       /*
+        * Remove this CPU:
+        */
+       clear_bit(smp_processor_id(), &cpu_online_map);
+       __cli();
+       disable_local_APIC();
+       for (;;)
+               __asm__("hlt");
+}
+
+/*
+ * this function calls the 'stop' function on all other CPUs in the system.
+ */
+
+void smp_send_stop(void)
+{
+       smp_call_function(stop_this_cpu, NULL, 1, 0);
+       smp_num_cpus = 1;
+
+       __cli();
+       disable_local_APIC();
+       __sti();
+}
+
+/*
+ * Reschedule call back. Nothing to do,
+ * all the work is done automatically when
+ * we return from the interrupt.
+ */
+asmlinkage void smp_reschedule_interrupt(void)
+{
+       ack_APIC_irq();
+}
+
+asmlinkage void smp_call_function_interrupt(void)
+{
+       void (*func) (void *info) = call_data->func;
+       void *info = call_data->info;
+       int wait = call_data->wait;
+
+       ack_APIC_irq();
+       /*
+        * Notify initiating CPU that I've grabbed the data and am
+        * about to execute the function
+        */
+       mb();
+       atomic_inc(&call_data->started);
+       /*
+        * At this point the info structure may be out of scope unless wait==1
+        */
+       (*func)(info);
+       if (wait) {
+               mb();
+               atomic_inc(&call_data->finished);
+       }
+}
+
diff --git a/arch/x86_64/kernel/smpboot.c b/arch/x86_64/kernel/smpboot.c
new file mode 100644
index 0000000..c185cc8
--- /dev/null
+++ b/arch/x86_64/kernel/smpboot.c
@@ -0,0 +1,1023 @@
+/*
+ *     x86 SMP booting functions
+ *
+ *     (c) 1995 Alan Cox, Building #3 <alan@redhat.com>
+ *     (c) 1998, 1999, 2000 Ingo Molnar <mingo@redhat.com>
+ *     Copyright 2001 Andi Kleen, SuSE Labs.
+ *
+ *     Much of the core SMP work is based on previous work by Thomas Radke, to
+ *     whom a great many thanks are extended.
+ *
+ *     Thanks to Intel for making available several different Pentium,
+ *     Pentium Pro and Pentium-II/Xeon MP machines.
+ *     Original development of Linux SMP code supported by Caldera.
+ *
+ *     This code is released under the GNU General Public License version 2 or
+ *     later.
+ *
+ *     Fixes
+ *             Felix Koop      :       NR_CPUS used properly
+ *             Jose Renau      :       Handle single CPU case.
+ *             Alan Cox        :       By repeated request 8) - Total BogoMIP report.
+ *             Greg Wright     :       Fix for kernel stacks panic.
+ *             Erich Boleyn    :       MP v1.4 and additional changes.
+ *     Matthias Sattler        :       Changes for 2.1 kernel map.
+ *     Michel Lespinasse       :       Changes for 2.1 kernel map.
+ *     Michael Chastain        :       Change trampoline.S to gnu as.
+ *             Alan Cox        :       Dumb bug: 'B' step PPro's are fine
+ *             Ingo Molnar     :       Added APIC timers, based on code
+ *                                     from Jose Renau
+ *             Ingo Molnar     :       various cleanups and rewrites
+ *             Tigran Aivazian :       fixed "0.00 in /proc/uptime on SMP" bug.
+ *     Maciej W. Rozycki       :       Bits for genuine 82489DX APICs
+ *     Andi Kleen              :       Changed for SMP boot into long mode.
+ */
+
+#include <linux/config.h>
+#include <linux/init.h>
+
+#include <linux/mm.h>
+#include <linux/kernel_stat.h>
+#include <linux/smp_lock.h>
+#include <linux/irq.h>
+#include <linux/bootmem.h>
+
+#include <linux/delay.h>
+#include <linux/mc146818rtc.h>
+#include <asm/mtrr.h>
+#include <asm/pgalloc.h>
+#include <asm/desc.h>
+
+/* Set if we find a B stepping CPU                     */
+static int smp_b_stepping;
+
+/* Setup configured maximum number of CPUs to activate */
+static int max_cpus = -1;
+
+/* Total count of live CPUs */
+int smp_num_cpus = 1;
+
+/* Bitmask of currently online CPUs */
+unsigned long cpu_online_map;
+
+/* which CPU (physical APIC ID) maps to which logical CPU number */
+volatile int x86_apicid_to_cpu[NR_CPUS];
+/* which logical CPU number maps to which CPU (physical APIC ID) */
+volatile int x86_cpu_to_apicid[NR_CPUS];
+
+static volatile unsigned long cpu_callin_map;
+static volatile unsigned long cpu_callout_map;
+
+/* Per CPU bogomips and other parameters */
+struct cpuinfo_x86 cpu_data[NR_CPUS] __cacheline_aligned;
+
+/* Set when the idlers are all forked */
+int smp_threads_ready;
+
+/*
+ * Setup routine for controlling SMP activation
+ *
+ * Command-line option of "nosmp" or "maxcpus=0" will disable SMP
+ * activation entirely (the MPS table probe still happens, though).
+ *
+ * Command-line option of "maxcpus=<NUM>", where <NUM> is an integer
+ * greater than 0, limits the maximum number of CPUs activated in
+ * SMP mode to <NUM>.
+ */
+
+static int __init nosmp(char *str)
+{
+       max_cpus = 0;
+       return 1;
+}
+
+__setup("nosmp", nosmp);
+
+static int __init maxcpus(char *str)
+{
+       get_option(&str, &max_cpus);
+       return 1;
+}
+
+__setup("maxcpus=", maxcpus);
+
+/*
+ * Trampoline 80x86 program as an array.
+ */
+
+extern unsigned char trampoline_data [];
+extern unsigned char trampoline_end  [];
+static unsigned char *trampoline_base;
+
+/*
+ * Currently trivial. Write the real->protected mode
+ * bootstrap into the page concerned. The caller
+ * has made sure it's suitably aligned.
+ */
+
+static unsigned long __init setup_trampoline(void)
+{
+       extern __u32 tramp_gdt_ptr; 
+       tramp_gdt_ptr = (__u32)virt_to_phys(&gdt_table); 
+       memcpy(trampoline_base, trampoline_data, trampoline_end - trampoline_data);
+       return virt_to_phys(trampoline_base);
+}
+
+/*
+ * We are called very early to get the low memory for the
+ * SMP bootup trampoline page.
+ */
+void __init smp_alloc_memory(void)
+{
+       trampoline_base = (void *) alloc_bootmem_low_pages(PAGE_SIZE);
+       /*
+        * Has to be in very low memory so we can execute
+        * real-mode AP code.
+        */
+       if (__pa(trampoline_base) >= 0x9F000)
+               BUG();
+}
+
+/*
+ * The bootstrap kernel entry code has set these up. Save them for
+ * a given CPU
+ */
+
+void __init smp_store_cpu_info(int id)
+{
+       struct cpuinfo_x86 *c = cpu_data + id;
+
+       *c = boot_cpu_data;
+       identify_cpu(c);
+       /*
+        * Mask B, Pentium, but not Pentium MMX
+        */
+       if (c->x86_vendor == X86_VENDOR_INTEL &&
+           c->x86 == 5 &&
+           c->x86_mask >= 1 && c->x86_mask <= 4 &&
+           c->x86_model <= 3)
+               /*
+                * Remember we have B step Pentia with bugs
+                */
+               smp_b_stepping = 1;
+}
+
+/*
+ * Architecture specific routine called by the kernel just before init is
+ * fired off. This allows the BP to have everything in order [we hope].
+ * At the end of this all the APs will hit the system scheduling and off
+ * we go. Each AP will load the system gdt's and jump through the kernel
+ * init into idle(). At this point the scheduler will one day take over
+ * and give them jobs to do. smp_callin is a standard routine
+ * we use to track CPUs as they power up.
+ */
+
+static atomic_t smp_commenced = ATOMIC_INIT(0);
+
+void __init smp_commence(void)
+{
+       /*
+        * Lets the callins below out of their loop.
+        */
+       Dprintk("Setting commenced=1, go go go\n");
+
+       wmb();
+       atomic_set(&smp_commenced,1);
+}
+
+/*
+ * TSC synchronization.
+ *
+ * We first check whether all CPUs have their TSCs synchronized,
+ * then we print a warning if not, and always resync.
+ */
+
+static atomic_t tsc_start_flag = ATOMIC_INIT(0);
+static atomic_t tsc_count_start = ATOMIC_INIT(0);
+static atomic_t tsc_count_stop = ATOMIC_INIT(0);
+static unsigned long long tsc_values[NR_CPUS];
+
+#define NR_LOOPS 5
+
+extern unsigned long fast_gettimeoffset_quotient;
+
+/*
+ * accurate 64-bit/32-bit division, expanded to 32-bit divisions and 64-bit
+ * multiplication. Not terribly optimized but we need it at boot time only
+ * anyway.
+ *
+ * result == a / b
+ *     == (a1 + a2*(2^32)) / b
+ *     == a1/b + a2*(2^32/b)
+ *     == a1/b + a2*((2^32-1)/b) + a2/b + (a2*((2^32-1) % b))/b
+ *                 ^---- (this multiplication can overflow)
+ */
+
+static unsigned long long div64 (unsigned long long a, unsigned long b0)
+{
+       unsigned int a1, a2;
+       unsigned long long res;
+
+       a1 = ((unsigned int*)&a)[0];
+       a2 = ((unsigned int*)&a)[1];
+
+       res = a1/b0 +
+               (unsigned long long)a2 * (unsigned long long)(0xffffffff/b0) +
+               a2 / b0 +
+               (a2 * (0xffffffff % b0)) / b0;
+
+       return res;
+}
+
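+/*
+ * Quick numeric check of the decomposition above (illustrative values):
+ * a = 5*2^32 + 250 = 21474836730 and b0 = 1000 give an exact quotient of
+ * 21474836.  The code computes
+ *     a1/b0                           =        0
+ *     a2 * (0xffffffff/b0)            = 21474835
+ *     a2/b0                           =        0
+ *     (a2 * (0xffffffff % b0))/b0     =        1
+ * which also sums to 21474836.  Each term is truncated individually, so
+ * the result can be off by a few counts, which is accurate enough for the
+ * boot-time TSC averaging below.
+ */
+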
+static void __init synchronize_tsc_bp (void)
+{
+       int i;
+       unsigned long long t0;
+       unsigned long long sum, avg;
+       long long delta;
+       unsigned long one_usec;
+       int buggy = 0;
+
+       printk("checking TSC synchronization across CPUs: ");
+
+       one_usec = ((1<<30)/fast_gettimeoffset_quotient)*(1<<2);
+
+       atomic_set(&tsc_start_flag, 1);
+       wmb();
+
+       /*
+        * We loop a few times to get a primed instruction cache,
+        * then the last pass is more or less synchronized and
+        * the BP and APs set their cycle counters to zero all at
+        * once. This reduces the chance of having random offsets
+        * between the processors, and guarantees that the maximum
+        * delay between the cycle counters is never bigger than
+        * the latency of information-passing (cachelines) between
+        * two CPUs.
+        */
+       for (i = 0; i < NR_LOOPS; i++) {
+               /*
+                * all APs synchronize but they loop on '== num_cpus'
+                */
+               while (atomic_read(&tsc_count_start) != smp_num_cpus-1) mb();
+               atomic_set(&tsc_count_stop, 0);
+               wmb();
+               /*
+                * this lets the APs save their current TSC:
+                */
+               atomic_inc(&tsc_count_start);
+
+               rdtscll(tsc_values[smp_processor_id()]);
+               /*
+                * We clear the TSC in the last loop:
+                */
+               if (i == NR_LOOPS-1)
+                       write_tsc(0, 0);
+
+               /*
+                * Wait for all APs to leave the synchronization point:
+                */
+               while (atomic_read(&tsc_count_stop) != smp_num_cpus-1) mb();
+               atomic_set(&tsc_count_start, 0);
+               wmb();
+               atomic_inc(&tsc_count_stop);
+       }
+
+       sum = 0;
+       for (i = 0; i < smp_num_cpus; i++) {
+               t0 = tsc_values[i];
+               sum += t0;
+       }
+       avg = div64(sum, smp_num_cpus);
+
+       sum = 0;
+       for (i = 0; i < smp_num_cpus; i++) {
+               delta = tsc_values[i] - avg;
+               if (delta < 0)
+                       delta = -delta;
+               /*
+                * We report clock differences bigger than 2 microseconds.
+                */
+               if (delta > 2*one_usec) {
+                       long realdelta;
+                       if (!buggy) {
+                               buggy = 1;
+                               printk("\n");
+                       }
+                       realdelta = div64(delta, one_usec);
+                       if (tsc_values[i] < avg)
+                               realdelta = -realdelta;
+
+                       printk("BIOS BUG: CPU#%d improperly initialized, has %ld usecs TSC skew! FIXED.\n", i, realdelta);
+               }
+
+               sum += delta;
+       }
+       if (!buggy)
+               printk("passed.\n");
+}
+
+static void __init synchronize_tsc_ap (void)
+{
+       int i;
+
+       /*
+        * smp_num_cpus is not necessarily known at the time
+        * this gets called, so we first wait for the BP to
+        * finish SMP initialization:
+        */
+       while (!atomic_read(&tsc_start_flag)) mb();
+
+       for (i = 0; i < NR_LOOPS; i++) {
+               atomic_inc(&tsc_count_start);
+               while (atomic_read(&tsc_count_start) != smp_num_cpus) mb();
+
+               rdtscll(tsc_values[smp_processor_id()]);
+               if (i == NR_LOOPS-1)
+                       write_tsc(0, 0);
+
+               atomic_inc(&tsc_count_stop);
+               while (atomic_read(&tsc_count_stop) != smp_num_cpus) mb();
+       }
+}
+#undef NR_LOOPS
+
+extern void calibrate_delay(void);
+
+static atomic_t init_deasserted;
+
+void __init smp_callin(void)
+{
+       int cpuid, phys_id;
+       unsigned long timeout;
+
+       /*
+        * If woken up by an INIT in an 82489DX configuration
+        * we may get here before an INIT-deassert IPI reaches
+        * our local APIC.  We have to wait for the IPI or we'll
+        * lock up on an APIC access.
+        */
+       while (!atomic_read(&init_deasserted));
+
+       /*
+        * (This works even if the APIC is not enabled.)
+        */
+       phys_id = GET_APIC_ID(apic_read(APIC_ID));
+       cpuid = smp_processor_id();
+       if (test_and_set_bit(cpuid, &cpu_online_map)) {
+               printk("huh, phys CPU#%d, CPU#%d already present??\n",
+                                       phys_id, cpuid);
+               BUG();
+       }
+       Dprintk("CPU#%d (phys ID: %d) waiting for CALLOUT\n", cpuid, phys_id);
+
+       /*
+        * STARTUP IPIs are fragile beasts as they might sometimes
+        * trigger some glue motherboard logic. Complete APIC bus
+        * silence for 1 second; this overestimates the time the
+        * boot CPU is spending to send the up to 2 STARTUP IPIs
+        * by a factor of two. This should be enough.
+        */
+
+       /*
+        * Waiting 2s total for startup (udelay is not yet working)
+        */
+       timeout = jiffies + 2*HZ;
+       while (time_before(jiffies, timeout)) {
+               /*
+                * Has the boot CPU finished its STARTUP sequence?
+                */
+               if (test_bit(cpuid, &cpu_callout_map))
+                       break;
+               rep_nop();
+       }
+
+       if (!time_before(jiffies, timeout)) {
+               printk("BUG: CPU%d started up but did not get a callout!\n",
+                       cpuid);
+               BUG();
+       }
+
+       /*
+        * the boot CPU has finished the init stage and is spinning
+        * on callin_map until we finish. We are free to set up this
+        * CPU, first the APIC. (this is probably redundant on most
+        * boards)
+        */
+
+       Dprintk("CALLIN, before setup_local_APIC().\n");
+       setup_local_APIC();
+
+       sti();
+
+#ifdef CONFIG_MTRR
+       /*
+        * Must be done before calibration delay is computed
+        */
+       mtrr_init_secondary_cpu ();
+#endif
+       /*
+        * Get our bogomips.
+        */
+       calibrate_delay();
+       Dprintk("Stack at about %p\n",&cpuid);
+
+       /*
+        * Save our processor parameters
+        */
+       smp_store_cpu_info(cpuid);
+
+       disable_APIC_timer();
+       /*
+        * Allow the master to continue.
+        */
+       set_bit(cpuid, &cpu_callin_map);
+
+       /*
+        *      Synchronize the TSC with the BP
+        */
+       if (cpu_has_tsc)
+               synchronize_tsc_ap();
+}
+
+int cpucount;
+
+extern int cpu_idle(void);
+
+/*
+ * Activate a secondary processor.
+ */
+int __init start_secondary(void *unused)
+{
+       /*
+        * Don't put anything before smp_callin(); SMP
+        * booting is so fragile that we want to limit the
+        * things done here to the bare minimum.
+        */
+       cpu_init();
+       smp_callin();
+       while (!atomic_read(&smp_commenced))
+               rep_nop();
+       enable_APIC_timer();
+       /*
+        * low-memory mappings have been cleared, flush them from
+        * the local TLBs too.
+        */
+       local_flush_tlb();
+
+       return cpu_idle();
+}
+
+/*
+ * Everything has been set up for the secondary
+ * CPUs - they just need to reload everything
+ * from the task structure
+ * This function must not return.
+ */
+void __init initialize_secondary(void)
+{
+       /*
+        * We don't actually need to load the full TSS,
+        * basically just the stack pointer and the instruction pointer.
+        */
+
+       asm volatile(
+               "movq %0,%%rsp\n\t"
+               "jmp *%1"
+               :
+               :"r" (current->thread.rsp),"r" (current->thread.rip));
+}
+
+extern void *init_rsp; 
+extern void (*initial_code)(void);
+
+static int __init fork_by_hand(void)
+{
+       struct pt_regs regs;
+       /*
+        * don't care about the eip and regs settings since
+        * we'll never reschedule the forked task.
+        */
+       return do_fork(CLONE_VM|CLONE_PID, 0, &regs, 0);
+}
+
+#if APIC_DEBUG
+static inline void inquire_remote_apic(int apicid)
+{
+       int i, regs[] = { APIC_ID >> 4, APIC_LVR >> 4, APIC_SPIV >> 4 };
+       char *names[] = { "ID", "VERSION", "SPIV" };
+       int timeout, status;
+
+       printk("Inquiring remote APIC #%d...\n", apicid);
+
+       for (i = 0; i < sizeof(regs) / sizeof(*regs); i++) {
+               printk("... APIC #%d %s: ", apicid, names[i]);
+
+               /*
+                * Wait for idle.
+                */
+               apic_wait_icr_idle();
+
+               apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(apicid));
+               apic_write_around(APIC_ICR, APIC_DM_REMRD | regs[i]);
+
+               timeout = 0;
+               do {
+                       udelay(100);
+                       status = apic_read(APIC_ICR) & APIC_ICR_RR_MASK;
+               } while (status == APIC_ICR_RR_INPROG && timeout++ < 1000);
+
+               switch (status) {
+               case APIC_ICR_RR_VALID:
+                       status = apic_read(APIC_RRR);
+                       printk("%08x\n", status);
+                       break;
+               default:
+                       printk("failed\n");
+               }
+       }
+}
+#endif
+
+static void __init do_boot_cpu (int apicid)
+{
+       struct task_struct *idle;
+       unsigned long send_status, accept_status, boot_status, maxlvt;
+       int timeout, num_starts, j, cpu;
+       unsigned long start_eip;
+
+       cpu = ++cpucount;
+       /*
+        * We can't use kernel_thread since we must avoid
+        * rescheduling the child.
+        */
+       if (fork_by_hand() < 0)
+               panic("failed fork for CPU %d", cpu);
+
+       /*
+        * We remove it from the pidhash and the runqueue
+        * once we've got the process:
+        */
+       idle = init_task.prev_task;
+       if (!idle)
+               panic("No idle process for CPU %d", cpu);
+
+       init_idle(idle,cpu);
+
+       x86_cpu_to_apicid[cpu] = apicid;
+       x86_apicid_to_cpu[apicid] = cpu;
+       idle->thread.rip = (unsigned long) start_secondary;
+
+       init_rsp = (void *) (THREAD_SIZE + (char *)idle->thread_info);
+
+       unhash_process(idle);
+       cpu_pda[cpu].pcurrent = idle;
+       cpu_pda[cpu].kernelstack = init_rsp - PDA_STACKOFFSET; 
+
+       /* start_eip had better be page-aligned! */
+       start_eip = setup_trampoline();
+
+       /* So we see what's up   */
+       printk("Booting processor %d/%d eip %lx\n", cpu, apicid, start_eip);
+
+       initial_code = initialize_secondary;
+
+       /*
+        * This grunge runs the startup process for
+        * the targeted processor.
+        */
+
+       atomic_set(&init_deasserted, 0);
+
+       Dprintk("Setting warm reset code and vector.\n");
+
+       CMOS_WRITE(0xa, 0xf);
+       local_flush_tlb();
+       Dprintk("1.\n");
+       *((volatile unsigned short *) phys_to_virt(0x469)) = start_eip >> 4;
+       Dprintk("2.\n");
+       *((volatile unsigned short *) phys_to_virt(0x467)) = start_eip & 0xf;
+       Dprintk("3.\n");
+
+       /*
+        * Be paranoid about clearing APIC errors.
+        */
+       if (APIC_INTEGRATED(apic_version[apicid])) {
+               apic_read_around(APIC_SPIV);
+               apic_write(APIC_ESR, 0);
+               apic_read(APIC_ESR);
+       }
+
+       /*
+        * Status is now clean
+        */
+       send_status = 0;
+       accept_status = 0;
+       boot_status = 0;
+
+       /*
+        * Starting actual IPI sequence...
+        */
+
+       Dprintk("Asserting INIT.\n");
+
+       /*
+        * Turn INIT on target chip
+        */
+       apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(apicid));
+
+       /*
+        * Send IPI
+        */
+       apic_write_around(APIC_ICR, APIC_INT_LEVELTRIG | APIC_INT_ASSERT
+                               | APIC_DM_INIT);
+
+       Dprintk("Waiting for send to finish...\n");
+       timeout = 0;
+       do {
+               Dprintk("+");
+               udelay(100);
+               send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
+       } while (send_status && (timeout++ < 1000));
+
+       mdelay(10);
+
+       Dprintk("Deasserting INIT.\n");
+
+       /* Target chip */
+       apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(apicid));
+
+       /* Send IPI */
+       apic_write_around(APIC_ICR, APIC_INT_LEVELTRIG | APIC_DM_INIT);
+
+       Dprintk("Waiting for send to finish...\n");
+       timeout = 0;
+       do {
+               Dprintk("+");
+               udelay(100);
+               send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
+       } while (send_status && (timeout++ < 1000));
+
+       atomic_set(&init_deasserted, 1);
+
+       /*
+        * Should we send STARTUP IPIs ?
+        *
+        * Determine this based on the APIC version.
+        * If we don't have an integrated APIC, don't
+        * send the STARTUP IPIs.
+        */
+       if (APIC_INTEGRATED(apic_version[apicid]))
+               num_starts = 2;
+       else
+               num_starts = 0;
+
+       /*
+        * Run STARTUP IPI loop.
+        */
+       Dprintk("#startup loops: %d.\n", num_starts);
+
+       maxlvt = get_maxlvt();
+
+       for (j = 1; j <= num_starts; j++) {
+               Dprintk("Sending STARTUP #%d.\n",j);
+               apic_read_around(APIC_SPIV);
+               apic_write(APIC_ESR, 0);
+               apic_read(APIC_ESR);
+               Dprintk("After apic_write.\n");
+
+               /*
+                * STARTUP IPI
+                */
+
+               /* Target chip */
+               apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(apicid));
+
+               /* Boot on the stack */
+               /* Kick the second */
+               apic_write_around(APIC_ICR, APIC_DM_STARTUP
+                                       | (start_eip >> 12));
+
+               /*
+                * Give the other CPU some time to accept the IPI.
+                */
+               udelay(300);
+
+               Dprintk("Startup point 1.\n");
+
+               Dprintk("Waiting for send to finish...\n");
+               timeout = 0;
+               do {
+                       Dprintk("+");
+                       udelay(100);
+                       send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
+               } while (send_status && (timeout++ < 1000));
+
+               /*
+                * Give the other CPU some time to accept the IPI.
+                */
+               udelay(200);
+               /*
+                * Due to the Pentium erratum 3AP.
+                */
+               if (maxlvt > 3) {
+                       apic_read_around(APIC_SPIV);
+                       apic_write(APIC_ESR, 0);
+               }
+               accept_status = (apic_read(APIC_ESR) & 0xEF);
+               if (send_status || accept_status)
+                       break;
+       }
+       Dprintk("After Startup.\n");
+
+       if (send_status)
+               printk("APIC never delivered???\n");
+       if (accept_status)
+               printk("APIC delivery error (%lx).\n", accept_status);
+
+       if (!send_status && !accept_status) {
+               /*
+                * allow APs to start initializing.
+                */
+               Dprintk("Before Callout %d.\n", cpu);
+               set_bit(cpu, &cpu_callout_map);
+               Dprintk("After Callout %d.\n", cpu);
+
+               /*
+                * Wait 5s total for a response
+                */
+               for (timeout = 0; timeout < 50000; timeout++) {
+                       if (test_bit(cpu, &cpu_callin_map))
+                               break;  /* It has booted */
+                       udelay(100);
+               }
+
+               if (test_bit(cpu, &cpu_callin_map)) {
+                       /* number CPUs logically, starting from 1 (BSP is 0) */
+                       Dprintk("OK.\n");
+                       printk("CPU%d: ", cpu);
+                       print_cpu_info(&cpu_data[cpu]);
+                       Dprintk("CPU has booted.\n");
+               } else {
+                       boot_status = 1;
+                       if (*((volatile unsigned char *)phys_to_virt(8192))
+                                       == 0xA5)
+                               /* trampoline started but...? */
+                               printk("Stuck ??\n");
+                       else
+                               /* trampoline code not run */
+                               printk("Not responding.\n");
+#if APIC_DEBUG
+                       inquire_remote_apic(apicid);
+#endif
+               }
+       }
+       if (send_status || accept_status || boot_status) {
+               x86_cpu_to_apicid[cpu] = -1;
+               x86_apicid_to_cpu[apicid] = -1;
+               cpucount--;
+       }
+
+       /* mark "stuck" area as not stuck */
+       *((volatile unsigned long *)phys_to_virt(8192)) = 0;
+}
+
+cycles_t cacheflush_time;
+unsigned long cache_decay_ticks;
+
+static void smp_tune_scheduling (void)
+{
+       unsigned long cachesize;       /* kB   */
+       unsigned long bandwidth = 350; /* MB/s */
+       /*
+        * Rough estimation for SMP scheduling, this is the number of
+        * cycles it takes for a fully memory-limited process to flush
+        * the SMP-local cache.
+        *
+        * (For a P5 this pretty much means we will almost always choose
+        *  another idle CPU at wakeup time, due to the small L1 cache;
+        *  on PIIs it's around 50-100 usecs, depending on
+        *  the cache size.)
+        */
+
+       if (!cpu_khz) {
+               /*
+                * this basically disables processor-affinity
+                * scheduling on SMP without a TSC.
+                */
+               cacheflush_time = 0;
+               return;
+       } else {
+               cachesize = boot_cpu_data.x86_cache_size;
+               if (cachesize == -1) {
+                       cachesize = 16; /* Pentiums, 2x8kB cache */
+                       bandwidth = 100;
+               }
+
+               cacheflush_time = (cpu_khz>>10) * (cachesize<<10) / bandwidth;
+       }
+
+       cache_decay_ticks = (long)cacheflush_time/cpu_khz * HZ / 1000;
+
+       printk("per-CPU timeslice cutoff: %ld.%02ld usecs.\n",
+               (long)cacheflush_time/(cpu_khz/1000),
+               ((long)cacheflush_time*100/(cpu_khz/1000)) % 100);
+       printk("task migration cache decay timeout: %ld msecs.\n",
+               (cache_decay_ticks + 1) * 1000 / HZ);
+}
+
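+/*
+ * Worked example for the formula above (figures are illustrative only):
+ * with cpu_khz = 500000 (a 500 MHz CPU), cachesize = 256 kB and the default
+ * bandwidth of 350 MB/s,
+ *
+ *     cacheflush_time = (500000>>10) * (256<<10) / 350
+ *                     = 488 * 262144 / 350 ~= 365503 cycles ~= 731 usecs,
+ *
+ * and cache_decay_ticks then comes out as 0 whole timer ticks, since
+ * 731 usecs is well below one tick.
+ */
+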
+/*
+ * Cycle through the processors sending APIC IPIs to boot each.
+ */
+
+extern int prof_multiplier[NR_CPUS];
+extern int prof_old_multiplier[NR_CPUS];
+extern int prof_counter[NR_CPUS];
+
+void __init smp_boot_cpus(void)
+{
+       int apicid, cpu;
+
+#ifdef CONFIG_MTRR
+       /*  Must be done before other processors booted  */
+       mtrr_init_boot_cpu ();
+#endif
+       /*
+        * Initialize the logical to physical CPU number mapping
+        * and the per-CPU profiling counter/multiplier
+        */
+
+       for (apicid = 0; apicid < NR_CPUS; apicid++) {
+               x86_apicid_to_cpu[apicid] = -1;
+               prof_counter[apicid] = 1;
+               prof_old_multiplier[apicid] = 1;
+               prof_multiplier[apicid] = 1;
+       }
+
+       /*
+        * Setup boot CPU information
+        */
+       smp_store_cpu_info(0); /* Final full version of the data */
+       printk("CPU%d: ", 0);
+       print_cpu_info(&cpu_data[0]);
+
+       /*
+        * We have the boot CPU online for sure.
+        */
+       set_bit(0, &cpu_online_map);
+       x86_apicid_to_cpu[boot_cpu_id] = 0;
+       x86_cpu_to_apicid[0] = boot_cpu_id;
+       global_irq_holder = NO_PROC_ID;
+       current_thread_info()->cpu = 0;
+       smp_tune_scheduling();
+
+       /*
+        * If we couldn't find an SMP configuration at boot time,
+        * get out of here now!
+        */
+       if (!smp_found_config) {
+               printk(KERN_NOTICE "SMP motherboard not detected.\n");
+               io_apic_irqs = 0;
+               cpu_online_map = phys_cpu_present_map = 1;
+               smp_num_cpus = 1;
+               if (APIC_init_uniprocessor())
+                       printk(KERN_NOTICE "Local APIC not detected."
+                                          " Using dummy APIC emulation.\n");
+               goto smp_done;
+       }
+
+       /*
+        * Should not be necessary because the MP table should list the boot
+        * CPU too, but we do it for the sake of robustness anyway.
+        */
+       if (!test_bit(boot_cpu_id, &phys_cpu_present_map)) {
+               printk("weird, boot CPU (#%d) not listed by the BIOS.\n",
+                                                                boot_cpu_id);
+               phys_cpu_present_map |= (1 << hard_smp_processor_id());
+       }
+
+       /*
+        * If we couldn't find a local APIC, then get out of here now!
+        */
+       if (APIC_INTEGRATED(apic_version[boot_cpu_id]) &&
+           !test_bit(X86_FEATURE_APIC, boot_cpu_data.x86_capability)) {
+               printk(KERN_ERR "BIOS bug, local APIC #%d not detected!...\n",
+                       boot_cpu_id);
+               printk(KERN_ERR "... forcing use of dummy APIC emulation. (tell your hw vendor)\n");
+               io_apic_irqs = 0;
+               cpu_online_map = phys_cpu_present_map = 1;
+               smp_num_cpus = 1;
+               goto smp_done;
+       }
+
+       verify_local_APIC();
+
+       /*
+        * If SMP should be disabled, then really disable it!
+        */
+       if (!max_cpus) {
+               smp_found_config = 0;
+               printk(KERN_INFO "SMP mode deactivated, forcing use of dummy APIC emulation.\n");
+               io_apic_irqs = 0;
+               cpu_online_map = phys_cpu_present_map = 1;
+               smp_num_cpus = 1;
+               goto smp_done;
+       }
+
+       connect_bsp_APIC();
+       setup_local_APIC();
+
+       if (GET_APIC_ID(apic_read(APIC_ID)) != boot_cpu_id)
+               BUG();
+
+       /*
+        * Now scan the CPU present map and fire up the other CPUs.
+        */
+       Dprintk("CPU present map: %lx\n", phys_cpu_present_map);
+
+       for (apicid = 0; apicid < NR_CPUS; apicid++) {
+               /*
+                * Don't even attempt to start the boot CPU!
+                */
+               if (apicid == boot_cpu_id)
+                       continue;
+
+               if (!(phys_cpu_present_map & (1 << apicid)))
+                       continue;
+               if ((max_cpus >= 0) && (max_cpus <= cpucount+1))
+                       continue;
+
+               do_boot_cpu(apicid);
+
+               /*
+                * Make sure we unmap all failed CPUs
+                */
+               if ((x86_apicid_to_cpu[apicid] == -1) &&
+                               (phys_cpu_present_map & (1 << apicid)))
+                       printk("phys CPU #%d not responding - cannot use it.\n",apicid);
+       }
+
+       /*
+        * Cleanup possible dangling ends...
+        */
+       {
+               /*
+                * Install writable page 0 entry to set BIOS data area.
+                */
+               local_flush_tlb();
+
+               /*
+                * Paranoid:  Set warm reset code and vector here back
+                * to default values.
+                */
+               CMOS_WRITE(0, 0xf);
+
+               *((volatile long *) phys_to_virt(0x467)) = 0;
+       }
+
+       /*
+        * Allow the user to impress friends.
+        */
+
+       Dprintk("Before bogomips.\n");
+       if (!cpucount) {
+               printk(KERN_ERR "Error: only one processor found.\n");
+       } else {
+               unsigned long bogosum = 0;
+               for (cpu = 0; cpu < NR_CPUS; cpu++)
+                       if (cpu_online_map & (1<<cpu))
+                               bogosum += cpu_data[cpu].loops_per_jiffy;
+               printk(KERN_INFO "Total of %d processors activated (%lu.%02lu BogoMIPS).\n",
+                       cpucount+1,
+                       bogosum/(500000/HZ),
+                       (bogosum/(5000/HZ))%100);
+               Dprintk("Before bogocount - setting activated=1.\n");
+       }
+       smp_num_cpus = cpucount + 1;
+
+       if (smp_b_stepping)
+               printk(KERN_WARNING "WARNING: SMP operation may be unreliable with B stepping processors.\n");
+       Dprintk("Boot done.\n");
+
+       /*
+        * Here we can be sure that there is an IO-APIC in the system. Let's
+        * go and set it up:
+        */
+       if (!skip_ioapic_setup && nr_ioapics)
+               setup_IO_APIC();
+
+       /*
+        * Set up all local APIC timers in the system:
+        */
+       setup_APIC_clocks();
+
+       /*
+        * Synchronize the TSC with the AP
+        */
+       if (cpu_has_tsc && cpucount)
+               synchronize_tsc_bp();
+
+smp_done:
+       zap_low_mappings();
+}
diff --git a/arch/x86_64/kernel/sys_x86_64.c b/arch/x86_64/kernel/sys_x86_64.c
new file mode 100644
index 0000000..7a59684
--- /dev/null
+++ b/arch/x86_64/kernel/sys_x86_64.c
@@ -0,0 +1,113 @@
+/*
+ * linux/arch/x86_64/kernel/sys_x86_64.c
+ */
+
+#include <linux/errno.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/smp.h>
+#include <linux/smp_lock.h>
+#include <linux/sem.h>
+#include <linux/msg.h>
+#include <linux/shm.h>
+#include <linux/stat.h>
+#include <linux/mman.h>
+#include <linux/file.h>
+#include <linux/utsname.h>
+
+#include <asm/uaccess.h>
+#include <asm/ipc.h>
+
+/*
+ * sys_pipe() is the normal C calling standard for creating
+ * a pipe. It's not the way Unix traditionally does this, though.
+ */
+asmlinkage long sys_pipe(unsigned long * fildes)
+{
+       int fd[2];
+       int error;
+
+       error = do_pipe(fd);
+       if (!error) {
+               if (copy_to_user(fildes, fd, 2*sizeof(int)))
+                       error = -EFAULT;
+       }
+       return error;
+}
+
+long sys_mmap(unsigned long addr, unsigned long len, unsigned long prot, unsigned long flags,
+       unsigned long fd, unsigned long off)
+{
+       long error;
+       struct file * file;
+
+       error = -EINVAL;
+       if (off & ~PAGE_MASK)
+               goto out;
+
+       error = -EBADF;
+       file = NULL;
+       flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE);
+       if (!(flags & MAP_ANONYMOUS)) {
+               file = fget(fd);
+               if (!file)
+                       goto out;
+       }
+
+       down_write(&current->mm->mmap_sem);
+       error = do_mmap_pgoff(file, addr, len, prot, flags, off >> PAGE_SHIFT);
+       up_write(&current->mm->mmap_sem);
+
+       if (file)
+               fput(file);
+out:
+       return error;
+}
+
+unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags)
+{
+       struct vm_area_struct *vma;
+       unsigned long end = TASK_SIZE;
+
+       if (flags & MAP_32BIT) 
+               end = 0xffffffff;
+       if (len > end)
+               return -ENOMEM;
+       if (!addr) { 
+               addr = TASK_UNMAPPED_64;
+               if (test_thread_flag(TIF_IA32) || (flags & MAP_32BIT)) {
+                       addr = TASK_UNMAPPED_32;
+               }
+       } 
+       addr = PAGE_ALIGN(addr);
+
+       for (vma = find_vma(current->mm, addr); ; vma = vma->vm_next) {
+               /* At this point:  (!vma || addr < vma->vm_end). */
+               /* stay below 'end' so that MAP_32BIT is honoured */
+               if (end - len < addr)
+                       return -ENOMEM;
+               if (!vma || addr + len <= vma->vm_start)
+                       return addr;
+               addr = vma->vm_end;
+       }
+}
+
+/*
+ * Old cruft
+ */
+asmlinkage long sys_uname(struct old_utsname * name)
+{
+       int err;
+       if (!name)
+               return -EFAULT;
+       down_read(&uts_sem);
+       err=copy_to_user(name, &system_utsname, sizeof (*name));
+       up_read(&uts_sem);
+       return err?-EFAULT:0;
+}
+
+asmlinkage long sys_pause(void)
+{
+       current->state = TASK_INTERRUPTIBLE;
+       schedule();
+       return -ERESTARTNOHAND;
+}
diff --git a/arch/x86_64/kernel/syscall.c b/arch/x86_64/kernel/syscall.c
new file mode 100644
index 0000000..2d3f35e
--- /dev/null
+++ b/arch/x86_64/kernel/syscall.c
@@ -0,0 +1,25 @@
+/* System call table for x86-64. */ 
+
+#include <linux/linkage.h>
+#include <linux/sys.h>
+#include <linux/cache.h>
+
+#define __NO_STUBS
+
+#define __SYSCALL(nr, sym) extern asmlinkage void sym(void) ; 
+#undef _ASM_X86_64_UNISTD_H_
+#include <asm-x86_64/unistd.h>
+
+#undef __SYSCALL
+#define __SYSCALL(nr, sym) [ nr ] = sym, 
+#undef _ASM_X86_64_UNISTD_H_
+
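+/*
+ * unistd.h is pulled in twice: the first pass above expands each
+ * __SYSCALL(nr, sym) entry into an extern declaration, and the second pass
+ * inside the initializer below expands the same entry into a designated
+ * initializer, roughly
+ *
+ *     [ __NR_read ] = sys_read,
+ *
+ * (__NR_read/sys_read is just an example entry; the real list comes from
+ * unistd.h).  Slots without an entry keep the sys_ni_syscall default.
+ */
+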
+typedef void (*sys_call_ptr_t)(void); 
+
+extern void sys_ni_syscall(void);
+
+sys_call_ptr_t sys_call_table[__NR_syscall_max+1] __cacheline_aligned = { 
+       /* Smells like a compiler bug -- it doesn't work when the & below is removed. */ 
+       [0 ... __NR_syscall_max] = &sys_ni_syscall,
+#include <asm-x86_64/unistd.h>
+};
diff --git a/arch/x86_64/kernel/time.c b/arch/x86_64/kernel/time.c
new file mode 100644
index 0000000..39a2fe7
--- /dev/null
+++ b/arch/x86_64/kernel/time.c
@@ -0,0 +1,494 @@
+/*
+ *  linux/arch/x86-64/kernel/time.c
+ *
+ *  Copyright (C) 1991, 1992, 1995  Linus Torvalds
+ *
+ * This file contains the PC-specific time handling details:
+ * reading the RTC at bootup, etc..
+ * 1994-07-02    Alan Modra
+ *     fixed set_rtc_mmss, fixed time.year for >= 2000, new mktime
+ * 1995-03-26    Markus Kuhn
+ *      fixed 500 ms bug at call to set_rtc_mmss, fixed DS12887
+ *      precision CMOS clock update
+ * 1996-05-03    Ingo Molnar
+ *      fixed time warps in do_[slow|fast]_gettimeoffset()
+ * 1997-09-10  Updated NTP code according to technical memorandum Jan '96
+ *             "A Kernel Model for Precision Timekeeping" by Dave Mills
+ * 1998-09-05    (Various)
+ *     More robust do_fast_gettimeoffset() algorithm implemented
+ *     (works with APM, Cyrix 6x86MX and Centaur C6),
+ *     monotonic gettimeofday() with fast_get_timeoffset(),
+ *     drift-proof precision TSC calibration on boot
+ *     (C. Scott Ananian <cananian@alumni.princeton.edu>, Andrew D.
+ *     Balsa <andrebalsa@altern.org>, Philip Gladstone <philip@raptor.com>;
+ *     ported from 2.0.35 Jumbo-9 by Michael Krause <m.krause@tu-harburg.de>).
+ * 1998-12-16    Andrea Arcangeli
+ *     Fixed Jumbo-9 code in 2.1.131: do_gettimeofday was missing 1 jiffy
+ *     because was not accounting lost_ticks.
+ * 1998-12-24 Copyright (C) 1998  Andrea Arcangeli
+ *     Fixed a xtime SMP race (we need the xtime_lock rw spinlock to
+ *     serialize accesses to xtime/lost_ticks).
+ */
+
+#include <linux/errno.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/param.h>
+#include <linux/string.h>
+#include <linux/mm.h>
+#include <linux/interrupt.h>
+#include <linux/time.h>
+#include <linux/delay.h>
+#include <linux/init.h>
+#include <linux/smp.h>
+
+#include <asm/io.h>
+#include <asm/smp.h>
+#include <asm/irq.h>
+#include <asm/msr.h>
+#include <asm/delay.h>
+#include <asm/mpspec.h>
+#include <asm/uaccess.h>
+#include <asm/processor.h>
+
+#include <linux/mc146818rtc.h>
+#include <linux/timex.h>
+#include <linux/config.h>
+
+#include <asm/fixmap.h>
+
+/*
+ * for x86_do_profile()
+ */
+#include <linux/irq.h>
+
+
+unsigned long cpu_khz; /* Detected as we calibrate the TSC */
+
+/* Number of usecs that the last interrupt was delayed */
+int __delay_at_last_interrupt __section_delay_at_last_interrupt;
+
+unsigned long __last_tsc_low __section_last_tsc_low; /* lsb 32 bits of Time Stamp Counter */
+
+/* Cached *multiplier* to convert TSC counts to microseconds.
+ * (see the equation below).
+ * Equal to 2^32 * (1 / (clocks per usec) ).
+ * Initialized in time_init.
+ */
+unsigned long __fast_gettimeoffset_quotient __section_fast_gettimeoffset_quotient;
+
+extern rwlock_t xtime_lock;
+struct timeval __xtime __section_xtime;
+volatile unsigned long __jiffies __section_jiffies;
+unsigned long __wall_jiffies __section_wall_jiffies;
+struct timezone __sys_tz __section_sys_tz;
+spinlock_t rtc_lock = SPIN_LOCK_UNLOCKED;
+
+static inline unsigned long do_gettimeoffset(void)
+{
+       register unsigned long eax, edx;
+
+       /* Read the Time Stamp Counter */
+
+       rdtsc(eax,edx);
+
+       /* .. relative to previous jiffy (32 bits is enough) */
+       eax -= last_tsc_low;    /* tsc_low delta */
+
+       /*
+         * Time offset = (tsc_low delta) * fast_gettimeoffset_quotient
+         *             = (tsc_low delta) * (usecs_per_clock)
+         *             = (tsc_low delta) * (usecs_per_jiffy / clocks_per_jiffy)
+        *
+        * Using a mull instead of a divl saves up to 31 clock cycles
+        * in the critical path.
+         */
+
+       edx = (eax*fast_gettimeoffset_quotient) >> 32;
+
+       /* our adjusted time offset in microseconds */
+       return delay_at_last_interrupt + edx;
+}
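+
+/*
+ * Numerical example for the equation above (illustrative figures only):
+ * on a 500 MHz CPU there are 500 TSC clocks per usec, so
+ *
+ *     fast_gettimeoffset_quotient = 2^32 / 500 ~= 8589934
+ *
+ * and a TSC delta of 500000 clocks (1 ms since the last timer interrupt)
+ * yields (500000 * 8589934) >> 32 ~= 999 usecs, which is then added to
+ * delay_at_last_interrupt.
+ */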
+
+
+
+
+#define TICK_SIZE tick
+
+spinlock_t i8253_lock = SPIN_LOCK_UNLOCKED;
+
+extern spinlock_t i8259A_lock;
+
+void do_settimeofday(struct timeval *tv)
+{
+       write_lock_irq(&xtime_lock);
+       vxtime_lock();
+       /*
+        * This is revolting. We need to set "xtime" correctly. However, the
+        * value in this location is the value at the most recent update of
+        * wall time.  Discover what correction gettimeofday() would have
+        * made, and then undo it!
+        */
+       tv->tv_usec -= do_gettimeoffset();
+       tv->tv_usec -= (jiffies - wall_jiffies) * (1000000 / HZ);
+
+       while (tv->tv_usec < 0) {
+               tv->tv_usec += 1000000;
+               tv->tv_sec--;
+       }
+
+       xtime = *tv;
+       vxtime_unlock();
+
+       time_adjust = 0;                /* stop active adjtime() */
+       time_status |= STA_UNSYNC;
+       time_maxerror = NTP_PHASE_LIMIT;
+       time_esterror = NTP_PHASE_LIMIT;
+       write_unlock_irq(&xtime_lock);
+}
+
+/*
+ * In order to set the CMOS clock precisely, set_rtc_mmss has to be
+ * called 500 ms after the second nowtime has started, because when
+ * nowtime is written into the registers of the CMOS clock, it will
+ * jump to the next second precisely 500 ms later. Check the Motorola
+ * MC146818A or Dallas DS12887 data sheet for details.
+ *
+ * BUG: This routine does not handle hour overflow properly; it just
+ *      sets the minutes. Usually you'll only notice that after reboot!
+ */
+static int set_rtc_mmss(unsigned long nowtime)
+{
+       int retval = 0;
+       int real_seconds, real_minutes, cmos_minutes;
+       unsigned char save_control, save_freq_select;
+
+       /* gets recalled with irq locally disabled */
+       spin_lock(&rtc_lock);
+       save_control = CMOS_READ(RTC_CONTROL); /* tell the clock it's being set */
+       CMOS_WRITE((save_control|RTC_SET), RTC_CONTROL);
+
+       save_freq_select = CMOS_READ(RTC_FREQ_SELECT); /* stop and reset prescaler */
+       CMOS_WRITE((save_freq_select|RTC_DIV_RESET2), RTC_FREQ_SELECT);
+
+       cmos_minutes = CMOS_READ(RTC_MINUTES);
+       if (!(save_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD)
+               BCD_TO_BIN(cmos_minutes);
+
+       /*
+        * since we're only adjusting minutes and seconds,
+        * don't interfere with hour overflow. This avoids
+        * messing with unknown time zones but requires your
+        * RTC not to be off by more than 15 minutes
+        */
+       real_seconds = nowtime % 60;
+       real_minutes = nowtime / 60;
+       if (((abs(real_minutes - cmos_minutes) + 15)/30) & 1)
+               real_minutes += 30;             /* correct for half hour time zone */
+       real_minutes %= 60;
+
+       if (abs(real_minutes - cmos_minutes) < 30) {
+               if (!(save_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD) {
+                       BIN_TO_BCD(real_seconds);
+                       BIN_TO_BCD(real_minutes);
+               }
+               CMOS_WRITE(real_seconds,RTC_SECONDS);
+               CMOS_WRITE(real_minutes,RTC_MINUTES);
+       } else {
+               printk(KERN_WARNING
+                      "set_rtc_mmss: can't update from %d to %d\n",
+                      cmos_minutes, real_minutes);
+               retval = -1;
+       }
+
+       /* The following flags have to be released exactly in this order,
+        * otherwise the DS12887 (popular MC146818A clone with integrated
+        * battery and quartz) will not reset the oscillator and will not
+        * update precisely 500 ms later. You won't find this mentioned in
+        * the Dallas Semiconductor data sheets, but who believes data
+        * sheets anyway ...                           -- Markus Kuhn
+        */
+       CMOS_WRITE(save_control, RTC_CONTROL);
+       CMOS_WRITE(save_freq_select, RTC_FREQ_SELECT);
+       spin_unlock(&rtc_lock);
+
+       return retval;
+}
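
The BCD handling above exists because the MC146818-style RTC normally stores each field as binary-coded decimal (e.g. 59 minutes as 0x59) unless RTC_DM_BINARY is set. A tiny standalone check of that conversion, with macro definitions equivalent to the kernel's BCD_TO_BIN/BIN_TO_BCD copied in so the example compiles on its own:

    #include <stdio.h>

    /* Equivalent to the kernel's definitions; copied here for illustration. */
    #define BCD_TO_BIN(val) ((val) = ((val) & 15) + ((val) >> 4) * 10)
    #define BIN_TO_BCD(val) ((val) = (((val) / 10) << 4) + (val) % 10)

    int main(void)
    {
            int minutes = 0x59;     /* "59" as read from a BCD-mode RTC register */

            BCD_TO_BIN(minutes);
            printf("BCD 0x59 -> %d\n", minutes);      /* prints 59 */

            BIN_TO_BCD(minutes);
            printf("binary 59 -> 0x%02x\n", minutes); /* back to 0x59 */
            return 0;
    }
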
+
+/* last time the cmos clock got updated */
+static long last_rtc_update;
+
+int timer_ack;
+
+/*
+ * timer_interrupt() needs to keep up the real-time clock,
+ * as well as call the "do_timer()" routine every clocktick
+ */
+static inline void do_timer_interrupt(int irq, void *dev_id, struct pt_regs *regs)
+{
+#ifdef CONFIG_X86_IO_APIC
+       if (timer_ack) {
+               /*
+                * Subtle: when I/O APICs are used we have to ack the timer IRQ
+                * manually to reset the IRR bit for do_slow_gettimeoffset().
+                * This will also deassert NMI lines for the watchdog if run
+                * on an 82489DX-based system.
+                */
+               spin_lock(&i8259A_lock);
+               outb(0x0c, 0x20);
+               /* Ack the IRQ; AEOI will end it automatically. */
+               inb(0x20);
+               spin_unlock(&i8259A_lock);
+       }
+#endif
+
+       do_timer(regs);
+/*
+ * In the SMP case we use the local APIC timer interrupt to do the
+ * profiling, except when we simulate SMP mode on a uniprocessor
+ * system; in that case we have to call the local interrupt handler.
+ */
+#ifndef CONFIG_X86_LOCAL_APIC
+       if (!user_mode(regs))
+               x86_do_profile(regs->rip);
+#else
+       if (!using_apic_timer)
+               smp_local_timer_interrupt(regs);
+#endif
+
+       /*
+        * If we have an externally synchronized Linux clock, then update
+        * CMOS clock accordingly every ~11 minutes. Set_rtc_mmss() has to be
+        * called as close as possible to 500 ms before the new second starts.
+        */
+       if ((time_status & STA_UNSYNC) == 0 &&
+           xtime.tv_sec > last_rtc_update + 660 &&
+           xtime.tv_usec >= 500000 - ((unsigned) tick) / 2 &&
+           xtime.tv_usec <= 500000 + ((unsigned) tick) / 2) {
+               if (set_rtc_mmss(xtime.tv_sec) == 0)
+                       last_rtc_update = xtime.tv_sec;
+               else
+                       last_rtc_update = xtime.tv_sec - 600; /* do it again in 60 s */
+       }
+}
+
+static int use_tsc;
+
+/*
+ * This is the same as the above, except we _also_ save the current
+ * Time Stamp Counter value at the time of the timer interrupt, so that
+ * we later on can estimate the time of day more exactly.
+ */
+static void timer_interrupt(int irq, void *dev_id, struct pt_regs *regs)
+{
+       int count;
+
+       /*
+        * Here we are in the timer irq handler. We just have irqs locally
+        * disabled but we don't know if the timer_bh is running on the other
+        * CPU. We need to avoid an SMP race with it. NOTE: we don't need
+        * the irq version of write_lock because, as just said, we have irqs
+        * locally disabled. -arca
+        */
+       write_lock(&xtime_lock);
+       vxtime_lock();
+
+       if (use_tsc)
+       {
+               /*
+                * It is important that these two operations happen almost at
+                * the same time. We do the RDTSC stuff first, since it's
+                * faster. To avoid any inconsistencies, we need interrupts
+                * disabled locally.
+                */
+
+               /*
+                * Interrupts are just disabled locally since the timer irq
+                * has the SA_INTERRUPT flag set. -arca
+                */
+       
+               /* read Pentium cycle counter */
+
+               rdtscl(last_tsc_low);
+
+               spin_lock(&i8253_lock);
+               outb_p(0x00, 0x43);     /* latch the count ASAP */
+
+               count = inb_p(0x40);    /* read the latched count */
+               count |= inb(0x40) << 8;
+               spin_unlock(&i8253_lock);
+
+               count = ((LATCH-1) - count) * TICK_SIZE;
+               delay_at_last_interrupt = (count + LATCH/2) / LATCH;
+       }
+       do_timer_interrupt(irq, NULL, regs);
+
+       vxtime_unlock();
+       write_unlock(&xtime_lock);
+
+}
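
For the PIT arithmetic at the end of timer_interrupt(): the latched channel-0 counter counts down from LATCH-1 once per jiffy, so (LATCH-1) - count is the number of PIT ticks elapsed since the timer fired, and scaling by TICK_SIZE/LATCH (with +LATCH/2 for rounding) converts that into microseconds. A standalone sketch with assumed i386-style constants (HZ=100, 1.19318 MHz PIT); the numbers are illustrative, not taken from this patch:

    #include <stdio.h>

    #define HZ              100
    #define CLOCK_TICK_RATE 1193180                          /* PIT input clock in Hz (assumed) */
    #define LATCH           ((CLOCK_TICK_RATE + HZ/2) / HZ)  /* PIT ticks per jiffy */
    #define TICK_SIZE       (1000000 / HZ)                   /* microseconds per jiffy (approx.) */

    int main(void)
    {
            int count = 5000;                                /* hypothetical latched PIT count */
            int ticks = (LATCH - 1) - count;                 /* PIT ticks since the timer fired */
            int usec  = (ticks * TICK_SIZE + LATCH/2) / LATCH;  /* rounded scale to microseconds */

            printf("LATCH=%d count=%d -> %d usec after the tick\n", LATCH, count, usec);
            return 0;
    }
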
+
+/* not static: needed by APM */
+unsigned long get_cmos_time(void)
+{
+       unsigned int year, mon, day, hour, min, sec;
+
+       /* The Linux interpretation of the CMOS clock register contents:
+        * When the Update-In-Progress (UIP) flag goes from 1 to 0, the
+        * RTC registers show the second which has precisely just started.
+        * Let's hope other operating systems interpret the RTC the same way.
+        */
+#ifndef CONFIG_SIMNOW
+       int i;
+       /* FIXME: This would take eons in emulated environment */
+       /* read RTC exactly on falling edge of update flag */
+       for (i = 0 ; i < 1000000 ; i++) /* may take up to 1 second... */
+               if (CMOS_READ(RTC_FREQ_SELECT) & RTC_UIP)
+                       break;
+       for (i = 0 ; i < 1000000 ; i++) /* must try at least 2.228 ms */
+               if (!(CMOS_READ(RTC_FREQ_SELECT) & RTC_UIP))
+                       break;
+#endif
+       do { /* Isn't this overkill ? UIP above should guarantee consistency */
+               sec = CMOS_READ(RTC_SECONDS);
+               min = CMOS_READ(RTC_MINUTES);
+               hour = CMOS_READ(RTC_HOURS);
+               day = CMOS_READ(RTC_DAY_OF_MONTH);
+               mon = CMOS_READ(RTC_MONTH);
+               year = CMOS_READ(RTC_YEAR);
+       } while (sec != CMOS_READ(RTC_SECONDS));
+       if (!(CMOS_READ(RTC_CONTROL) & RTC_DM_BINARY) || RTC_ALWAYS_BCD)
+         {
+           BCD_TO_BIN(sec);
+           BCD_TO_BIN(min);
+           BCD_TO_BIN(hour);
+           BCD_TO_BIN(day);
+           BCD_TO_BIN(mon);
+           BCD_TO_BIN(year);
+         }
+       if ((year += 1900) < 1970)
+               year += 100;
+       return mktime(year, mon, day, hour, min, sec);
+}
+
+static struct irqaction irq0  = { timer_interrupt, SA_INTERRUPT, 0, "timer", NULL, NULL};
+
+/* ------ Calibrate the TSC ------- 
+ * Return 2^32 * (1 / (TSC clocks per usec)) for do_fast_gettimeoffset().
+ * Too much 64-bit arithmetic here to do this cleanly in C, and for
+ * accuracy's sake we want to keep the overhead on the CTC speaker (channel 2)
+ * output busy loop as low as possible. We avoid reading the CTC registers
+ * directly because of the awkward 8-bit access mechanism of the 82C54
+ * device.
+ */
+
+#define CALIBRATE_LATCH        (5 * LATCH)
+#define CALIBRATE_TIME (5 * 1000020/HZ)
+
+static unsigned long __init calibrate_tsc(void)
+{
+       /* Set the Gate high, disable speaker */
+       outb((inb(0x61) & ~0x02) | 0x01, 0x61);
+
+       /*
+        * Now let's take care of CTC channel 2
+        *
+        * Set the Gate high, program CTC channel 2 for mode 0,
+        * (interrupt on terminal count mode), binary count,
+        * load 5 * LATCH count, (LSB and MSB) to begin countdown.
+        */
+       outb(0xb0, 0x43);                       /* binary, mode 0, LSB/MSB, Ch 2 */
+       outb(CALIBRATE_LATCH & 0xff, 0x42);     /* LSB of count */
+       outb(CALIBRATE_LATCH >> 8, 0x42);       /* MSB of count */
+
+       {
+               unsigned long start;
+               unsigned long end;
+               unsigned long count;
+
+               {
+                       int low, high;
+                       rdtsc(low,high);
+                       start = ((u64)high)<<32 | low;
+               }
+               count = 0;
+               do {
+                       count++;
+               } while ((inb(0x61) & 0x20) == 0);
+
+               {
+                       int low, high;
+                       rdtsc(low,high);
+                       end = ((u64)high)<<32 | low;
+                       last_tsc_low = low;
+               }
+
+
+               /* Error: ECTCNEVERSET */
+               if (count <= 1)
+                       goto bad_ctc;
+
+               end -= start;
+
+               /* Error: ECPUTOOSLOW */
+               if (end  <= CALIBRATE_TIME)
+                       goto bad_ctc;
+
+               end = (((u64)CALIBRATE_TIME)<<32)/end;
+               return end;
+       }
+
+       /*
+        * The CTC wasn't reliable: we got a hit on the very first read,
+        * or the CPU was so fast/slow that the quotient wouldn't fit in
+        * 32 bits..
+        */
+bad_ctc:
+       return 0;
+}
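
To put numbers on the calibration: PIT channel 2 counts down CALIBRATE_LATCH (five jiffies' worth of ticks, roughly 50 ms at HZ=100) while the loop counts TSC cycles, and the result 2^32 * CALIBRATE_TIME / tsc_delta is exactly the 2^32 / (clocks per microsecond) quotient that do_gettimeoffset() expects. A standalone sketch with an assumed 800 MHz TSC (numbers invented for illustration):

    #include <stdio.h>
    #include <stdint.h>

    #define HZ             100
    #define CALIBRATE_TIME (5 * 1000020 / HZ)        /* ~50000 us, as in the patch */

    int main(void)
    {
            /* Hypothetical 800 MHz CPU: 800 TSC clocks per microsecond. */
            uint64_t tsc_ticks = 800ULL * CALIBRATE_TIME;     /* TSC delta over the calibration window */
            uint64_t quotient  = ((uint64_t)CALIBRATE_TIME << 32) / tsc_ticks;

            /* Same formula time_init() later uses to report the clock rate in Hz. */
            uint64_t hz = (1000000ULL * (1ULL << 32)) / quotient;

            printf("quotient=%llu, detected ~%llu Hz\n",
                   (unsigned long long)quotient, (unsigned long long)hz);
            return 0;
    }
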
+
+void __init time_init(void)
+{
+       extern int x86_udelay_tsc;
+       
+       xtime.tv_sec = get_cmos_time();
+       xtime.tv_usec = 0;
+
+/*
+ * If we have APM enabled or the CPU clock speed is variable
+ * (CPU stops clock on HLT or slows clock to save power)
+ * then the TSC timestamps may diverge by up to 1 jiffy from
+ * 'real time' but nothing will break.
+ * The most frequent case is that the CPU is "woken" from a halt
+ * state by the timer interrupt itself, so we get 0 error. In the
+ * rare cases where a driver would "wake" the CPU and request a
+ * timestamp, the maximum error is < 1 jiffy. But timestamps are
+ * still perfectly ordered.
+ */
+
+       if (cpu_has_tsc) {
+               unsigned long tsc_quotient = calibrate_tsc();
+               if (tsc_quotient) {
+                       fast_gettimeoffset_quotient = tsc_quotient;
+                       use_tsc = 1;
+                       /*
+                        *      We could be more selective here I suspect
+                        *      and just enable this for the next intel chips ?
+                        */
+                       x86_udelay_tsc = 1;
+
+                       /* report CPU clock rate in Hz.
+                        * The formula is (10^6 * 2^32) / (2^32 * 1 / (clocks/us)) =
+                        * clocks/second. Our precision is about 100 ppm.
+                        */
+                       {                       
+                               cpu_khz = ((1000000*(1UL<<32)) / tsc_quotient); /* FIXME: is it right? */
+                               printk("Detected %ld Hz processor.\n", cpu_khz);
+                       }
+               }
+       }
+
+       setup_irq(0, &irq0);
+}
diff --git a/arch/x86_64/kernel/trampoline.S b/arch/x86_64/kernel/trampoline.S
new file mode 100644 (file)
index 0000000..3d1a9a1
--- /dev/null
@@ -0,0 +1,72 @@
+/*
+ *
+ *     Trampoline.S    Derived from Setup.S by Linus Torvalds
+ *
+ *     4 Jan 1997 Michael Chastain: changed to gnu as.
+ *
+ *     Entry: CS:IP point to the start of our code; we are
+ *     in real mode with no stack, and the contents of the rest
+ *     of the trampoline page (which we could use for a stack)
+ *     are a mystery.
+ *
+ *     In fact we don't actually need a stack so we don't
+ *     set one up.
+ *
+ *     On entry to trampoline_data, the processor is in real mode
+ *     with 16-bit addressing and 16-bit data.  CS has some value
+ *     and IP is zero.  Thus, data addresses need to be absolute
+ *     (no relocation) and are taken with regard to r_base.
+ *
+ *     If you work on this file, check the object module with objdump
+ *     --full-contents --reloc to make sure there are no relocation
+ *     entries. For the GDT entry we do hand relocation in smpboot.c
+ *     because of 64bit linker limitations.
+ */
+
+#include <linux/linkage.h>
+#include <asm/segment.h>
+#include <asm/page.h>
+
+.data
+
+.code16
+
+ENTRY(trampoline_data)
+r_base = .
+
+       mov     %cs, %ax        # Code and data in the same place
+       mov     %ax, %ds
+
+       mov     $1, %bx         # Flag an SMP trampoline
+       cli                     # We should be safe anyway
+
+       movl    $0xA5A5A5A5, trampoline_data - r_base
+                               # write marker so the master knows we're running
+
+       lidt    idt_48 - r_base # load idt with 0, 0
+       lgdt    gdt_48 - r_base # load gdt with whatever is appropriate
+
+       xor     %ax, %ax
+       inc     %ax             # protected mode (PE) bit
+       lmsw    %ax             # into protected mode
+       jmp     flush_instr
+flush_instr:
+       ljmpl   $__KERNEL_CS, $0x00100000
+                       # jump to startup_32 in arch/i386/kernel/head.S
+
+idt_48:
+       .word   0                       # idt limit = 0
+       .word   0, 0                    # idt base = 0L
+
+gdt_48:
+       .short  0x0800                  # gdt limit = 2048, 256 GDT entries
+       .globl tramp_gdt_ptr
+tramp_gdt_ptr:
+       .long   0                       # gdt base = gdt (first SMP CPU)
+                                       # this is filled in by C because the 64bit
+                                       # linker doesn't support absolute 32bit
+                                       # relocations. 
+       
+
+.globl SYMBOL_NAME(trampoline_end)
+SYMBOL_NAME_LABEL(trampoline_end)
diff --git a/arch/x86_64/kernel/traps.c b/arch/x86_64/kernel/traps.c
new file mode 100644 (file)
index 0000000..506cf7e
--- /dev/null
@@ -0,0 +1,771 @@
+/*
+ *  linux/arch/x86-64/traps.c
+ *
+ *  Copyright (C) 1991, 1992  Linus Torvalds
+ *  Copyright (C) 2000, 2001, 2002 Andi Kleen, SuSE Labs
+ *
+ *  Pentium III FXSR, SSE support
+ *     Gareth Hughes <gareth@valinux.com>, May 2000
+ */
+
+/*
+ * 'traps.c' handles hardware traps and faults after we have saved some
+ * state in 'entry.S'.
+ */
+#include <linux/config.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/errno.h>
+#include <linux/ptrace.h>
+#include <linux/timer.h>
+#include <linux/mm.h>
+#include <linux/init.h>
+#include <linux/delay.h>
+#include <linux/spinlock.h>
+#include <linux/interrupt.h>
+#include <linux/module.h>
+
+#include <asm/system.h>
+#include <asm/uaccess.h>
+#include <asm/io.h>
+#include <asm/atomic.h>
+#include <asm/debugreg.h>
+#include <asm/desc.h>
+#include <asm/i387.h>
+#include <asm/kdebug.h>
+#include <asm/uaccess.h>
+
+#include <asm/smp.h>
+#include <asm/pgalloc.h>
+#include <asm/pda.h>
+
+#include <linux/irq.h>
+
+asmlinkage int system_call(void);
+asmlinkage int kernel_syscall(void);
+extern void ia32_syscall(void);
+
+char doublefault_stack[4*1024]; 
+#ifndef CONFIG_SMP
+char stackfault_stack[4*1024];
+#endif
+
+/* We really need to get rid of the F0 0F workaround. Setting up new IDTs
+   is not an option. */
+
+struct desc_struct default_ldt[] = { { 0, 0 }, { 0, 0 }, { 0, 0 },
+               { 0, 0 }, { 0, 0 } };
+
+extern struct gate_struct idt_table[256]; 
+
+asmlinkage void divide_error(void);
+asmlinkage void debug(void);
+asmlinkage void nmi(void);
+asmlinkage void int3(void);
+asmlinkage void overflow(void);
+asmlinkage void bounds(void);
+asmlinkage void invalid_op(void);
+asmlinkage void device_not_available(void);
+asmlinkage void double_fault(void);
+asmlinkage void coprocessor_segment_overrun(void);
+asmlinkage void invalid_TSS(void);
+asmlinkage void segment_not_present(void);
+asmlinkage void stack_segment(void);
+asmlinkage void general_protection(void);
+asmlinkage void page_fault(void);
+asmlinkage void coprocessor_error(void);
+asmlinkage void simd_coprocessor_error(void);
+asmlinkage void reserved(void);
+asmlinkage void alignment_check(void);
+asmlinkage void spurious_interrupt_bug(void);
+
+struct notifier_block *die_chain;
+
+int kstack_depth_to_print = 24;
+
+#ifdef CONFIG_MODULES
+
+extern struct module *module_list;
+extern struct module kernel_module;
+
+static inline int kernel_text_address(unsigned long addr)
+{
+   int retval = 0;
+   struct module *mod;
+
+   if (addr >= (unsigned long) &_stext &&
+       addr <= (unsigned long) &_etext)
+       return 1;
+
+   for (mod = module_list; mod != &kernel_module; mod = mod->next) {
+       /* mod_bound tests for addr being inside the vmalloc'ed
+        * module area. Of course it'd be better to test only
+        * for the .text subset... */
+       if (mod_bound(addr, 0, mod)) {
+           retval = 1;
+           break;
+       }
+   }
+
+   return retval;
+}
+
+#else
+
+static inline int kernel_text_address(unsigned long addr)
+{
+   return (addr >= (unsigned long) &_stext &&
+       addr <= (unsigned long) &_etext);
+}
+
+#endif
+
+/*
+ * These constants are for searching for possible module text
+ * segments. MODULE_RANGE is a guess of how much space is likely
+ * to be vmalloced.
+ */
+#define MODULE_RANGE (8*1024*1024)
+
+void show_trace(unsigned long *stack)
+{
+       unsigned long addr;
+       unsigned long *irqstack, *irqstack_end;
+       /* FIXME: should read the CPU id from the APIC, so this still works with a bogus %gs */
+       const int cpu = smp_processor_id();
+       int i;
+
+       printk("\nCall Trace: ");
+
+       irqstack = (unsigned long *) &(cpu_pda[cpu].irqstack);
+       irqstack_end = (unsigned long *) ((char *)irqstack + sizeof_field(struct x8664_pda, irqstack)); 
+
+       i = 1;
+       if (stack >= irqstack && stack < irqstack_end) {
+               while (stack < irqstack_end) {
+                       addr = *stack++;
+                       /*
+                        * If the address is either in the text segment of the
+                        * kernel, or in the region which contains vmalloc'ed
+                        * memory, it *may* be the address of a calling
+                        * routine; if so, print it so that someone tracing
+                        * down the cause of the crash will be able to figure
+                        * out the call path that was taken.
+                        */
+                        if (kernel_text_address(addr)) {  
+                               if (i && ((i % 6) == 0))        
+                                       printk("\n       ");
+                               printk("[<%016lx>] ", addr);
+                               i++;
+                       }
+               } 
+               stack = (unsigned long *) (irqstack_end[-1]);
+               printk(" <EOI> ");
+#if 1
+               if (stack < (unsigned long *)current || 
+                   (char*)stack > ((char*)current->thread_info)+THREAD_SIZE) 
+                       printk("\n" KERN_DEBUG 
+                       "no stack at the end of irqstack; stack:%p, cur:%p/%p\n",
+                              stack, current, ((char*)current)+THREAD_SIZE); 
+#endif                        
+       } 
+
+       
+
+       while (((long) stack & (THREAD_SIZE-1)) != 0) {
+               addr = *stack++;
+               /*
+                * If the address is either in the text segment of the
+                * kernel, or in the region which contains vmalloc'ed
+                * memory, it *may* be the address of a calling
+                * routine; if so, print it so that someone tracing
+                * down the cause of the crash will be able to figure
+                * out the call path that was taken.
+                */
+               if (kernel_text_address(addr)) {         
+                       if (i && ((i % 6) == 0))        
+                               printk("\n       ");
+                       printk("[<%016lx>] ", addr);
+                       i++;
+               }
+       }
+       printk("\n");
+}
+
+void show_trace_task(struct task_struct *tsk)
+{
+       unsigned long rsp = tsk->thread.rsp;
+
+       /* User space on another CPU? */
+       if ((rsp ^ (unsigned long)tsk->thread_info) & (PAGE_MASK<<1))
+               return;
+       show_trace((unsigned long *)rsp);
+}
+
+void show_stack(unsigned long * rsp)
+{
+       unsigned long *stack;
+       int i;
+
+       // debugging aid: "show_stack(NULL);" prints the
+       // back trace for this cpu.
+
+       if(rsp==NULL)
+               rsp=(unsigned long*)&rsp;
+
+       stack = rsp;
+       for(i=0; i < kstack_depth_to_print; i++) {
+               if (((long) stack & (THREAD_SIZE-1)) == 0)
+                       break;
+               if (i && ((i % 8) == 0))
+                       printk("\n       ");
+               printk("%016lx ", *stack++);
+       }
+}
+
+extern void dump_pagetable(void); 
+
+void show_registers(struct pt_regs *regs)
+{
+       int i;
+       int in_kernel = 1;
+       unsigned long rsp;
+#ifdef CONFIG_SMP
+       /* For SMP should get the APIC id here, just to protect against corrupted GS */ 
+       const int cpu = smp_processor_id(); 
+#else
+       const int cpu = 0;
+#endif 
+       struct task_struct *cur = cpu_pda[cpu].pcurrent; 
+
+       rsp = (unsigned long) (&regs->rsp);
+       if (regs->rsp < TASK_SIZE) {
+               in_kernel = 0;
+               rsp = regs->rsp;
+       }
+       printk("CPU %d ", cpu);
+       show_regs(regs);
+       printk("Process %s (pid: %d, thread_info:%p task:%p)\n",
+               cur->comm, cur->pid, cur->thread_info, cur);
+
+       dump_pagetable();
+
+       /*
+        * When in-kernel, we also print out the stack and code at the
+        * time of the fault..
+        */
+       if (in_kernel) {
+
+               printk("Stack: ");
+               show_stack((unsigned long*)rsp);
+
+               printk("\nCode: ");
+               if(regs->rip < PAGE_OFFSET)
+                       goto bad;
+
+               for(i=0;i<20;i++)
+               {
+                       unsigned char c;
+                       if(__get_user(c, &((unsigned char*)regs->rip)[i])) {
+bad:
+                               printk(" Bad RIP value.");
+                               break;
+                       }
+                       printk("%02x ", c);
+               }
+       }
+       printk("\n");
+       show_trace((unsigned long *)rsp);
+}      
+
+void handle_BUG(struct pt_regs *regs)
+{ 
+       struct bug_frame f;
+       char tmp;
+
+       if ((regs->cs & 3) || regs->rip < __PAGE_OFFSET)
+               return; 
+       if (__copy_from_user(&f, (struct bug_frame *) regs->rip, sizeof(struct bug_frame)))
+               return; 
+       if ((unsigned long)f.filename < __PAGE_OFFSET) 
+               return;
+       if (__get_user(tmp, f.filename))
+               f.filename = "unmapped filename"; 
+       printk("Kernel BUG at %.30s:%d\n", f.filename, f.line);         
+} 
+
+spinlock_t die_lock = SPIN_LOCK_UNLOCKED;
+
+void die(const char * str, struct pt_regs * regs, long err)
+{
+       struct die_args args = { regs, str, err };
+       console_verbose();
+       notifier_call_chain(&die_chain,  DIE_DIE, &args); 
+       spin_lock_irq(&die_lock);
+       bust_spinlocks(1);
+       handle_BUG(regs); 
+       printk("%s: %04lx\n", str, err & 0xffff);
+       show_registers(regs);
+       bust_spinlocks(0);
+       spin_unlock_irq(&die_lock);
+       do_exit(SIGSEGV);
+}
+
+static inline void die_if_kernel(const char * str, struct pt_regs * regs, long err)
+{
+       if (!(regs->eflags & VM_MASK) && (regs->rip >= TASK_SIZE))
+               die(str, regs, err);
+}
+
+static inline unsigned long get_cr2(void)
+{
+       unsigned long address;
+
+       /* get the address */
+       __asm__("movq %%cr2,%0":"=r" (address));
+       return address;
+}
+
+static void inline do_trap(int trapnr, int signr, char *str, int vm86,
+                          struct pt_regs * regs, long error_code, siginfo_t *info)
+{
+       if ((regs->cs & 3) == 0)
+               goto kernel_trap;
+
+
+#if 0
+       printk("%d/%s trap %d sig %d %s rip:%lx rsp:%lx error_code:%lx\n",
+              current->pid, current->comm,
+              trapnr, signr, str, regs->rip, regs->rsp, error_code);
+#endif
+
+       {
+               struct task_struct *tsk = current;
+               tsk->thread.error_code = error_code;
+               tsk->thread.trap_no = trapnr;
+               if (info)
+                       force_sig_info(signr, info, tsk);
+               else
+                       force_sig(signr, tsk);
+               return;
+       }
+
+       kernel_trap: {       
+               unsigned long fixup = search_exception_table(regs->rip);
+               if (fixup)
+                       regs->rip = fixup;
+               else    
+                       die(str, regs, error_code);
+               return;
+       }
+}
+
+#define DO_ERROR(trapnr, signr, str, name) \
+asmlinkage void do_##name(struct pt_regs * regs, long error_code) \
+{ \
+       do_trap(trapnr, signr, str, 0, regs, error_code, NULL); \
+}
+
+#define DO_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr) \
+asmlinkage void do_##name(struct pt_regs * regs, long error_code) \
+{ \
+       siginfo_t info; \
+       info.si_signo = signr; \
+       info.si_errno = 0; \
+       info.si_code = sicode; \
+       info.si_addr = (void *)siaddr; \
+       do_trap(trapnr, signr, str, 0, regs, error_code, &info); \
+}
+
+#define DO_VM86_ERROR(trapnr, signr, str, name) \
+asmlinkage void do_##name(struct pt_regs * regs, long error_code) \
+{ \
+       do_trap(trapnr, signr, str, 1, regs, error_code, NULL); \
+}
+
+#define DO_VM86_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr) \
+asmlinkage void do_##name(struct pt_regs * regs, long error_code) \
+{ \
+       siginfo_t info; \
+       info.si_signo = signr; \
+       info.si_errno = 0; \
+       info.si_code = sicode; \
+       info.si_addr = (void *)siaddr; \
+       do_trap(trapnr, signr, str, 1, regs, error_code, &info); \
+}
+
+DO_VM86_ERROR_INFO( 0, SIGFPE,  "divide error", divide_error, FPE_INTDIV, regs->rip)
+DO_VM86_ERROR( 4, SIGSEGV, "overflow", overflow)
+DO_VM86_ERROR( 5, SIGSEGV, "bounds", bounds)
+DO_ERROR_INFO( 6, SIGILL,  "invalid operand", invalid_op, ILL_ILLOPN, regs->rip)
+DO_VM86_ERROR( 7, SIGSEGV, "device not available", device_not_available)
+DO_ERROR( 8, SIGSEGV, "double fault", double_fault)
+DO_ERROR( 9, SIGFPE,  "coprocessor segment overrun", coprocessor_segment_overrun)
+DO_ERROR(10, SIGSEGV, "invalid TSS", invalid_TSS)
+DO_ERROR(11, SIGBUS,  "segment not present", segment_not_present)
+DO_ERROR(12, SIGBUS,  "stack segment", stack_segment)
+DO_ERROR_INFO(17, SIGBUS, "alignment check", alignment_check, BUS_ADRALN, get_cr2())
+DO_ERROR(18, SIGSEGV, "reserved", reserved)
+
+asmlinkage void do_int3(struct pt_regs * regs, long error_code)
+{
+       struct die_args args = { regs, "int3", error_code }; 
+       notifier_call_chain(&die_chain, DIE_INT3, &args); 
+       do_trap(3, SIGTRAP, "int3", 1, regs, error_code, NULL);
+}
+
+asmlinkage void do_general_protection(struct pt_regs * regs, long error_code)
+{
+       if ((regs->cs & 3)==0)
+               goto gp_in_kernel;
+
+       current->thread.error_code = error_code;
+       current->thread.trap_no = 13;
+       force_sig(SIGSEGV, current);
+       return;
+
+gp_in_kernel:
+       {
+               unsigned long fixup;
+               fixup = search_exception_table(regs->rip);
+               if (fixup) {
+                       regs->rip = fixup;
+                       return;
+               }
+               die("general protection fault", regs, error_code);
+       }
+}
+
+static void mem_parity_error(unsigned char reason, struct pt_regs * regs)
+{
+       printk("Uhhuh. NMI received. Dazed and confused, but trying to continue\n");
+       printk("You probably have a hardware problem with your RAM chips\n");
+
+       /* Clear and disable the memory parity error line. */
+       reason = (reason & 0xf) | 4;
+       outb(reason, 0x61);
+}
+
+static void io_check_error(unsigned char reason, struct pt_regs * regs)
+{
+       printk("NMI: IOCK error (debug interrupt?)\n");
+       show_registers(regs);
+
+       /* Re-enable the IOCK line, wait for a few seconds */
+       reason = (reason & 0xf) | 8;
+       outb(reason, 0x61);
+       mdelay(2000);
+       reason &= ~8;
+       outb(reason, 0x61);
+}
+
+static void unknown_nmi_error(unsigned char reason, struct pt_regs * regs)
+{
+       printk("Uhhuh. NMI received for unknown reason %02x.\n", reason);
+       printk("Dazed and confused, but trying to continue\n");
+       printk("Do you have a strange power saving mode enabled?\n");
+}
+
+asmlinkage void do_nmi(struct pt_regs * regs)
+{
+       unsigned char reason = inb(0x61);
+
+
+       ++nmi_count(smp_processor_id());
+       if (!(reason & 0xc0)) {
+#if CONFIG_X86_LOCAL_APIC
+               /*
+                * Ok, so this is none of the documented NMI sources,
+                * so it must be the NMI watchdog.
+                */
+               if (nmi_watchdog) {
+                       nmi_watchdog_tick(regs);
+                       return;
+               }
+#endif
+               unknown_nmi_error(reason, regs);
+               return;
+       }
+       if (reason & 0x80)
+               mem_parity_error(reason, regs);
+       if (reason & 0x40)
+               io_check_error(reason, regs);
+       /*
+        * Reassert NMI in case it became active meanwhile
+        * as it's edge-triggered.
+        */
+       outb(0x8f, 0x70);
+       inb(0x71);              /* dummy */
+       outb(0x0f, 0x70);
+       inb(0x71);              /* dummy */
+}
+
+asmlinkage void do_debug(struct pt_regs * regs, long error_code)
+{
+       unsigned long condition;
+       struct task_struct *tsk = current;
+       siginfo_t info;
+
+       asm("movq %%db6,%0" : "=r" (condition));
+
+       /* Mask out spurious debug traps due to lazy DR7 setting */
+       if (condition & (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3)) {
+               if (!tsk->thread.debugreg[7]) { 
+                       goto clear_dr7;
+               }
+       }
+
+       tsk->thread.debugreg[6] = condition;
+
+       /* Mask out spurious TF errors due to lazy TF clearing */
+       if (condition & DR_STEP) {
+               /*
+                * The TF error should be masked out only if the current
+                * process is not traced and if the TRAP flag has been set
+                * previously by a tracing process (condition detected by
+                * the PT_DTRACE flag); remember that the i386 TRAP flag
+                * can be modified by the process itself in user mode,
+                * allowing programs to debug themselves without the ptrace()
+                * interface.
+                */
+               if ((tsk->ptrace & (PT_DTRACE|PT_PTRACED)) == PT_DTRACE)
+                       goto clear_TF;
+       }
+
+       /* Ok, finally something we can handle */
+       /* XXX: add die_chain here */
+       tsk->thread.trap_no = 1;
+       tsk->thread.error_code = error_code;
+       info.si_signo = SIGTRAP;
+       info.si_errno = 0;
+       info.si_code = TRAP_BRKPT;
+       info.si_addr = ((regs->cs & 3) == 0) ? (void *)tsk->thread.rip : 
+                                               (void *)regs->rip;
+       force_sig_info(SIGTRAP, &info, tsk);    
+clear_dr7:
+       asm("movq %0,%%db7"::"r"(0UL));
+       return;
+
+clear_TF:
+       regs->eflags &= ~TF_MASK;
+       return;
+}
+
+/*
+ * Note that we play around with the 'TS' bit in an attempt to get
+ * the correct behaviour even in the presence of the asynchronous
+ * IRQ13 behaviour
+ */
+void math_error(void *eip)
+{
+       struct task_struct * task;
+       siginfo_t info;
+       unsigned short cwd, swd;
+
+       /*
+        * Save the info for the exception handler and clear the error.
+        */
+       task = current;
+       save_init_fpu(task);
+       task->thread.trap_no = 16;
+       task->thread.error_code = 0;
+       info.si_signo = SIGFPE;
+       info.si_errno = 0;
+       info.si_code = __SI_FAULT;
+       info.si_addr = eip;
+       /*
+        * (~cwd & swd) will mask out exceptions that are not set to unmasked
+        * status.  0x3f is the exception bits in these regs, 0x200 is the
+        * C1 reg you need in case of a stack fault, 0x040 is the stack
+        * fault bit.  We should only be taking one exception at a time,
+        * so if this combination doesn't produce any single exception,
+        * then we have a bad program that isn't synchronizing its FPU usage
+        * and it will suffer the consequences since we won't be able to
+        * fully reproduce the context of the exception
+        */
+       cwd = get_fpu_cwd(task);
+       swd = get_fpu_swd(task);
+       switch (((~cwd) & swd & 0x3f) | (swd & 0x240)) {
+               case 0x000:
+               default:
+                       break;
+               case 0x001: /* Invalid Op */
+               case 0x040: /* Stack Fault */
+               case 0x240: /* Stack Fault | Direction */
+                       info.si_code = FPE_FLTINV;
+                       break;
+               case 0x002: /* Denormalize */
+               case 0x010: /* Underflow */
+                       info.si_code = FPE_FLTUND;
+                       break;
+               case 0x004: /* Zero Divide */
+                       info.si_code = FPE_FLTDIV;
+                       break;
+               case 0x008: /* Overflow */
+                       info.si_code = FPE_FLTOVF;
+                       break;
+               case 0x020: /* Precision */
+                       info.si_code = FPE_FLTRES;
+                       break;
+       }
+       force_sig_info(SIGFPE, &info, task);
+}
+
+asmlinkage void do_coprocessor_error(struct pt_regs * regs, long error_code)
+{
+       ignore_irq13 = 1;
+       math_error((void *)regs->rip);
+}
+
+asmlinkage void bad_intr(void)
+{
+       printk("bad interrupt"); 
+}
+
+void simd_math_error(void *eip)
+{
+       struct task_struct * task;
+       siginfo_t info;
+       unsigned short mxcsr;
+
+       /*
+        * Save the info for the exception handler and clear the error.
+        */
+       task = current;
+       save_init_fpu(task);
+       task->thread.trap_no = 19;
+       task->thread.error_code = 0;
+       info.si_signo = SIGFPE;
+       info.si_errno = 0;
+       info.si_code = __SI_FAULT;
+       info.si_addr = eip;
+       /*
+        * The SIMD FPU exceptions are handled a little differently, as there
+        * is only a single status/control register.  Thus, to determine which
+        * unmasked exception was caught we must mask the exception mask bits
+        * at 0x1f80, and then use these to mask the exception bits at 0x3f.
+        */
+       mxcsr = get_fpu_mxcsr(task);
+       switch (~((mxcsr & 0x1f80) >> 7) & (mxcsr & 0x3f)) {
+               case 0x000:
+               default:
+                       break;
+               case 0x001: /* Invalid Op */
+                       info.si_code = FPE_FLTINV;
+                       break;
+               case 0x002: /* Denormalize */
+               case 0x010: /* Underflow */
+                       info.si_code = FPE_FLTUND;
+                       break;
+               case 0x004: /* Zero Divide */
+                       info.si_code = FPE_FLTDIV;
+                       break;
+               case 0x008: /* Overflow */
+                       info.si_code = FPE_FLTOVF;
+                       break;
+               case 0x020: /* Precision */
+                       info.si_code = FPE_FLTRES;
+                       break;
+       }
+       force_sig_info(SIGFPE, &info, task);
+}
+
+asmlinkage void do_simd_coprocessor_error(struct pt_regs * regs,
+                                         long error_code)
+{
+       if (cpu_has_xmm) {
+               /* Handle SIMD FPU exceptions on PIII+ processors. */
+               ignore_irq13 = 1;
+               simd_math_error((void *)regs->rip);
+       } else {
+               /*
+                * Handle strange cache flush from user space exception
+                * in all other cases.  This is undocumented behaviour.
+                */
+               die_if_kernel("cache flush denied", regs, error_code);
+               current->thread.trap_no = 19;
+               current->thread.error_code = error_code;
+               force_sig(SIGSEGV, current);
+       }
+}
+
+asmlinkage void do_spurious_interrupt_bug(struct pt_regs * regs,
+                                         long error_code)
+{
+#if 0
+       /* No need to warn about this any longer. */
+       printk("Ignoring P6 Local APIC Spurious Interrupt Bug...\n");
+#endif
+}
+
+/*
+ *  'math_state_restore()' saves the current math information in the
+ * old math state array, and gets the new ones from the current task
+ *
+ * Careful.. There are problems with IBM-designed IRQ13 behaviour.
+ * Don't touch unless you *really* know how it works.
+ *
+ * Must be called with kernel preemption disabled.
+ */
+asmlinkage void math_state_restore(void)
+{
+       clts();                 /* Allow maths ops (or we recurse) */
+
+       if (current->used_math) {
+               restore_fpu(current);
+       } else {
+               init_fpu();
+       }
+       set_thread_flag(TIF_USEDFPU); 
+}
+
+asmlinkage void math_emulate(void)
+{
+       printk("math-emulation not enabled and no coprocessor found.\n");
+       printk("killing %s.\n",current->comm);
+       force_sig(SIGFPE,current);
+       schedule();
+}
+
+void __init trap_init(void)
+{
+       set_intr_gate(0,&divide_error);
+       set_intr_gate(1,&debug);
+       set_intr_gate(2,&nmi);
+       set_system_gate(3,&int3);       /* int3-5 can be called from all */
+       set_system_gate(4,&overflow);
+       set_system_gate(5,&bounds);
+       set_intr_gate(6,&invalid_op);
+       set_intr_gate(7,&device_not_available);
+       set_intr_gate_ist(8,&double_fault, 1);
+       set_intr_gate(9,&coprocessor_segment_overrun);
+       set_intr_gate(10,&invalid_TSS);
+       set_intr_gate(11,&segment_not_present);
+       set_intr_gate_ist(12,&stack_segment,STACKFAULT_STACK);
+       set_intr_gate(13,&general_protection);
+       set_intr_gate(14,&page_fault);
+       set_intr_gate(15,&spurious_interrupt_bug);
+       set_intr_gate(16,&coprocessor_error);
+       set_intr_gate(17,&alignment_check);
+       set_intr_gate(19,&simd_coprocessor_error);
+
+#ifdef CONFIG_IA32_EMULATION
+       set_intr_gate(IA32_SYSCALL_VECTOR, ia32_syscall);
+#endif
+       
+#if 0
+       /*
+        * default LDT is a single-entry callgate to lcall7 for iBCS
+        * and a callgate to lcall27 for Solaris/x86 binaries
+        */
+       set_call_gate(&default_ldt[0],lcall7);
+       set_call_gate(&default_ldt[4],lcall27);
+#endif
+       
+       /*
+        * Should be a barrier for any external CPU state.
+        */
+       cpu_init();
+}
+
diff --git a/arch/x86_64/kernel/vsyscall.c b/arch/x86_64/kernel/vsyscall.c
new file mode 100644 (file)
index 0000000..31dade4
--- /dev/null
@@ -0,0 +1,190 @@
+/*
+ *  linux/arch/x86_64/kernel/vsyscall.c
+ *
+ *  Copyright (C) 2001 Andrea Arcangeli <andrea@suse.de> SuSE
+ *
+ *  Thanks to hpa@transmeta.com for some useful hints.
+ *  Special thanks to Ingo Molnar for his early experience with
+ *  a different vsyscall implementation for Linux/IA32 and for the name.
+ *
+ *  vsyscall 1 is located at -10Mbyte, vsyscall 2 is located
+ *  at virtual address -10Mbyte+1024bytes etc... There are at most 8192
+ *  vsyscalls. One vsyscall can reserve more than 1 slot to avoid
+ *  jumping out of line if necessary.
+ *
+ *  $Id: vsyscall.c,v 1.4 2001/09/27 17:58:13 ak Exp $
+ */
+
+/*
+ * TODO 2001-03-20:
+ *
+ * 1) make the page fault handler detect faults on page1-page-last of the vsyscall
+ *    virtual space, and make it advance %rip and write -ENOSYS in %rax (so
+ *    we'll be able to upgrade to a new glibc without upgrading the kernel after
+ *    we add more vsyscalls).
+ * 2) Possibly we need a fixmap table for the vsyscalls too if we want
+ *    to avoid SIGSEGV and we want to return -EFAULT from the vsyscalls as well.
+ *    Can we segfault inside a "syscall"? We can fix this anytime and those fixes
+ *    won't be visible to userspace. Not fixing this is a noop for correct programs;
+ *    broken programs will segfault, and there's no security risk until we choose to
+ *    fix it.
+ *
+ * These are not urgent; they only need to be addressed before shipping the first
+ * production binary kernels.
+ */
+
+#include <linux/time.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/timer.h>
+
+#include <asm/vsyscall.h>
+#include <asm/pgtable.h>
+#include <asm/page.h>
+#include <asm/fixmap.h>
+#include <asm/errno.h>
+
+#define __vsyscall(nr) __attribute__ ((unused,__section__(".vsyscall_" #nr)))
+
+static inline void timeval_normalize(struct timeval * tv)
+{
+       time_t __sec;
+
+       __sec = tv->tv_usec / 1000000;
+       if (__sec)
+       {
+               tv->tv_usec %= 1000000;
+               tv->tv_sec += __sec;
+       }
+}
+
+long __vxtime_sequence[2] __section_vxtime_sequence;
+
+/* The rest of the kernel knows it as this. */
+extern void do_gettimeofday(struct timeval *tv) __attribute__((alias("do_vgettimeofday"))); 
+
+inline void do_vgettimeofday(struct timeval * tv)
+{
+       long sequence;
+       unsigned long usec, sec;
+
+       do {
+               unsigned long eax, edx;
+
+               sequence = __vxtime_sequence[1];
+               rmb();
+               
+               /* Read the Time Stamp Counter */
+               rdtsc(eax,edx);
+
+               /* .. relative to previous jiffy (32 bits is enough) */
+               eax -= __last_tsc_low;  /* tsc_low delta */
+
+               /*
+                * Time offset = (tsc_low delta) * fast_gettimeoffset_quotient
+                *             = (tsc_low delta) * (usecs_per_clock)
+                *             = (tsc_low delta) * (usecs_per_jiffy / clocks_per_jiffy)
+                *
+                * Using a mull instead of a divl saves up to 31 clock cycles
+                * in the critical path.
+                */
+
+               edx = (eax*__fast_gettimeoffset_quotient) >> 32;
+
+               /* our adjusted time offset in microseconds */
+               usec = __delay_at_last_interrupt + edx;
+
+               {
+                       unsigned long lost = __jiffies - __wall_jiffies;
+                       if (lost)
+                               usec += lost * (1000000 / HZ);
+               }
+               sec = __xtime.tv_sec;
+               usec += __xtime.tv_usec;
+
+               rmb();
+       } while (sequence != __vxtime_sequence[0]);
+
+       tv->tv_sec = sec;
+       tv->tv_usec = usec;
+       timeval_normalize(tv);
+}
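
The retry loop above is a lockless sequence read: the kernel-side writer (vxtime_lock()/vxtime_unlock(), defined elsewhere in this patch) presumably bumps __vxtime_sequence[0] before and __vxtime_sequence[1] after updating the time variables, so a reader that sees different values at the two ends knows it raced with an update and tries again. A from-scratch sketch of that two-counter protocol; the names and the writer ordering are assumptions for illustration, not the patch's actual macros:

    #include <stdio.h>
    #include <stdatomic.h>

    /* Simplified single-writer illustration of the two-counter scheme. */
    static _Atomic long seq[2];
    static struct { long sec; long usec; } shared_time;

    static void writer_update(long sec, long usec)
    {
            seq[0]++;                                   /* readers now see seq[0] != seq[1] */
            atomic_thread_fence(memory_order_release);
            shared_time.sec  = sec;
            shared_time.usec = usec;
            atomic_thread_fence(memory_order_release);
            seq[1]++;                                   /* counters match again: data consistent */
    }

    static void reader_snapshot(long *sec, long *usec)
    {
            long s;
            do {
                    s = seq[1];                         /* like reading __vxtime_sequence[1] */
                    atomic_thread_fence(memory_order_acquire);
                    *sec  = shared_time.sec;
                    *usec = shared_time.usec;
                    atomic_thread_fence(memory_order_acquire);
            } while (s != seq[0]);                      /* torn read: an update was in flight */
    }

    int main(void)
    {
            long sec, usec;

            writer_update(1000000000L, 123456L);
            reader_snapshot(&sec, &usec);
            printf("%ld.%06ld\n", sec, usec);
            return 0;
    }
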
+
+static inline void do_get_tz(struct timezone * tz)
+{
+       long sequence;
+
+       do {
+               sequence = __vxtime_sequence[1];
+               rmb();
+
+               *tz = __sys_tz;
+
+               rmb();
+       } while (sequence != __vxtime_sequence[0]);
+}
+
+static int __vsyscall(0) vgettimeofday(struct timeval * tv, struct timezone * tz)
+{
+       if (tv)
+               do_vgettimeofday(tv);
+       if (tz)
+               do_get_tz(tz);
+       return 0;
+}
+
+static time_t __vsyscall(1) vtime(time_t * time)
+{
+       long sequence;
+       time_t __time;
+
+       do {
+               sequence = __vxtime_sequence[1];
+               rmb();
+
+               __time = __xtime.tv_sec;
+
+               rmb();
+       } while (sequence != __vxtime_sequence[0]);
+
+       if (time)
+               *time = __time;
+       return __time;
+}
+
+static long __vsyscall(2) venosys_0(void)
+{
+       return -ENOSYS;
+}
+
+static long __vsyscall(3) venosys_1(void)
+{
+       return -ENOSYS;
+}
+
+static void __init map_vsyscall(void)
+{
+       extern char __vsyscall_0;
+       unsigned long physaddr_page0 = (unsigned long) &__vsyscall_0 - __START_KERNEL_map;
+
+       __set_fixmap(VSYSCALL_FIRST_PAGE, physaddr_page0, PAGE_KERNEL_VSYSCALL);
+}
+
+static int __init vsyscall_init(void)
+{
+       printk("VSYSCALL: consistency checks...");
+       if ((unsigned long) &vgettimeofday != VSYSCALL_ADDR(__NR_vgettimeofday))
+               panic("vgettimeofday link addr broken");
+       if ((unsigned long) &vtime != VSYSCALL_ADDR(__NR_vtime))
+               panic("vtime link addr broken");
+       if (VSYSCALL_ADDR(0) != __fix_to_virt(VSYSCALL_FIRST_PAGE))
+               panic("fixmap first vsyscall %lx should be %lx", __fix_to_virt(VSYSCALL_FIRST_PAGE),
+                     VSYSCALL_ADDR(0));
+       printk("passed...mapping...");
+       map_vsyscall();
+       printk("done.\n");
+
+       return 0;
+}
+
+__initcall(vsyscall_init);
diff --git a/arch/x86_64/kernel/x8664_ksyms.c b/arch/x86_64/kernel/x8664_ksyms.c
new file mode 100644 (file)
index 0000000..5499e8e
--- /dev/null
@@ -0,0 +1,162 @@
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/smp.h>
+#include <linux/user.h>
+#include <linux/elfcore.h>
+#include <linux/mca.h>
+#include <linux/sched.h>
+#include <linux/in6.h>
+#include <linux/interrupt.h>
+#include <linux/smp_lock.h>
+#include <linux/pm.h>
+#include <linux/pci.h>
+#include <linux/apm_bios.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/tty.h>
+
+#include <asm/semaphore.h>
+#include <asm/processor.h>
+#include <asm/i387.h>
+#include <asm/uaccess.h>
+#include <asm/checksum.h>
+#include <asm/io.h>
+#include <asm/hardirq.h>
+#include <asm/delay.h>
+#include <asm/irq.h>
+#include <asm/mmx.h>
+#include <asm/desc.h>
+#include <asm/pgtable.h>
+#include <asm/pgalloc.h>
+
+extern void dump_thread(struct pt_regs *, struct user *);
+extern spinlock_t rtc_lock;
+
+#if defined(CONFIG_APM) || defined(CONFIG_APM_MODULE)
+extern void machine_real_restart(unsigned char *, int);
+EXPORT_SYMBOL(machine_real_restart);
+#endif
+
+#ifdef CONFIG_SMP
+extern void FASTCALL( __write_lock_failed(rwlock_t *rw));
+extern void FASTCALL( __read_lock_failed(rwlock_t *rw));
+#endif
+
+#if defined(CONFIG_BLK_DEV_IDE) || defined(CONFIG_BLK_DEV_HD) || defined(CONFIG_BLK_DEV_IDE_MODULE) || defined(CONFIG_BLK_DEV_HD_MODULE)
+extern struct drive_info_struct drive_info;
+EXPORT_SYMBOL(drive_info);
+#endif
+
+extern unsigned long get_cmos_time(void);
+
+/* platform dependent support */
+EXPORT_SYMBOL(boot_cpu_data);
+EXPORT_SYMBOL(dump_thread);
+EXPORT_SYMBOL(dump_fpu);
+EXPORT_SYMBOL(__ioremap);
+EXPORT_SYMBOL(iounmap);
+EXPORT_SYMBOL(enable_irq);
+EXPORT_SYMBOL(disable_irq);
+EXPORT_SYMBOL(disable_irq_nosync);
+EXPORT_SYMBOL(probe_irq_mask);
+EXPORT_SYMBOL(kernel_thread);
+EXPORT_SYMBOL(pm_idle);
+EXPORT_SYMBOL(pm_power_off);
+EXPORT_SYMBOL(get_cmos_time);
+EXPORT_SYMBOL(apm_info);
+
+#ifdef CONFIG_IO_DEBUG
+EXPORT_SYMBOL(__io_virt_debug);
+#endif
+
+EXPORT_SYMBOL_NOVERS(__down_failed);
+EXPORT_SYMBOL_NOVERS(__down_failed_interruptible);
+EXPORT_SYMBOL_NOVERS(__down_failed_trylock);
+EXPORT_SYMBOL_NOVERS(__up_wakeup);
+/* Networking helper routines. */
+EXPORT_SYMBOL(csum_partial_copy_nocheck);
+/* Delay loops */
+EXPORT_SYMBOL(__udelay);
+EXPORT_SYMBOL(__delay);
+EXPORT_SYMBOL(__const_udelay);
+
+EXPORT_SYMBOL_NOVERS(__get_user_1);
+EXPORT_SYMBOL_NOVERS(__get_user_2);
+EXPORT_SYMBOL_NOVERS(__get_user_4);
+EXPORT_SYMBOL_NOVERS(__put_user_1);
+EXPORT_SYMBOL_NOVERS(__put_user_2);
+EXPORT_SYMBOL_NOVERS(__put_user_4);
+
+EXPORT_SYMBOL(strtok);
+EXPORT_SYMBOL(strpbrk);
+EXPORT_SYMBOL(simple_strtol);
+EXPORT_SYMBOL(strstr);
+
+EXPORT_SYMBOL(strncpy_from_user);
+EXPORT_SYMBOL(__strncpy_from_user);
+EXPORT_SYMBOL(clear_user);
+EXPORT_SYMBOL(__clear_user);
+EXPORT_SYMBOL(__generic_copy_from_user);
+EXPORT_SYMBOL(__generic_copy_to_user);
+EXPORT_SYMBOL(strnlen_user);
+
+EXPORT_SYMBOL(pci_alloc_consistent);
+EXPORT_SYMBOL(pci_free_consistent);
+
+#ifdef CONFIG_PCI
+EXPORT_SYMBOL(pcibios_penalize_isa_irq);
+EXPORT_SYMBOL(pci_mem_start);
+#endif
+
+#ifdef CONFIG_X86_USE_3DNOW
+EXPORT_SYMBOL(_mmx_memcpy);
+EXPORT_SYMBOL(mmx_clear_page);
+EXPORT_SYMBOL(mmx_copy_page);
+#endif
+
+#ifdef CONFIG_SMP
+EXPORT_SYMBOL(cpu_data);
+EXPORT_SYMBOL(kernel_flag);
+EXPORT_SYMBOL(smp_num_cpus);
+EXPORT_SYMBOL(cpu_online_map);
+EXPORT_SYMBOL_NOVERS(__write_lock_failed);
+EXPORT_SYMBOL_NOVERS(__read_lock_failed);
+
+/* Global SMP irq stuff */
+EXPORT_SYMBOL(synchronize_irq);
+EXPORT_SYMBOL(global_irq_holder);
+EXPORT_SYMBOL(__global_cli);
+EXPORT_SYMBOL(__global_sti);
+EXPORT_SYMBOL(__global_save_flags);
+EXPORT_SYMBOL(__global_restore_flags);
+EXPORT_SYMBOL(smp_call_function);
+
+/* TLB flushing */
+EXPORT_SYMBOL(flush_tlb_page);
+#endif
+
+#ifdef CONFIG_MCA
+EXPORT_SYMBOL(machine_id);
+#endif
+
+#ifdef CONFIG_VT
+EXPORT_SYMBOL(screen_info);
+#endif
+
+EXPORT_SYMBOL(get_wchan);
+
+EXPORT_SYMBOL(rtc_lock);
+
+#undef memcpy
+#undef memset
+extern void * memset(void *,int,__kernel_size_t);
+extern void * memcpy(void *,const void *,__kernel_size_t);
+EXPORT_SYMBOL_NOVERS(memcpy);
+EXPORT_SYMBOL_NOVERS(memset);
+
+EXPORT_SYMBOL(empty_zero_page);
+
+#ifdef CONFIG_HAVE_DEC_LOCK
+EXPORT_SYMBOL(atomic_dec_and_lock);
+#endif
+
diff --git a/arch/x86_64/lib/Makefile b/arch/x86_64/lib/Makefile
new file mode 100644 (file)
index 0000000..0e6b14b
--- /dev/null
@@ -0,0 +1,17 @@
+#
+# Makefile for x86_64-specific library files..
+#
+
+.S.o:
+       $(CC) $(AFLAGS) -c $< -o $*.o
+
+L_TARGET = lib.a
+obj-y  = generic-checksum.o old-checksum.o delay.o \
+       usercopy.o getuser.o putuser.o  \
+       checksum_copy.o rwsem_thunk.o
+
+obj-$(CONFIG_IO_DEBUG) += iodebug.o
+obj-$(CONFIG_X86_USE_3DNOW) += mmx.o
+obj-$(CONFIG_HAVE_DEC_LOCK) += dec_and_lock.o
+
+include $(TOPDIR)/Rules.make
diff --git a/arch/x86_64/lib/checksum_copy.S b/arch/x86_64/lib/checksum_copy.S
new file mode 100644 (file)
index 0000000..61b188d
--- /dev/null
@@ -0,0 +1,142 @@
+/*
+ * INET                An implementation of the TCP/IP protocol suite for the LINUX
+ *             operating system.  INET is implemented using the  BSD Socket
+ *             interface as the means of communication with the user level.
+ *
+ *             IP/TCP/UDP checksumming routines
+ *
+ * Authors:    Jorge Cwik, <jorge@laser.satlink.net>
+ *             Arnt Gulbrandsen, <agulbra@nvg.unit.no>
+ *             Tom May, <ftom@netcom.com>
+ *              Pentium Pro/II routines:
+ *              Alexander Kjeldaas <astor@guardian.no>
+ *              Finn Arne Gangstad <finnag@guardian.no>
+ *             Lots of code moved from tcp.c and ip.c; see those files
+ *             for more names.
+ *
+ * Changes:     Ingo Molnar, converted csum_partial_copy() to 2.1 exception
+ *                          handling.
+ *             Andi Kleen,  add zeroing on error
+ *                   converted to pure assembler
+ *              Andi Kleen   initial raw port to x86-64
+ *
+ *             This program is free software; you can redistribute it and/or
+ *             modify it under the terms of the GNU General Public License
+ *             as published by the Free Software Foundation; either version
+ *             2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/config.h>
+#include <asm/errno.h>
+
+
+/* Version for PentiumII/PPro ported to x86-64. Still very raw and
+   does not exploit 64-bit operations yet.  */
+
+#define SRC(y...)                      \
+       9999: y;                        \
+       .section __ex_table, "a";       \
+       .quad 9999b, 6001f      ;       \
+       .previous
+
+#define DST(y...)                      \
+       9999: y;                        \
+       .section __ex_table, "a";       \
+       .quad 9999b, 6002f      ;       \
+       .previous
+       
+#define ROUND1(x) \
+       SRC(movl x(%rsi), %ebx  )       ;       \
+       addl %ebx, %eax                 ;       \
+       DST(movl %ebx, x(%rdi)  )       ;
+
+#define ROUND(x) \
+       SRC(movl x(%rsi), %ebx  )       ;       \
+       adcl %ebx, %eax                 ;       \
+       DST(movl %ebx, x(%rdi)  )       ;
+
+#define ARGBASE 0
+
+/*
+       asmlinkage unsigned int csum_partial_copy_generic( const char *src, char *dst, int len, int sum,
+                                                          int *src_err_ptr, int *dst_err_ptr);
+       rdi .. src
+       rsi .. dst      (copy in r12)
+       rdx .. len      (copy in r10)
+       rcx .. sum
+       r8 ..  src_err_ptr
+       r9 ..  dst_err_ptr
+
+       OPTIMIZEME: this routine should take advantage of checksumming 64 bits at a time
+*/
+
+       .globl csum_partial_copy_generic                
+csum_partial_copy_generic:
+       pushq %r10
+       pushq %r12
+       pushq %rbx
+       pushq %rbp
+       xchgq %rsi, %rdi
+
+       movq %rdx, %r10
+       movq %rsi, %r12
+       
+       movq %rcx, %rax
+       movq %rdx, %rcx # And now it looks like PII case
+       movl %ecx, %ebx
+       movl %esi, %edx
+       shrl $6, %ecx
+       andl $0x3c, %ebx
+       negq %rbx
+       subq %rbx, %rsi
+       subq %rbx, %rdi
+       lea 3f(%rbx,%rbx), %rbx
+       testq %rsi, %rsi
+       jmp *%rbx
+1:     addq $64,%rsi
+       addq $64,%rdi
+       ROUND1(-64) ROUND(-60) ROUND(-56) ROUND(-52)    
+       ROUND (-48) ROUND(-44) ROUND(-40) ROUND(-36)    
+       ROUND (-32) ROUND(-28) ROUND(-24) ROUND(-20)    
+       ROUND (-16) ROUND(-12) ROUND(-8)  ROUND(-4)     
+3:     adcl $0,%eax
+       addl $64,%edx
+       dec %ecx
+       jge 1b
+4:      movq %r10,%rdx
+       andl $3, %edx
+       jz 7f
+       cmpl $2, %edx
+       jb 5f
+SRC(   movw (%rsi), %dx         )
+       leaq 2(%rsi), %rsi
+DST(   movw %dx, (%rdi)         )
+       leaq 2(%rdi), %rdi
+       je 6f
+       shll $16,%edx
+5:
+SRC(   movb (%rsi), %dl         )
+DST(   movb %dl, (%rdi)         )
+6:     addl %edx, %eax
+       adcl $0, %eax
+7:
+.section .fixup, "ax"
+6001:  
+       movl $-EFAULT, (%r8)
+       # zero the complete destination (computing the rest is too much work)
+       movq %r12,%rdi  # dst
+       movq %r10,%rcx  # len
+       xorl %eax,%eax
+       rep; stosb
+       jmp 7b
+6002:  movl $-EFAULT,(%r9)
+       jmp  7b                 
+.previous
+       popq %rbp
+       popq %rbx
+       popq %r12
+       popq %r10
+       ret
+                               
+#undef ROUND
+#undef ROUND1
diff --git a/arch/x86_64/lib/dec_and_lock.c b/arch/x86_64/lib/dec_and_lock.c
new file mode 100644 (file)
index 0000000..ffd4869
--- /dev/null
@@ -0,0 +1,40 @@
+/*
+ * x86 version of "atomic_dec_and_lock()" using
+ * the atomic "cmpxchg" instruction.
+ *
+ * (For CPUs lacking cmpxchg, we use the slow
+ * generic version, and this one never even gets
+ * compiled).
+ */
+
+#include <linux/spinlock.h>
+#include <asm/atomic.h>
+
+int atomic_dec_and_lock(atomic_t *atomic, spinlock_t *lock)
+{
+       int counter;
+       int newcount;
+
+repeat:
+       counter = atomic_read(atomic);
+       newcount = counter-1;
+
+       if (!newcount)
+               goto slow_path;
+
+       asm volatile("lock; cmpxchgl %1,%2"
+               :"=a" (newcount)
+               :"r" (newcount), "m" (atomic->counter), "0" (counter));
+
+       /* If the above failed, "eax" will have changed */
+       if (newcount != counter)
+               goto repeat;
+       return 0;
+
+slow_path:
+       spin_lock(lock);
+       if (atomic_dec_and_test(atomic))
+               return 1;
+       spin_unlock(lock);
+       return 0;
+}
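
For context, atomic_dec_and_lock() is the standard helper for "drop a reference and, only if it was the last one, take the lock that protects lookups so the object can be torn down". A sketch of a typical caller; struct foo, foo_list_lock and the list linkage are invented for illustration and are not part of this patch:

    #include <linux/spinlock.h>
    #include <linux/list.h>
    #include <linux/slab.h>
    #include <asm/atomic.h>

    struct foo {
            atomic_t refcount;
            struct list_head node;
    };

    static spinlock_t foo_list_lock = SPIN_LOCK_UNLOCKED;

    static void foo_put(struct foo *f)
    {
            /* Returns 1 with foo_list_lock held only when the count reached
             * zero, so the unlink-and-free below cannot race with a lookup
             * that grabs a new reference under the same lock. */
            if (atomic_dec_and_lock(&f->refcount, &foo_list_lock)) {
                    list_del(&f->node);
                    spin_unlock(&foo_list_lock);
                    kfree(f);
            }
    }
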
diff --git a/arch/x86_64/lib/delay.c b/arch/x86_64/lib/delay.c
new file mode 100644 (file)
index 0000000..da077da
--- /dev/null
@@ -0,0 +1,45 @@
+/*
+ *     Precise Delay Loops for i386
+ *
+ *     Copyright (C) 1993 Linus Torvalds
+ *     Copyright (C) 1997 Martin Mares <mj@atrey.karlin.mff.cuni.cz>
+ *
+ *     The __delay function must _NOT_ be inlined as its execution time
+ *     depends wildly on alignment on many x86 processors. 
+ */
+
+#include <linux/config.h>
+#include <linux/sched.h>
+#include <linux/delay.h>
+#include <asm/delay.h>
+
+#ifdef CONFIG_SMP
+#include <asm/smp.h>
+#endif
+
+int x86_udelay_tsc = 0;                /* Delay via TSC */
+
+void __delay(unsigned long loops)
+{
+#ifndef CONFIG_SIMNOW
+       unsigned long bclock, now;
+       
+       rdtscl(bclock);
+       do
+       {
+               rep_nop(); 
+               rdtscl(now);
+       }
+       while((now-bclock) < loops);
+#endif
+}
+
+inline void __const_udelay(unsigned long xloops)
+{
+        __delay(((xloops * current_cpu_data.loops_per_jiffy) >> 32) * HZ);
+}
+
+void __udelay(unsigned long usecs)
+{
+       __const_udelay(usecs * 0x000010c6);  /* 2**32 / 1000000 */
+}
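
The constant 0x000010c6 is 2^32/1000000 rounded down, so usecs * 0x10c6 expresses the delay as a 32.32 fixed-point fraction of a second; __const_udelay() then scales it by the calibrated loops_per_jiffy and HZ and shifts the fraction back out. A small sketch of the same arithmetic (the helper name is invented for illustration):

	/* Illustration only: how microseconds become a __delay() loop count.
	 * 0x10c6 ~ 2^32 / 10^6, so the multiply converts microseconds into a
	 * 32.32 fixed-point fraction of a second; scaling by loops_per_jiffy
	 * and HZ then yields delay-loop iterations. */
	static unsigned long usecs_to_loops(unsigned long usecs,
	                                    unsigned long loops_per_jiffy,
	                                    unsigned long hz)
	{
	        unsigned long xloops = usecs * 0x000010c6;      /* usecs * 2^32 / 1e6 */

	        return ((xloops * loops_per_jiffy) >> 32) * hz;
	}
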
diff --git a/arch/x86_64/lib/generic-checksum.c b/arch/x86_64/lib/generic-checksum.c
new file mode 100644 (file)
index 0000000..f508f57
--- /dev/null
@@ -0,0 +1,124 @@
+/*
+ * arch/x86_64/lib/generic-checksum.c
+ *
+ * This file contains network checksum routines that are better done
+ * in an architecture-specific manner due to speed..
+ */
+#include <linux/string.h>
+#include <asm/byteorder.h>
+
+static inline unsigned short from64to16(unsigned long x)
+{
+       /* add up 32-bit words for 33 bits */
+       x = (x & 0xffffffff) + (x >> 32);
+       /* add up 16-bit and 17-bit words for 17+c bits */
+       x = (x & 0xffff) + (x >> 16);
+       /* add up 16-bit and 2-bit for 16+c bit */
+       x = (x & 0xffff) + (x >> 16);
+       /* add up carry.. */
+       x = (x & 0xffff) + (x >> 16);
+       return x;
+}
+
+/*
+ * Do a 64-bit checksum on an arbitrary memory area..
+ *
+ * This isn't a great routine, but it's not _horrible_ either. The
+ * inner loop could be unrolled a bit further, and there are better
+ * ways to do the carry, but this is reasonable.
+ */
+static inline unsigned long do_csum(const unsigned char * buff, int len)
+{
+       int odd, count;
+       unsigned long result = 0;
+
+       if (len <= 0)
+               goto out;
+       odd = 1 & (unsigned long) buff;
+       if (odd) {
+               result = *buff << 8;
+               len--;
+               buff++;
+       }
+       count = len >> 1;               /* nr of 16-bit words.. */
+       if (count) {
+               if (2 & (unsigned long) buff) {
+                       result += *(unsigned short *) buff;
+                       count--;
+                       len -= 2;
+                       buff += 2;
+               }
+               count >>= 1;            /* nr of 32-bit words.. */
+               if (count) {
+                       if (4 & (unsigned long) buff) {
+                               result += *(unsigned int *) buff;
+                               count--;
+                               len -= 4;
+                               buff += 4;
+                       }
+                       count >>= 1;    /* nr of 64-bit words.. */
+                       if (count) {
+                               unsigned long carry = 0;
+                               do {
+                                       unsigned long w = *(unsigned long *) buff;
+                                       count--;
+                                       buff += 8;
+                                       result += carry;
+                                       result += w;
+                                       carry = (w > result);
+                               } while (count);
+                               result += carry;
+                               result = (result & 0xffffffff) + (result >> 32);
+                       }
+                       if (len & 4) {
+                               result += *(unsigned int *) buff;
+                               buff += 4;
+                       }
+               }
+               if (len & 2) {
+                       result += *(unsigned short *) buff;
+                       buff += 2;
+               }
+       }
+       if (len & 1)
+               result += *buff;
+       result = from64to16(result);
+       if (odd)
+               result = ((result >> 8) & 0xff) | ((result & 0xff) << 8);
+out:
+       return result;
+}
+
+/*
+ * computes the checksum of a memory block at buff, length len,
+ * and adds in "sum" (32-bit)
+ *
+ * returns a 32-bit number suitable for feeding into itself
+ * or csum_tcpudp_magic
+ *
+ * this function must be called with even lengths, except
+ * for the last fragment, which may be odd
+ *
+ * it's best to have buff aligned on a 32-bit boundary
+ */
+unsigned int csum_partial(const unsigned char * buff, int len, unsigned int sum)
+{
+       unsigned long result = do_csum(buff, len);
+
+       /* add in old sum, and carry.. */
+       result += sum;
+       /* 32+c bits -> 32 bits */
+       result = (result & 0xffffffff) + (result >> 32);
+       return result;
+}
+
+/*
+ * this routine is used for miscellaneous IP-like checksums, mainly
+ * in icmp.c
+ */
+unsigned short ip_compute_csum(unsigned char * buff, int len)
+{
+       return ~from64to16(do_csum(buff,len));
+}
+
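
The folding in from64to16() relies on end-around carry: each step adds the high part back into the low part so no carry bit is ever lost. A standalone user-space sketch for sanity-checking the routine (the test value is chosen purely for illustration):

	#include <stdio.h>

	/* Mirror of from64to16() above, in plain user-space C. */
	static unsigned short fold64(unsigned long long x)
	{
	        x = (x & 0xffffffffULL) + (x >> 32);    /* 64 -> 33 bits   */
	        x = (x & 0xffffULL) + (x >> 16);        /* 33 -> 17 bits   */
	        x = (x & 0xffffULL) + (x >> 16);        /* 17 -> 16+carry  */
	        x = (x & 0xffffULL) + (x >> 16);        /* absorb carry    */
	        return (unsigned short)x;
	}

	int main(void)
	{
	        /* 0x1ffff0000 folds to 0x0001: the carries wrap all the way around. */
	        printf("%04x\n", (unsigned)fold64(0x1ffff0000ULL));
	        return 0;
	}
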
diff --git a/arch/x86_64/lib/getuser.S b/arch/x86_64/lib/getuser.S
new file mode 100644 (file)
index 0000000..3932b42
--- /dev/null
@@ -0,0 +1,90 @@
+/*
+ * __get_user functions.
+ *
+ * (C) Copyright 1998 Linus Torvalds
+ *
+ * These functions have a non-standard call interface
+ * to make them more efficient, especially as they
+ * return an error value in addition to the "real"
+ * return value.
+ */
+
+/*
+ * __get_user_X
+ *
+ * Inputs:     %rax contains the address
+ *
+ * Outputs:    %rax is error code (0 or -EFAULT)
+ *             %rdx contains zero-extended value
+ * 
+ * %rbx is destroyed.
+ *
+ * These functions should not modify any other registers,
+ * as they get called from within inline assembly.
+ */
+
+#include <linux/linkage.h>
+#include <asm/page.h>
+#include <asm/errno.h>
+#include <asm/offset.h>
+#include <asm/thread_info.h>
+
+       .text
+       .p2align
+.globl __get_user_1
+__get_user_1:  
+       GET_THREAD_INFO(%rbx)
+       cmpq threadinfo_addr_limit(%rbx),%rax
+       jae bad_get_user
+1:     movzb (%rax),%edx
+       xorq %rax,%rax
+       ret
+
+       .p2align        
+.globl __get_user_2
+__get_user_2:
+       GET_THREAD_INFO(%rbx) 
+       addq $1,%rax
+       jc bad_get_user
+       cmpq threadinfo_addr_limit(%rbx),%rax 
+       jae      bad_get_user
+2:     movzwl -1(%rax),%edx
+       xorq %rax,%rax
+       ret
+
+       .p2align
+.globl __get_user_4
+__get_user_4:
+       GET_THREAD_INFO(%rbx) 
+       addq $3,%rax
+       jc bad_get_user
+       cmpq threadinfo_addr_limit(%rbx),%rax 
+       jae bad_get_user
+3:     movl -3(%rax),%edx
+       xorq %rax,%rax
+       ret
+
+       .p2align
+.globl __get_user_8
+__get_user_8:
+       GET_THREAD_INFO(%rbx) 
+       addq $7,%rax
+       jc bad_get_user
+       cmpq threadinfo_addr_limit(%rbx),%rax
+       jae     bad_get_user
+4:     movq -7(%rax),%rdx
+       xorq %rax,%rax
+       ret
+
+ENTRY(bad_get_user)
+bad_get_user:
+       xorq %rdx,%rdx
+       movq $(-EFAULT),%rax
+       ret
+
+.section __ex_table,"a"
+       .quad 1b,bad_get_user
+       .quad 2b,bad_get_user
+       .quad 3b,bad_get_user
+       .quad 4b,bad_get_user
+.previous
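
Because of the non-standard register convention documented above (address in %rax, zero-extended value back in %rdx, error code in %rax, %rbx clobbered), these routines are only meant to be reached from inline assembly. A hedged sketch of such a call site for the 4-byte case; the real dispatch lives in the get_user() macro in <asm/uaccess.h>, which this part of the patch does not show, and the wrapper name below is invented:

	/* Illustrative wrapper only: calls __get_user_4 with the user address
	 * in %rax and reads the zero-extended result from %rdx, matching the
	 * convention described in the comment block above. */
	static inline long get_user_u32(unsigned int *val, const void *uaddr)
	{
	        long err;
	        unsigned long tmp;

	        asm volatile("call __get_user_4"
	                     : "=a" (err), "=d" (tmp)
	                     : "0" ((unsigned long)uaddr)
	                     : "rbx", "cc", "memory");
	        *val = (unsigned int)tmp;
	        return err;             /* 0 on success, -EFAULT on fault */
	}
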
diff --git a/arch/x86_64/lib/iodebug.c b/arch/x86_64/lib/iodebug.c
new file mode 100644 (file)
index 0000000..3f74de6
--- /dev/null
@@ -0,0 +1,11 @@
+#include <asm/io.h>
+
+void * __io_virt_debug(unsigned long x, const char *file, int line)
+{
+       if (x < PAGE_OFFSET) {
+               printk("io mapaddr 0x%05lx not valid at %s:%d!\n", x, file, line);
+               return __va(x);
+       }
+       return (void *)x;
+}
+
diff --git a/arch/x86_64/lib/mmx.c b/arch/x86_64/lib/mmx.c
new file mode 100644 (file)
index 0000000..158e4cb
--- /dev/null
@@ -0,0 +1,377 @@
+#include <linux/config.h>
+#include <linux/types.h>
+#include <linux/string.h>
+#include <linux/sched.h>
+
+#include <asm/i387.h>
+#include <asm/hardirq.h> 
+
+
+/*
+ *     MMX 3DNow! library helper functions
+ *
+ *     To do:
+ *     We can use MMX just for prefetch in IRQ's. This may be a win. 
+ *             (reported so on K6-III)
+ *     We should use a better code neutral filler for the short jump
+ *             leal ebx,[ebx] is apparently best for K6-2, but Cyrix ??
+ *     We also want to clobber the filler register so we don't get any
+ *             register forwarding stalls on the filler. 
+ *
+ *     Add *user handling. Checksums are not a win with MMX on any CPU
+ *     tested so far with any MMX approach tried.
+ *
+ *     22/09/2000 - Arjan van de Ven 
+ *             Improved for non-engineering-sample Athlons
+ *
+ */
+
+#error Don't use these for now, but we'll have to provide optimized functions in future
+void *_mmx_memcpy(void *to, const void *from, size_t len)
+{
+       void *p;
+       int i;
+  
+       if (in_interrupt())
+               return __memcpy(to, from, len);
+       p = to;
+       i = len >> 6; /* len/64 */
+       kernel_fpu_begin();
+
+       __asm__ __volatile__ (
+               "1: prefetch (%0)\n"            /* This set is 28 bytes */
+               "   prefetch 64(%0)\n"
+               "   prefetch 128(%0)\n"
+               "   prefetch 192(%0)\n"
+               "   prefetch 256(%0)\n"
+               "2:  \n"
+               ".section .fixup, \"ax\"\n"
+               "3: movw $0x1AEB, 1b\n" /* jmp on 26 bytes */
+               "   jmp 2b\n"
+               ".previous\n"
+               ".section __ex_table,\"a\"\n"
+               "       .align 4\n"
+               "       .long 1b, 3b\n"
+               ".previous"
+               : : "r" (from) );
+               
+       
+       for(; i>0; i--)
+       {
+               __asm__ __volatile__ (
+               "1:  prefetch 320(%0)\n"
+               "2:  movq (%0), %%mm0\n"
+               "  movq 8(%0), %%mm1\n"
+               "  movq 16(%0), %%mm2\n"
+               "  movq 24(%0), %%mm3\n"
+               "  movq %%mm0, (%1)\n"
+               "  movq %%mm1, 8(%1)\n"
+               "  movq %%mm2, 16(%1)\n"
+               "  movq %%mm3, 24(%1)\n"
+               "  movq 32(%0), %%mm0\n"
+               "  movq 40(%0), %%mm1\n"
+               "  movq 48(%0), %%mm2\n"
+               "  movq 56(%0), %%mm3\n"
+               "  movq %%mm0, 32(%1)\n"
+               "  movq %%mm1, 40(%1)\n"
+               "  movq %%mm2, 48(%1)\n"
+               "  movq %%mm3, 56(%1)\n"
+               ".section .fixup, \"ax\"\n"
+               "3: movw $0x05EB, 1b\n" /* jmp on 5 bytes */
+               "   jmp 2b\n"
+               ".previous\n"
+               ".section __ex_table,\"a\"\n"
+               "       .align 4\n"
+               "       .long 1b, 3b\n"
+               ".previous"
+               : : "r" (from), "r" (to) : "memory");
+               from+=64;
+               to+=64;
+       }
+       /*
+        *      Now do the tail of the block
+        */
+       __memcpy(to, from, len&63);
+       kernel_fpu_end();
+       return p;
+}
+
+#ifdef CONFIG_MK7
+
+/*
+ *     The K7 has streaming cache bypass load/store. The Cyrix III, K6 and
+ *     other MMX-using processors do not.
+ */
+
+static void fast_clear_page(void *page)
+{
+       int i;
+
+       kernel_fpu_begin();
+       
+       __asm__ __volatile__ (
+               "  pxor %%mm0, %%mm0\n" : :
+       );
+
+       for(i=0;i<4096/64;i++)
+       {
+               __asm__ __volatile__ (
+               "  movntq %%mm0, (%0)\n"
+               "  movntq %%mm0, 8(%0)\n"
+               "  movntq %%mm0, 16(%0)\n"
+               "  movntq %%mm0, 24(%0)\n"
+               "  movntq %%mm0, 32(%0)\n"
+               "  movntq %%mm0, 40(%0)\n"
+               "  movntq %%mm0, 48(%0)\n"
+               "  movntq %%mm0, 56(%0)\n"
+               : : "r" (page) : "memory");
+               page+=64;
+       }
+       /* since movntq is weakly-ordered, a "sfence" is needed to become
+        * ordered again.
+        */
+       __asm__ __volatile__ (
+               "  sfence \n" : :
+       );
+       kernel_fpu_end();
+}
+
+static void fast_copy_page(void *to, void *from)
+{
+       int i;
+
+       kernel_fpu_begin();
+
+       /* maybe the prefetch stuff can go before the expensive fnsave...
+        * but that is for later. -AV
+        */
+       __asm__ __volatile__ (
+               "1: prefetch (%0)\n"
+               "   prefetch 64(%0)\n"
+               "   prefetch 128(%0)\n"
+               "   prefetch 192(%0)\n"
+               "   prefetch 256(%0)\n"
+               "2:  \n"
+               ".section .fixup, \"ax\"\n"
+               "3: movw $0x1AEB, 1b\n" /* jmp on 26 bytes */
+               "   jmp 2b\n"
+               ".previous\n"
+               ".section __ex_table,\"a\"\n"
+               "       .align 4\n"
+               "       .long 1b, 3b\n"
+               ".previous"
+               : : "r" (from) );
+
+       for(i=0; i<(4096-320)/64; i++)
+       {
+               __asm__ __volatile__ (
+               "1: prefetch 320(%0)\n"
+               "2: movq (%0), %%mm0\n"
+               "   movntq %%mm0, (%1)\n"
+               "   movq 8(%0), %%mm1\n"
+               "   movntq %%mm1, 8(%1)\n"
+               "   movq 16(%0), %%mm2\n"
+               "   movntq %%mm2, 16(%1)\n"
+               "   movq 24(%0), %%mm3\n"
+               "   movntq %%mm3, 24(%1)\n"
+               "   movq 32(%0), %%mm4\n"
+               "   movntq %%mm4, 32(%1)\n"
+               "   movq 40(%0), %%mm5\n"
+               "   movntq %%mm5, 40(%1)\n"
+               "   movq 48(%0), %%mm6\n"
+               "   movntq %%mm6, 48(%1)\n"
+               "   movq 56(%0), %%mm7\n"
+               "   movntq %%mm7, 56(%1)\n"
+               ".section .fixup, \"ax\"\n"
+               "3: movw $0x05EB, 1b\n" /* jmp on 5 bytes */
+               "   jmp 2b\n"
+               ".previous\n"
+               ".section __ex_table,\"a\"\n"
+               "       .align 4\n"
+               "       .long 1b, 3b\n"
+               ".previous"
+               : : "r" (from), "r" (to) : "memory");
+               from+=64;
+               to+=64;
+       }
+       for(i=(4096-320)/64; i<4096/64; i++)
+       {
+               __asm__ __volatile__ (
+               "2: movq (%0), %%mm0\n"
+               "   movntq %%mm0, (%1)\n"
+               "   movq 8(%0), %%mm1\n"
+               "   movntq %%mm1, 8(%1)\n"
+               "   movq 16(%0), %%mm2\n"
+               "   movntq %%mm2, 16(%1)\n"
+               "   movq 24(%0), %%mm3\n"
+               "   movntq %%mm3, 24(%1)\n"
+               "   movq 32(%0), %%mm4\n"
+               "   movntq %%mm4, 32(%1)\n"
+               "   movq 40(%0), %%mm5\n"
+               "   movntq %%mm5, 40(%1)\n"
+               "   movq 48(%0), %%mm6\n"
+               "   movntq %%mm6, 48(%1)\n"
+               "   movq 56(%0), %%mm7\n"
+               "   movntq %%mm7, 56(%1)\n"
+               : : "r" (from), "r" (to) : "memory");
+               from+=64;
+               to+=64;
+       }
+       /* since movntq is weakly-ordered, a "sfence" is needed to become
+        * ordered again.
+        */
+       __asm__ __volatile__ (
+               "  sfence \n" : :
+       );
+       kernel_fpu_end();
+}
+
+#else
+
+/*
+ *     Generic MMX implementation without K7 specific streaming
+ */
+static void fast_clear_page(void *page)
+{
+       int i;
+       
+       kernel_fpu_begin();
+       
+       __asm__ __volatile__ (
+               "  pxor %%mm0, %%mm0\n" : :
+       );
+
+       for(i=0;i<4096/128;i++)
+       {
+               __asm__ __volatile__ (
+               "  movq %%mm0, (%0)\n"
+               "  movq %%mm0, 8(%0)\n"
+               "  movq %%mm0, 16(%0)\n"
+               "  movq %%mm0, 24(%0)\n"
+               "  movq %%mm0, 32(%0)\n"
+               "  movq %%mm0, 40(%0)\n"
+               "  movq %%mm0, 48(%0)\n"
+               "  movq %%mm0, 56(%0)\n"
+               "  movq %%mm0, 64(%0)\n"
+               "  movq %%mm0, 72(%0)\n"
+               "  movq %%mm0, 80(%0)\n"
+               "  movq %%mm0, 88(%0)\n"
+               "  movq %%mm0, 96(%0)\n"
+               "  movq %%mm0, 104(%0)\n"
+               "  movq %%mm0, 112(%0)\n"
+               "  movq %%mm0, 120(%0)\n"
+               : : "r" (page) : "memory");
+               page+=128;
+       }
+
+       kernel_fpu_end();
+}
+
+static void fast_copy_page(void *to, void *from)
+{
+       int i;
+       
+       
+       kernel_fpu_begin();
+
+       __asm__ __volatile__ (
+               "1: prefetch (%0)\n"
+               "   prefetch 64(%0)\n"
+               "   prefetch 128(%0)\n"
+               "   prefetch 192(%0)\n"
+               "   prefetch 256(%0)\n"
+               "2:  \n"
+               ".section .fixup, \"ax\"\n"
+               "3: movw $0x1AEB, 1b\n" /* jmp on 26 bytes */
+               "   jmp 2b\n"
+               ".previous\n"
+               ".section __ex_table,\"a\"\n"
+               "       .align 4\n"
+               "       .long 1b, 3b\n"
+               ".previous"
+               : : "r" (from) );
+
+       for(i=0; i<4096/64; i++)
+       {
+               __asm__ __volatile__ (
+               "1: prefetch 320(%0)\n"
+               "2: movq (%0), %%mm0\n"
+               "   movq 8(%0), %%mm1\n"
+               "   movq 16(%0), %%mm2\n"
+               "   movq 24(%0), %%mm3\n"
+               "   movq %%mm0, (%1)\n"
+               "   movq %%mm1, 8(%1)\n"
+               "   movq %%mm2, 16(%1)\n"
+               "   movq %%mm3, 24(%1)\n"
+               "   movq 32(%0), %%mm0\n"
+               "   movq 40(%0), %%mm1\n"
+               "   movq 48(%0), %%mm2\n"
+               "   movq 56(%0), %%mm3\n"
+               "   movq %%mm0, 32(%1)\n"
+               "   movq %%mm1, 40(%1)\n"
+               "   movq %%mm2, 48(%1)\n"
+               "   movq %%mm3, 56(%1)\n"
+               ".section .fixup, \"ax\"\n"
+               "3: movw $0x05EB, 1b\n" /* jmp on 5 bytes */
+               "   jmp 2b\n"
+               ".previous\n"
+               ".section __ex_table,\"a\"\n"
+               "       .align 4\n"
+               "       .long 1b, 3b\n"
+               ".previous"
+               : : "r" (from), "r" (to) : "memory");
+               from+=64;
+               to+=64;
+       }
+       kernel_fpu_end();
+}
+
+
+#endif
+
+/*
+ *     Favour MMX for page clear and copy. 
+ */
+
+static void slow_zero_page(void * page)
+{
+       int d0, d1;
+       __asm__ __volatile__( \
+               "cld\n\t" \
+               "rep ; stosl" \
+               : "=&c" (d0), "=&D" (d1)
+               :"a" (0),"1" (page),"0" (1024)
+               :"memory");
+}
+void mmx_clear_page(void * page)
+{
+       if(in_interrupt())
+               slow_zero_page(page);
+       else
+               fast_clear_page(page);
+}
+
+static void slow_copy_page(void *to, void *from)
+{
+       int d0, d1, d2;
+       __asm__ __volatile__( \
+               "cld\n\t" \
+               "rep ; movsl" \
+               : "=&c" (d0), "=&D" (d1), "=&S" (d2) \
+               : "0" (1024),"1" ((long) to),"2" ((long) from) \
+               : "memory");
+}
+  
+
+void mmx_copy_page(void *to, void *from)
+{
+       if(in_interrupt())
+               slow_copy_page(to, from);
+       else
+               fast_copy_page(to, from);
+}
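
The exception fixups in this file do not branch to recovery code; they patch the faulting prefetch itself into a short jump so it is skipped on every later pass. A small sketch of why the patch words are 0x1AEB and 0x05EB (illustration only, not part of the patch):

	#include <string.h>

	/* x86 encodes a short jump as the opcode 0xEB followed by a signed
	 * 8-bit displacement, and movw stores little-endian, so writing
	 * 0x1AEB over the prefetch at label 1 leaves the bytes EB 1A, i.e.
	 * "jmp +26", skipping the remaining prefetch instructions; 0x05EB
	 * likewise becomes "jmp +5". */
	static void show_fixup_encoding(void)
	{
	        unsigned short patch = 0x1AEB;
	        unsigned char bytes[2];

	        memcpy(bytes, &patch, sizeof(bytes));
	        /* bytes[0] == 0xEB (short jmp opcode), bytes[1] == 0x1A (26) */
	}
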
diff --git a/arch/x86_64/lib/old-checksum.c b/arch/x86_64/lib/old-checksum.c
new file mode 100644 (file)
index 0000000..20d5b8f
--- /dev/null
@@ -0,0 +1,33 @@
+/*
+ * Temporary C versions of the checksum functions until optimized assembler versions
+ * can go in.
+ */
+
+#include <net/checksum.h>
+
+/*
+ * Copy from userspace and compute checksum.  If we catch an exception
+ * then zero the rest of the buffer.
+ */
+unsigned int csum_partial_copy_from_user (const char *src, char *dst,
+                                          int len, unsigned int sum,
+                                          int *err_ptr)
+{
+       int missing;
+
+       missing = copy_from_user(dst, src, len);
+       if (missing) {
+               memset(dst + len - missing, 0, missing);
+               *err_ptr = -EFAULT;
+       }
+               
+       return csum_partial(dst, len, sum);
+}
+
+unsigned int csum_partial_copy_nocheck(const char *src, char *dst, int len, unsigned int sum)
+{
+       memcpy(dst,src,len);
+       return csum_partial(dst,len,sum);
+}
+
+/* Fallback for csum_and_copy_to_user is currently in include/net/checksum.h */
diff --git a/arch/x86_64/lib/putuser.S b/arch/x86_64/lib/putuser.S
new file mode 100644 (file)
index 0000000..4d287ce
--- /dev/null
@@ -0,0 +1,88 @@
+/*
+ * __put_user functions.
+ *
+ * (C) Copyright 1998 Linus Torvalds
+ *
+ * These functions have a non-standard call interface
+ * to make them more efficient.
+ */
+
+/*
+ * __put_user_X
+ *
+ * Inputs:     %rax contains the address
+ *             %rdx contains the value
+ *
+ * Outputs:    %rax is error code (0 or -EFAULT)
+ *             %rbx is clobbered (it will hold the thread_info pointer).
+ *
+ * These functions should not modify any other registers,
+ * as they get called from within inline assembly.
+ */
+
+/* FIXME: putuser.S should really be merged with getuser.S, and the preprocessor should be used to reduce code duplication */
+
+#include <linux/linkage.h>
+#include <asm/page.h>
+#include <asm/errno.h>
+#include <asm/offset.h>
+#include <asm/thread_info.h>
+
+.text
+.p2align
+.globl __put_user_1
+__put_user_1:
+       GET_THREAD_INFO(%rbx)
+       cmpq threadinfo_addr_limit(%rbx),%rax
+       jae bad_put_user
+1:     movb %dl,(%rax)
+       xorq %rax,%rax
+       ret
+
+.p2align
+.globl __put_user_2
+__put_user_2:
+       GET_THREAD_INFO(%rbx) 
+       addq $1,%rax
+       jc              bad_put_user
+       cmpq    threadinfo_addr_limit(%rbx),%rax
+       jae     bad_put_user
+2:     movw %dx,-1(%rax)
+       xorq %rax,%rax
+       ret
+
+.p2align
+.globl __put_user_4
+__put_user_4:
+       GET_THREAD_INFO(%rbx) 
+       addq $3,%rax
+       jc              bad_put_user
+       cmpq    threadinfo_addr_limit(%rbx),%rax
+       jae     bad_put_user
+3:     movl %edx,-3(%rax)
+       xorq %rax,%rax
+       ret
+
+.p2align
+.globl __put_user_8
+__put_user_8:
+       GET_THREAD_INFO(%rbx) 
+       addq $7,%rax
+       jc      bad_put_user
+       cmpq    threadinfo_addr_limit(%rbx),%rax
+       jae     bad_put_user
+4:     movq %rdx,-7(%rax)
+       xorq %rax,%rax
+       ret
+
+ENTRY(bad_put_user)    
+bad_put_user:
+       movq $(-EFAULT),%rax
+       ret
+
+.section __ex_table,"a"
+       .quad 1b,bad_put_user
+       .quad 2b,bad_put_user
+       .quad 3b,bad_put_user
+       .quad 4b,bad_put_user   
+.previous
diff --git a/arch/x86_64/lib/rwsem_thunk.S b/arch/x86_64/lib/rwsem_thunk.S
new file mode 100644 (file)
index 0000000..e63ef88
--- /dev/null
@@ -0,0 +1,27 @@
+       #include <asm/calling.h>
+
+       /*
+        * Save registers for the slow path of semaphores here to avoid
+        * disturbance of register allocation in fast paths with function calls.
+        * Written 2001 by Andi Kleen.
+        */
+       
+       .macro rwsem_thunk name,func
+       .globl \name
+\name: 
+       SAVE_ARGS
+       movq %rax,%rdi
+       call \func
+       jmp  restore
+       .endm
+
+       rwsem_thunk rwsem_down_read_failed_thunk,rwsem_down_read_failed
+       rwsem_thunk rwsem_down_write_failed_thunk,rwsem_down_write_failed
+       rwsem_thunk rwsem_wake_thunk,rwsem_wake
+       /* This does not really belong here, but the macros are so
+          convenient. */ 
+       rwsem_thunk do_softirq_thunk,do_softirq
+
+restore:       
+       RESTORE_ARGS
+       ret
diff --git a/arch/x86_64/lib/usercopy.c b/arch/x86_64/lib/usercopy.c
new file mode 100644 (file)
index 0000000..07ccdfb
--- /dev/null
@@ -0,0 +1,147 @@
+/* 
+ * User address space access functions.
+ * The non-inlined parts of asm-x86_64/uaccess.h are here.
+ *
+ * Copyright 1997 Andi Kleen <ak@muc.de>
+ * Copyright 1997 Linus Torvalds
+ */
+#include <linux/config.h>
+#include <asm/uaccess.h>
+#include <asm/mmx.h>
+
+unsigned long
+__generic_copy_to_user(void *to, const void *from, unsigned long n)
+{
+       prefetch(from);
+       if (access_ok(VERIFY_WRITE, to, n))
+               __copy_user(to,from,n);
+       return n;
+}
+
+unsigned long
+__generic_copy_from_user(void *to, const void *from, unsigned long n)
+{
+       prefetchw(to);
+       if (access_ok(VERIFY_READ, from, n))
+               __copy_user_zeroing(to,from,n);
+       else
+               memset(to, 0, n);
+       return n;
+}
+
+/*
+ * Copy a null terminated string from userspace.
+ */
+
+#define __do_strncpy_from_user(dst,src,count,res)                         \
+do {                                                                      \
+       long __d0, __d1, __d2;                                             \
+       __asm__ __volatile__(                                              \
+               "       testq %1,%1\n"                                     \
+               "       jz 2f\n"                                           \
+               "0:     lodsb\n"                                           \
+               "       stosb\n"                                           \
+               "       testb %%al,%%al\n"                                 \
+               "       jz 1f\n"                                           \
+               "       decq %1\n"                                         \
+               "       jnz 0b\n"                                          \
+               "1:     subq %1,%0\n"                                      \
+               "2:\n"                                                     \
+               ".section .fixup,\"ax\"\n"                                 \
+               "3:     movq %5,%0\n"                                      \
+               "       jmp 2b\n"                                          \
+               ".previous\n"                                              \
+               ".section __ex_table,\"a\"\n"                              \
+               "       .align 4\n"                                        \
+               "       .quad 0b,3b\n"                                     \
+               ".previous"                                                \
+               : "=r"(res), "=c"(count), "=&a" (__d0), "=&S" (__d1),      \
+                 "=&D" (__d2)                                             \
+               : "i"(-EFAULT), "0"(count), "1"(count), "3"(src), "4"(dst) \
+               : "memory");                                               \
+} while (0)
+
+long
+__strncpy_from_user(char *dst, const char *src, long count)
+{
+       long res;
+       __do_strncpy_from_user(dst, src, count, res);
+       return res;
+}
+
+long
+strncpy_from_user(char *dst, const char *src, long count)
+{
+       long res = -EFAULT;
+       if (access_ok(VERIFY_READ, src, 1))
+               __do_strncpy_from_user(dst, src, count, res);
+       return res;
+}
+
+
+/*
+ * Zero Userspace
+ */
+
+#define __do_clear_user(addr,size)                                     \
+do {                                                                   \
+       long __d0;                                                      \
+       __asm__ __volatile__(                                           \
+               "cld\n" \
+               "0:     rep; stosl\n"                                   \
+               "       movq %2,%0\n"                                   \
+               "1:     rep; stosb\n"                                   \
+               "2:\n"                                                  \
+               ".section .fixup,\"ax\"\n"                              \
+               "3:     lea 0(%2,%0,4),%0\n"                            \
+               "       jmp 2b\n"                                       \
+               ".previous\n"                                           \
+               ".section __ex_table,\"a\"\n"                           \
+               "       .align 4\n"                                     \
+               "       .quad 0b,3b\n"                                  \
+               "       .quad 1b,2b\n"                                  \
+               ".previous"                                             \
+               : "=&c"(size), "=&D" (__d0)                             \
+               : "r"(size & 3), "0"(size / 4), "1"(addr), "a"(0));     \
+} while (0)
+
+unsigned long
+clear_user(void *to, unsigned long n)
+{
+       if (access_ok(VERIFY_WRITE, to, n))
+               __do_clear_user(to, n);
+       return n;
+}
+
+unsigned long
+__clear_user(void *to, unsigned long n)
+{
+       __do_clear_user(to, n);
+       return n;
+}
+
+/*
+ * Return the size of a string (including the ending 0)
+ *
+ * Return 0 on exception, a value greater than N if too long
+ */
+
+long strnlen_user(const char *s, long n)
+{
+       unsigned long res = 0;
+       char c;
+
+       if (!access_ok(VERIFY_READ, s, n))
+               return 0;
+
+       while (1) {
+               if (get_user(c, s))
+                       return 0;
+               if (!c)
+                       return res+1;
+               if (res>n)
+                       return n+1;
+               res++;
+               s++;
+       }
+}
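
The return convention spelled out above (0 on a fault, the string length including the trailing NUL otherwise, and a value larger than n when no terminator was found) is easy to misread. A hypothetical caller sketch using the strnlen_user() and strncpy_from_user() contracts defined in this file (the wrapper name and the choice of error codes are illustrative):

	/* Illustration only: validate and copy a user-supplied string into a
	 * fixed-size kernel buffer.  Assumes the usual errno constants from
	 * <asm/errno.h>. */
	static long fetch_user_string(char *kbuf, const char *ustr, long bufsize)
	{
	        long len = strnlen_user(ustr, bufsize);

	        if (len == 0)
	                return -EFAULT;         /* faulted reading user memory   */
	        if (len > bufsize)
	                return -ENAMETOOLONG;   /* no NUL within bufsize bytes   */
	        return strncpy_from_user(kbuf, ustr, len);
	}
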
diff --git a/arch/x86_64/mm/Makefile b/arch/x86_64/mm/Makefile
new file mode 100644 (file)
index 0000000..942a427
--- /dev/null
@@ -0,0 +1,13 @@
+#
+# Makefile for the linux x86_64-specific parts of the memory manager.
+#
+# Note! Dependencies are done automagically by 'make dep', which also
+# removes any old dependencies. DON'T put your own dependencies here
+# unless it's something special (ie not a .c file).
+#
+# Note 2! The CFLAGS definition is now in the main makefile...
+
+O_TARGET := mm.o
+obj-y   := init.o fault.o ioremap.o extable.o
+
+include $(TOPDIR)/Rules.make
diff --git a/arch/x86_64/mm/extable.c b/arch/x86_64/mm/extable.c
new file mode 100644 (file)
index 0000000..d71f555
--- /dev/null
@@ -0,0 +1,62 @@
+/*
+ * linux/arch/x86_64/mm/extable.c
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/spinlock.h>
+#include <asm/uaccess.h>
+
+extern const struct exception_table_entry __start___ex_table[];
+extern const struct exception_table_entry __stop___ex_table[];
+
+static inline unsigned long
+search_one_table(const struct exception_table_entry *first,
+                const struct exception_table_entry *last,
+                unsigned long value)
+{
+        while (first <= last) {
+               const struct exception_table_entry *mid;
+               long diff;
+
+               mid = (last - first) / 2 + first;
+               diff = mid->insn - value;
+                if (diff == 0)
+                        return mid->fixup;
+                else if (diff < 0)
+                        first = mid+1;
+                else
+                        last = mid-1;
+        }
+        return 0;
+}
+
+extern spinlock_t modlist_lock;
+
+unsigned long
+search_exception_table(unsigned long addr)
+{
+       unsigned long ret = 0;
+       unsigned long flags;
+
+#ifndef CONFIG_MODULES
+       /* There is only the kernel to search.  */
+       ret = search_one_table(__start___ex_table, __stop___ex_table-1, addr);
+       return ret;
+#else
+       /* The kernel is the last "module" -- no need to treat it specially.  */
+       struct module *mp;
+
+       spin_lock_irqsave(&modlist_lock, flags);
+       for (mp = module_list; mp != NULL; mp = mp->next) {
+               if (mp->ex_table_start == NULL || !(mp->flags&(MOD_RUNNING|MOD_INITIALIZING)))
+                       continue;
+               ret = search_one_table(mp->ex_table_start,
+                                      mp->ex_table_end - 1, addr);
+               if (ret)
+                       break;
+       }
+       spin_unlock_irqrestore(&modlist_lock, flags);
+       return ret;
+#endif
+}
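
search_exception_table() is the consumer of the __ex_table sections emitted by getuser.S, putuser.S and the inline-asm fixups earlier in this patch: each entry pairs the address of an instruction that is allowed to fault with the address to resume at. A simplified sketch of that relationship (the struct and helper below are illustrative; the real struct exception_table_entry is defined in <asm/uaccess.h>):

	/* Illustration only.  The table must be sorted by instruction address,
	 * since search_one_table() above is a plain binary search over it. */
	struct ex_entry_sketch {
	        unsigned long insn;     /* address of the may-fault instruction  */
	        unsigned long fixup;    /* where execution resumes after a fault */
	};

	/* How the fault handler applies a match (compare the no_context path
	 * in arch/x86_64/mm/fault.c below); assumes struct pt_regs from
	 * <linux/ptrace.h>. */
	static int try_fixup(struct pt_regs *regs, unsigned long fixup)
	{
	        if (!fixup)
	                return 0;       /* no entry: this is a real kernel bug */
	        regs->rip = fixup;      /* skip past the faulting instruction  */
	        return 1;
	}
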
diff --git a/arch/x86_64/mm/fault.c b/arch/x86_64/mm/fault.c
new file mode 100644 (file)
index 0000000..d0d33d0
--- /dev/null
@@ -0,0 +1,324 @@
+/*
+ *  linux/arch/x86-64/mm/fault.c
+ *
+ *  Copyright (C) 1995  Linus Torvalds
+ */
+
+#include <linux/signal.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <linux/ptrace.h>
+#include <linux/mman.h>
+#include <linux/mm.h>
+#include <linux/smp.h>
+#include <linux/smp_lock.h>
+#include <linux/interrupt.h>
+#include <linux/init.h>
+#include <linux/tty.h>
+#include <linux/vt_kern.h>             /* For unblank_screen() */
+
+#include <asm/system.h>
+#include <asm/uaccess.h>
+#include <asm/pgalloc.h>
+#include <asm/hardirq.h>
+
+extern void die(const char *,struct pt_regs *,long);
+
+asmlinkage void do_invalid_op(struct pt_regs *, unsigned long);
+extern unsigned long idt;
+
+extern spinlock_t console_lock, timerlist_lock;
+
+void bust_spinlocks(int yes)
+{
+       spin_lock_init(&timerlist_lock);
+       if (yes) {
+               oops_in_progress = 1;
+#ifdef CONFIG_SMP
+               global_irq_lock = 0;    /* Many serial drivers do __global_cli() */
+#endif
+       } else {
+       int loglevel_save = console_loglevel;
+#ifdef CONFIG_VT
+               unblank_screen();
+#endif
+               oops_in_progress = 0;
+               /*
+                * OK, the message is on the console.  Now we call printk()
+                * without oops_in_progress set so that printk will give klogd
+                * a poke.  Hold onto your hats...
+                */
+               console_loglevel = 15;          /* NMI oopser may have shut the console up */
+               printk(" ");
+               console_loglevel = loglevel_save;
+       }
+}
+
+void do_BUG(const char *file, int line)
+{
+       bust_spinlocks(1);
+       printk("kernel BUG at %s:%d!\n", file, line);
+}
+
+
+void dump_pagetable(unsigned long address)
+{
+       static char *name[] = { "PML4", "PGD", "PDE", "PTE" }; 
+       int i, shift;
+       unsigned long page;
+
+       asm("movq %%cr3,%0":"=r" (page));
+       shift = 9+9+9+12;
+       address &= ~0xFFFF000000000000UL;
+       for (i = 0; i < 4; i++) { 
+               page = ((unsigned long *) __va(page))[(address >> shift) & 0x1FFU];   
+               printk("%s: %016lx ", name[i], page); 
+               if ((page & (1 | (1<<7))) != 1) /* Not present or 2MB page */
+                       break;
+               page &= ~0xFFFUL;
+               shift -= 9;
+       } 
+       printk("\n");
+}
+
+/*
+ * This routine handles page faults.  It determines the address,
+ * and the problem, and then passes it off to one of the appropriate
+ * routines.
+ *
+ * error_code:
+ *     bit 0 == 0 means no page found, 1 means protection fault
+ *     bit 1 == 0 means read, 1 means write
+ *     bit 2 == 0 means kernel, 1 means user-mode
+ */
+asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long error_code)
+{
+       struct task_struct *tsk;
+       struct mm_struct *mm;
+       struct vm_area_struct * vma;
+       unsigned long address;
+       unsigned long fixup;
+       int write;
+       siginfo_t info;
+
+       /* get the address */
+       __asm__("movq %%cr2,%0":"=r" (address));
+
+
+       tsk = current;
+       mm = tsk->mm;
+       info.si_code = SEGV_MAPERR;
+
+       if (address >= TASK_SIZE) 
+               goto vmalloc_fault;
+
+
+       /*
+        * If we're in an interrupt or have no user
+        * context, we must not take the fault..
+        */
+       if (in_interrupt() || !mm)
+               goto no_context;
+
+       down_read(&mm->mmap_sem);
+
+       vma = find_vma(mm, address);
+
+#if 0
+       printk("fault at %lx rip:%lx rsp:%lx err:%lx thr:%x ", address,regs->rip,regs->rsp,error_code,tsk->thread.flags);
+       if (vma) 
+               printk("vma %lx-%lx prot:%lx flags:%lx",vma->vm_start,vma->vm_end,
+                      vma->vm_page_prot,vma->vm_flags); 
+       printk("\n");
+#endif
+
+
+       if (!vma)
+               goto bad_area;
+       if (vma->vm_start <= address)
+               goto good_area;
+       if (!(vma->vm_flags & VM_GROWSDOWN))
+               goto bad_area;
+       if (error_code & 4) {
+               // XXX: align red zone size with ABI 
+               if (address + 128 < regs->rsp)
+                       goto bad_area;
+       }
+       if (expand_stack(vma, address))
+               goto bad_area;
+/*
+ * Ok, we have a good vm_area for this memory access, so
+ * we can handle it..
+ */
+good_area:
+       info.si_code = SEGV_ACCERR;
+       write = 0;
+       switch (error_code & 3) {
+               default:        /* 3: write, present */
+                       /* fall through */
+               case 2:         /* write, not present */
+                       if (!(vma->vm_flags & VM_WRITE))
+                               goto bad_area;
+                       write++;
+                       break;
+               case 1:         /* read, present */
+                       goto bad_area;
+               case 0:         /* read, not present */
+                       if (!(vma->vm_flags & (VM_READ | VM_EXEC)))
+                               goto bad_area;
+       }
+
+survive:
+       /*
+        * If for any reason at all we couldn't handle the fault,
+        * make sure we exit gracefully rather than endlessly redo
+        * the fault.
+        */
+       switch (handle_mm_fault(mm, vma, address, write)) {
+       case 1:
+               tsk->min_flt++;
+               break;
+       case 2:
+               tsk->maj_flt++;
+               break;
+       case 0:
+               goto do_sigbus;
+       default:
+               goto out_of_memory;
+       }
+
+       up_read(&mm->mmap_sem);
+       return;
+
+/*
+ * Something tried to access memory that isn't in our memory map..
+ * Fix it, but check if it's kernel or user first..
+ */
+bad_area:
+       up_read(&mm->mmap_sem);
+
+bad_area_nosemaphore:
+
+       /* User mode accesses just cause a SIGSEGV */
+       if (error_code & 4) {
+               
+               printk(KERN_ERR "%.20s[%d] segfaulted rip:%lx rsp:%lx adr:%lx err:%lx\n", 
+                      tsk->comm, tsk->pid, 
+                      regs->rip, regs->rsp, address, error_code); 
+       
+               tsk->thread.cr2 = address;
+               tsk->thread.error_code = error_code;
+               tsk->thread.trap_no = 14;
+               info.si_signo = SIGSEGV;
+               info.si_errno = 0;
+               /* info.si_code has been set above */
+               info.si_addr = (void *)address;
+               force_sig_info(SIGSEGV, &info, tsk);
+               return;
+       }
+
+no_context:
+       
+       /* Are we prepared to handle this kernel fault?  */
+       if ((fixup = search_exception_table(regs->rip)) != 0) {
+               regs->rip = fixup;
+               return;
+       }
+
+/*
+ * Oops. The kernel tried to access some bad page. We'll have to
+ * terminate things with extreme prejudice.
+ */
+
+       bust_spinlocks(1); 
+
+       if (address < PAGE_SIZE)
+               printk(KERN_ALERT "Unable to handle kernel NULL pointer dereference");
+       else
+               printk(KERN_ALERT "Unable to handle kernel paging request");
+       printk(" at virtual address %016lx\n",address);
+       printk(" printing rip:\n");
+       printk("%016lx\n", regs->rip);
+       dump_pagetable(address);
+       die("Oops", regs, error_code);
+       bust_spinlocks(0); 
+       do_exit(SIGKILL);
+
+/*
+ * We ran out of memory, or some other thing happened to us that made
+ * us unable to handle the page fault gracefully.
+ */
+out_of_memory:
+       up_read(&mm->mmap_sem);
+       if (current->pid == 1) { 
+               yield();
+               down_read(&mm->mmap_sem);
+               goto survive;
+       }
+       printk("VM: killing process %s\n", tsk->comm);
+       if (error_code & 4)
+               do_exit(SIGKILL);
+       goto no_context;
+
+do_sigbus:
+       up_read(&mm->mmap_sem);
+
+       /*
+        * Send a sigbus, regardless of whether we were in kernel
+        * or user mode.
+        */
+       tsk->thread.cr2 = address;
+       tsk->thread.error_code = error_code;
+       tsk->thread.trap_no = 14;
+       info.si_signo = SIGBUS;
+       info.si_errno = 0;
+       info.si_code = BUS_ADRERR;
+       info.si_addr = (void *)address;
+       force_sig_info(SIGBUS, &info, tsk);
+
+       /* Kernel mode? Handle exceptions or die */
+       if (!(error_code & 4))
+               goto no_context;
+
+
+vmalloc_fault:
+       {
+               /*
+                * Synchronize the kernel space top level page-table
+                * with the 'reference' page table.
+                * Currently it only works for first and last 512 GB of
+                * kernel memory FIXME
+                *
+                */
+               level4_t *l4pd = level4_offset_k(address);
+               int offset = __pgd_offset(address);
+               pgd_t *pgd, *pgd_k;
+               pmd_t *pmd, *pmd_k;
+
+               if (! level4_val(*l4pd)) {
+                       printk(KERN_ERR "fatal - no entry in level4_page for %lx\n",
+                               address);
+                       goto bad_area_nosemaphore;
+               }
+               pgd = level3_offset_k(l4pd, address);
+               pgd_k = init_mm.pgd + offset;
+
+               if (!pgd_present(*pgd)) {
+                       if (!pgd_present(*pgd_k))
+                               goto bad_area_nosemaphore;
+                       set_pgd(pgd, *pgd_k);
+                       return;
+               }
+
+               pmd = pmd_offset(pgd, address);
+               pmd_k = pmd_offset(pgd_k, address);
+
+               if (pmd_present(*pmd) || !pmd_present(*pmd_k))
+                       goto bad_area_nosemaphore;
+               set_pmd(pmd, *pmd_k);
+               return;
+       }
+}
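
The hardware error code tested throughout do_page_fault() packs three bits, as listed in the comment at the top of the function. A tiny sketch making the bit tests explicit (the macro names are invented for illustration; the patch tests the raw values directly):

	/* Illustration only, matching the comment above do_page_fault(). */
	#define PF_PROT   1     /* 0: page not present, 1: protection fault */
	#define PF_WRITE  2     /* 0: read access,      1: write access     */
	#define PF_USER   4     /* 0: kernel mode,      1: user mode        */

	static inline int is_user_write_fault(unsigned long error_code)
	{
	        return (error_code & PF_USER) && (error_code & PF_WRITE);
	}
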
diff --git a/arch/x86_64/mm/init.c b/arch/x86_64/mm/init.c
new file mode 100644 (file)
index 0000000..c270db3
--- /dev/null
@@ -0,0 +1,387 @@
+/*
+ *  linux/arch/x86_64/mm/init.c
+ *
+ *  Copyright (C) 1995  Linus Torvalds
+ *  Copyright (C) 2000  Pavel Machek <pavel@suse.cz>
+ */
+
+#include <linux/config.h>
+#include <linux/signal.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <linux/ptrace.h>
+#include <linux/mman.h>
+#include <linux/mm.h>
+#include <linux/swap.h>
+#include <linux/smp.h>
+#include <linux/init.h>
+#ifdef CONFIG_BLK_DEV_INITRD
+#include <linux/blk.h>
+#endif
+#include <linux/pagemap.h>
+#include <linux/bootmem.h>
+
+#include <asm/processor.h>
+#include <asm/system.h>
+#include <asm/uaccess.h>
+#include <asm/pgtable.h>
+#include <asm/pgalloc.h>
+#include <asm/dma.h>
+#include <asm/fixmap.h>
+#include <asm/e820.h>
+#include <asm/apic.h>
+#include <asm/tlb.h>
+
+mmu_gather_t mmu_gathers[NR_CPUS];
+
+static unsigned long totalram_pages;
+
+int do_check_pgt_cache(int low, int high)
+{
+       int freed = 0;
+       if(read_pda(pgtable_cache_sz) > high) {
+               do {
+                       if (read_pda(pgd_quick)) {
+                               pgd_free_slow(pgd_alloc_one_fast());
+                               freed++;
+                       }
+                       if (read_pda(pmd_quick)) {
+                               pmd_free_slow(pmd_alloc_one_fast(NULL, 0));
+                               freed++;
+                       }
+                       if (read_pda(pte_quick)) {
+                               pte_free_slow(pte_alloc_one_fast(NULL, 0));
+                               freed++;
+                       }
+               } while(read_pda(pgtable_cache_sz) > low);
+       }
+       return freed;
+}
+
+/*
+ * NOTE: pagetable_init allocates all the fixmap pagetables contiguously in
+ * physical space so we can cache the location of the first one and move
+ * around without checking the pgd every time.
+ */
+
+void show_mem(void)
+{
+       int i, total = 0, reserved = 0;
+       int shared = 0, cached = 0;
+
+       printk("Mem-info:\n");
+       show_free_areas();
+       printk("Free swap:       %6dkB\n",nr_swap_pages<<(PAGE_SHIFT-10));
+       i = max_mapnr;
+       while (i-- > 0) {
+               total++;
+               if (PageReserved(mem_map+i))
+                       reserved++;
+               else if (PageSwapCache(mem_map+i))
+                       cached++;
+               else if (page_count(mem_map+i))
+                       shared += page_count(mem_map+i) - 1;
+       }
+       printk("%d pages of RAM\n", total);
+       printk("%d reserved pages\n",reserved);
+       printk("%d pages shared\n",shared);
+       printk("%d pages swap cached\n",cached);
+       printk("%ld pages in page table cache\n",read_pda(pgtable_cache_sz));
+       show_buffers();
+}
+
+/* References to section boundaries */
+
+extern char _text, _etext, _edata, __bss_start, _end;
+extern char __init_begin, __init_end;
+
+int after_bootmem;
+
+static void *spp_getpage(void)
+{ 
+       void *ptr;
+       if (after_bootmem)
+               ptr = (void *) get_free_page(GFP_ATOMIC); 
+       else
+               ptr = alloc_bootmem_low(PAGE_SIZE); 
+       if (!ptr)
+               panic("set_pte_phys: cannot allocate page data %s\n", after_bootmem?"after bootmem":"");
+       return ptr;
+} 
+
+static void set_pte_phys(unsigned long vaddr,
+                        unsigned long phys, pgprot_t prot)
+{
+       level4_t *level4;
+       pgd_t *pgd;
+       pmd_t *pmd;
+       pte_t *pte;
+
+       level4 = level4_offset_k(vaddr);
+       if (level4_none(*level4)) {
+               printk("LEVEL4 FIXMAP MISSING, it should be setup in head.S!\n");
+               return;
+       }
+       pgd = level3_offset_k(level4, vaddr);
+       if (pgd_none(*pgd)) {
+               pmd = (pmd_t *) spp_getpage(); 
+               set_pgd(pgd, __pgd(__pa(pmd) + 0x7));
+               if (pmd != pmd_offset(pgd, 0)) {
+                       printk("PAGETABLE BUG #01!\n");
+                       return;
+               }
+       }
+       pmd = pmd_offset(pgd, vaddr);
+       if (pmd_none(*pmd)) {
+               pte = (pte_t *) spp_getpage();
+               set_pmd(pmd, __pmd(__pa(pte) + 0x7));
+               if (pte != pte_offset(pmd, 0)) {
+                       printk("PAGETABLE BUG #02!\n");
+                       return;
+               }
+       }
+       pte = pte_offset(pmd, vaddr);
+       if (pte_val(*pte))
+               pte_ERROR(*pte);
+       set_pte(pte, mk_pte_phys(phys, prot));
+
+       /*
+        * It's enough to flush this one mapping.
+        * (PGE mappings get flushed as well)
+        */
+       __flush_tlb_one(vaddr);
+}
+
+/* NOTE: this is meant to be run only at boot */
+void __set_fixmap (enum fixed_addresses idx, unsigned long phys, pgprot_t prot)
+{
+       unsigned long address = __fix_to_virt(idx);
+
+       if (idx >= __end_of_fixed_addresses) {
+               printk("Invalid __set_fixmap\n");
+               return;
+       }
+       set_pte_phys(address, phys, prot);
+}
+
+static void __init pagetable_init (void)
+{
+       unsigned long paddr, end;
+       pgd_t *pgd;
+       int i, j;
+       pmd_t *pmd;
+
+       /*
+        * This can be zero as well - no problem, in that case we exit
+        * the loops anyway due to the PTRS_PER_* conditions.
+        */
+       end = (unsigned long) max_low_pfn*PAGE_SIZE;
+       if (end > 0x8000000000) {
+               printk("Temporarily supporting only 512G of global RAM\n");
+               end = 0x8000000000;
+               max_low_pfn = 0x8000000000 >> PAGE_SHIFT;
+       }
+
+       i = __pgd_offset(PAGE_OFFSET);
+       pgd = level3_physmem_pgt + i;
+
+       for (; i < PTRS_PER_PGD; pgd++, i++) {
+               paddr = i*PGDIR_SIZE;
+               if (paddr >= end)
+                       break;
+               if (i)
+                       pmd = (pmd_t *) alloc_bootmem_low_pages(PAGE_SIZE);
+               else
+                       pmd = level2_kernel_pgt;
+
+               set_pgd(pgd, __pgd(__pa(pmd) + 0x7));
+               for (j = 0; j < PTRS_PER_PMD; pmd++, j++) {
+                       unsigned long __pe;
+
+                       paddr = i*PGDIR_SIZE + j*PMD_SIZE;
+                       if (paddr >= end)
+                               break;
+
+                       __pe = _KERNPG_TABLE + _PAGE_PSE + paddr + _PAGE_GLOBAL;
+                       set_pmd(pmd, __pmd(__pe));
+               }
+       }
+
+       /*
+        * Add low memory identity-mappings - SMP needs it when
+        * starting up on an AP from real-mode. In the non-PAE
+        * case we already have these mappings through head.S.
+        * All user-space mappings are explicitly cleared after
+        * SMP startup.
+        */
+#ifdef FIXME
+       pgd_base [0] is not what you think, this needs to be rewritten for SMP.
+       pgd_base[0] = pgd_base[USER_PTRS_PER_PGD];
+#endif
+}
+
+void __init zap_low_mappings (void)
+{
+       int i;
+       /*
+        * Zap initial low-memory mappings.
+        *
+        * Note that "pgd_clear()" doesn't do it for
+        * us in this case, because pgd_clear() is a
+        * no-op in the 2-level case (pmd_clear() is
+        * the thing that clears the page-tables in
+        * that case).
+        */
+       for (i = 0; i < USER_PTRS_PER_PGD; i++)
+               pgd_clear(swapper_pg_dir+i);
+       flush_tlb_all();
+}
+
+/*
+ * paging_init() sets up the page tables - note that the first 4MB are
+ * already mapped by head.S.
+ *
+ * This routine also unmaps the page at virtual kernel address 0, so
+ * that we can trap those pesky NULL-reference errors in the kernel.
+ */
+void __init paging_init(void)
+{
+       asm volatile("movq %%cr4,%0" : "=r" (mmu_cr4_features));
+
+       pagetable_init();
+
+       __flush_tlb_all();
+
+       {
+               unsigned long zones_size[MAX_NR_ZONES] = {0, 0, 0};
+               unsigned int max_dma, low;
+
+               max_dma = virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT;
+               low = max_low_pfn;
+
+               if (low < max_dma)
+                       zones_size[ZONE_DMA] = low;
+               else {
+                       zones_size[ZONE_DMA] = max_dma;
+                       zones_size[ZONE_NORMAL] = low - max_dma;
+               }
+               free_area_init(zones_size);
+       }
+       return;
+}
+
+
+static inline int page_is_ram (unsigned long pagenr)
+{
+       int i;
+
+       for (i = 0; i < e820.nr_map; i++) {
+               unsigned long addr, end;
+
+               if (e820.map[i].type != E820_RAM)       /* not usable memory */
+                       continue;
+               /*
+                *      !!!FIXME!!! Some BIOSen report areas as RAM that
+                *      are not. Notably the 640->1Mb area. We need a sanity
+                *      check here.
+                */
+               addr = (e820.map[i].addr+PAGE_SIZE-1) >> PAGE_SHIFT;
+               end = (e820.map[i].addr+e820.map[i].size) >> PAGE_SHIFT;
+               if  ((pagenr >= addr) && (pagenr < end))
+                       return 1;
+       }
+       return 0;
+}
+
+void __init mem_init(void)
+{
+       int codesize, reservedpages, datasize, initsize;
+       int tmp;
+
+       if (!mem_map)
+               BUG();
+
+       max_mapnr = num_physpages = max_low_pfn;
+       high_memory = (void *) __va(max_low_pfn * PAGE_SIZE);
+
+       /* clear the zero-page */
+       memset(empty_zero_page, 0, PAGE_SIZE);
+
+       /* this will put all low memory onto the freelists */
+       totalram_pages += free_all_bootmem();
+
+       after_bootmem = 1;
+
+       reservedpages = 0;
+       for (tmp = 0; tmp < max_low_pfn; tmp++)
+               /*
+                * Only count reserved RAM pages
+                */
+               if (page_is_ram(tmp) && PageReserved(mem_map+tmp))
+                       reservedpages++;
+       codesize =  (unsigned long) &_etext - (unsigned long) &_text;
+       datasize =  (unsigned long) &_edata - (unsigned long) &_etext;
+       initsize =  (unsigned long) &__init_end - (unsigned long) &__init_begin;
+
+       printk("Memory: %luk/%luk available (%dk kernel code, %dk reserved, %dk data, %dk init)\n",
+               (unsigned long) nr_free_pages() << (PAGE_SHIFT-10),
+               max_mapnr << (PAGE_SHIFT-10),
+               codesize >> 10,
+               reservedpages << (PAGE_SHIFT-10),
+               datasize >> 10,
+               initsize >> 10);
+
+       /*
+        * Subtle. SMP is doing its boot stuff late (because it has to
+        * fork idle threads) - but it also needs low mappings for the
+        * protected-mode entry to work. We zap these entries only after
+        * the WP-bit has been tested.
+        */
+#ifndef CONFIG_SMP
+       zap_low_mappings();
+#endif
+}
+
+void free_initmem(void)
+{
+       unsigned long addr;
+
+       addr = (unsigned long)(&__init_begin);
+       for (; addr < (unsigned long)(&__init_end); addr += PAGE_SIZE) {
+               ClearPageReserved(virt_to_page(addr));
+               set_page_count(virt_to_page(addr), 1);
+               free_page(addr);
+               totalram_pages++;
+       }
+       printk ("Freeing unused kernel memory: %luk freed\n", (&__init_end - &__init_begin) >> 10);
+}
+
+#ifdef CONFIG_BLK_DEV_INITRD
+void free_initrd_mem(unsigned long start, unsigned long end)
+{
+       if (start < (unsigned long)&_end)
+               return;
+       printk ("Freeing initrd memory: %ldk freed\n", (end - start) >> 10);
+       for (; start < end; start += PAGE_SIZE) {
+               ClearPageReserved(virt_to_page(start));
+               set_page_count(virt_to_page(start), 1);
+               free_page(start);
+               totalram_pages++;
+       }
+}
+#endif
+
+void si_meminfo(struct sysinfo *val)
+{
+       val->totalram = totalram_pages;
+       val->sharedram = 0;
+       val->freeram = nr_free_pages();
+       val->bufferram = atomic_read(&buffermem_pages);
+       val->totalhigh = 0;
+       val->freehigh = nr_free_highpages();
+       val->mem_unit = PAGE_SIZE;
+       return;
+}
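
paging_init() above splits the physical pages into ZONE_DMA (below MAX_DMA_ADDRESS) and ZONE_NORMAL (everything else). A worked sketch of that split with illustrative numbers, assuming 4 KB pages, the 16 MB ISA DMA limit and 256 MB of RAM:

	/* Illustration only: page counts for the two zones. */
	static void zone_split_example(void)
	{
	        unsigned long max_dma = (16UL << 20) >> 12;     /* 4096 DMA-able pages   */
	        unsigned long low     = (256UL << 20) >> 12;    /* 65536 pages of RAM    */
	        unsigned long zones_size[3] = { 0, 0, 0 };

	        if (low < max_dma) {
	                zones_size[0] = low;            /* all memory is DMA-able        */
	        } else {
	                zones_size[0] = max_dma;        /* ZONE_DMA: the first 16 MB     */
	                zones_size[1] = low - max_dma;  /* ZONE_NORMAL: remaining 240 MB */
	        }
	}
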
diff --git a/arch/x86_64/mm/ioremap.c b/arch/x86_64/mm/ioremap.c
new file mode 100644 (file)
index 0000000..55b8b3f
--- /dev/null
@@ -0,0 +1,163 @@
+/*
+ * arch/x86_64/mm/ioremap.c
+ *
+ * Re-map IO memory to kernel address space so that we can access it.
+ * This is needed for high PCI addresses that aren't mapped in the
+ * 640k-1MB IO memory area on PCs.
+ *
+ * (C) Copyright 1995 1996 Linus Torvalds
+ */
+
+#include <linux/vmalloc.h>
+#include <asm/io.h>
+#include <asm/pgalloc.h>
+
+static inline void remap_area_pte(pte_t * pte, unsigned long address, unsigned long size,
+       unsigned long phys_addr, unsigned long flags)
+{
+       unsigned long end;
+
+       address &= ~PMD_MASK;
+       end = address + size;
+       if (end > PMD_SIZE)
+               end = PMD_SIZE;
+       if (address >= end)
+               BUG();
+       do {
+               if (!pte_none(*pte)) {
+                       printk("remap_area_pte: page already exists\n");
+                       BUG();
+               }
+               set_pte(pte, mk_pte_phys(phys_addr, __pgprot(_PAGE_PRESENT | _PAGE_RW | 
+                                       _PAGE_DIRTY | _PAGE_ACCESSED | flags)));
+               address += PAGE_SIZE;
+               phys_addr += PAGE_SIZE;
+               pte++;
+       } while (address && (address < end));
+}
+
+static inline int remap_area_pmd(pmd_t * pmd, unsigned long address, unsigned long size,
+       unsigned long phys_addr, unsigned long flags)
+{
+       unsigned long end;
+
+       address &= ~PGDIR_MASK;
+       end = address + size;
+       if (end > PGDIR_SIZE)
+               end = PGDIR_SIZE;
+       phys_addr -= address;
+       if (address >= end)
+               BUG();
+       do {
+               pte_t * pte = pte_alloc(&init_mm, pmd, address);
+               if (!pte)
+                       return -ENOMEM;
+               remap_area_pte(pte, address, end - address, address + phys_addr, flags);
+               address = (address + PMD_SIZE) & PMD_MASK;
+               pmd++;
+       } while (address && (address < end));
+       return 0;
+}
+
+static int remap_area_pages(unsigned long address, unsigned long phys_addr,
+                                unsigned long size, unsigned long flags)
+{
+       int error;
+       pgd_t * dir;
+       unsigned long end = address + size;
+
+       phys_addr -= address;
+       dir = pgd_offset(&init_mm, address);
+       flush_cache_all();
+       if (address >= end)
+               BUG();
+       spin_lock(&init_mm.page_table_lock);
+       do {
+               pmd_t *pmd;
+               pmd = pmd_alloc(&init_mm, dir, address);
+               error = -ENOMEM;
+               if (!pmd)
+                       break;
+               if (remap_area_pmd(pmd, address, end - address,
+                                        phys_addr + address, flags))
+                       break;
+               error = 0;
+               address = (address + PGDIR_SIZE) & PGDIR_MASK;
+               dir++;
+       } while (address && (address < end));
+       spin_unlock(&init_mm.page_table_lock);
+       flush_tlb_all();
+       return error;
+}
+
+/*
+ * Generic mapping function (not visible outside):
+ */
+
+/*
+ * Remap an arbitrary physical address space into the kernel virtual
+ * address space. Needed when the kernel wants to access high addresses
+ * directly.
+ *
+ * NOTE! We need to allow non-page-aligned mappings too: we will obviously
+ * have to convert them into an offset in a page-aligned mapping, but the
+ * caller shouldn't need to know that small detail.
+ */
+void * __ioremap(unsigned long phys_addr, unsigned long size, unsigned long flags)
+{
+       void * addr;
+       struct vm_struct * area;
+       unsigned long offset, last_addr;
+
+       /* Don't allow wraparound or zero size */
+       last_addr = phys_addr + size - 1;
+       if (!size || last_addr < phys_addr)
+               return NULL;
+
+       /*
+        * Don't remap the low PCI/ISA area, it's always mapped..
+        */
+       if (phys_addr >= 0xA0000 && last_addr < 0x100000)
+               return phys_to_virt(phys_addr);
+
+       /*
+        * Don't allow anybody to remap normal RAM that we're using..
+        */
+       if (phys_addr < virt_to_phys(high_memory)) {
+               char *t_addr, *t_end;
+               struct page *page;
+
+               t_addr = __va(phys_addr);
+               t_end = t_addr + (size - 1);
+          
+               for(page = virt_to_page(t_addr); page <= virt_to_page(t_end); page++)
+                       if(!PageReserved(page))
+                               return NULL;
+       }
+
+       /*
+        * Mappings have to be page-aligned
+        */
+       offset = phys_addr & ~PAGE_MASK;
+       phys_addr &= PAGE_MASK;
+       size = PAGE_ALIGN(last_addr) - phys_addr;
+
+       /*
+        * Ok, go for it..
+        */
+       area = get_vm_area(size, VM_IOREMAP);
+       if (!area)
+               return NULL;
+       addr = area->addr;
+       if (remap_area_pages(VMALLOC_VMADDR(addr), phys_addr, size, flags)) {
+               vfree(addr);
+               return NULL;
+       }
+       return (void *) (offset + (char *)addr);
+}
+
+void iounmap(void *addr)
+{
+       if (addr > high_memory)
+               return vfree((void *) (PAGE_MASK & (unsigned long) addr));
+}
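
__ioremap() above is normally reached through the ioremap() wrapper from <asm/io.h> (added elsewhere in this patch). A minimal driver-style sketch, assuming `bar` already holds a physical address read from a device's PCI BAR and using a made-up register offset:

    void *regs = ioremap(bar, 0x1000);        /* map 4K of device registers */
    if (!regs)
            return -ENOMEM;
    writel(1, (char *)regs + 0x10);           /* poke a hypothetical control register */
    iounmap(regs);
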
diff --git a/arch/x86_64/tools/Makefile b/arch/x86_64/tools/Makefile
new file mode 100644 (file)
index 0000000..6974ff0
--- /dev/null
@@ -0,0 +1,31 @@
+
+TARGET = $(TOPDIR)/include/asm-x86_64/offset.h
+
+all: 
+
+mrproper:
+
+fastdep: $(TARGET)
+
+.PHONY: all
+
+$(TARGET): offset.h
+       cmp -s $^ $@ || (cp $^ $(TARGET).new && mv $(TARGET).new $(TARGET))
+
+.PHONY : offset.h all modules modules_install
+
+offset.h: offset.sed offset.c FORCE_RECOMPILE
+       $(CC) $(CFLAGS) -S -o offset.tmp offset.c
+       sed -n -f offset.sed < offset.tmp > offset.h   
+
+FORCE_RECOMPILE:
+
+clean:
+       rm -f offset.[hs] $(TARGET).new offset.tmp
+
+mrproper:      
+       rm -f offset.[hs] $(TARGET)
+       rm -f $(TARGET)
+
+include $(TOPDIR)/Rules.make
+
diff --git a/arch/x86_64/tools/offset.c b/arch/x86_64/tools/offset.c
new file mode 100644 (file)
index 0000000..88e72b0
--- /dev/null
@@ -0,0 +1,70 @@
+/* Written 2000 by Andi Kleen */
+/* This program is never executed, just its assembly is examined for offsets 
+   (this trick is needed to get cross compiling right) */  
+/* $Id: offset.c,v 1.13 2002/01/08 15:19:57 ak Exp $ */
+#define ASM_OFFSET_H 1
+#ifndef __KERNEL__
+#define __KERNEL__ 
+#endif
+#include <linux/sched.h> 
+#include <linux/stddef.h>
+#include <linux/errno.h> 
+#include <asm/pda.h>
+#include <asm/hardirq.h>
+#include <asm/processor.h>
+#include <asm/segment.h>
+#include <asm/thread_info.h>
+
+#define output(x) asm volatile ("--- " x)
+#define outconst(x,y) asm volatile ("--- " x : : "i" (y)) 
+
+int main(void) 
+{ 
+       output("/* Auto generated by arch/../tools/offset.c at " __DATE__ ". Do not edit. */\n"); 
+       output("#ifndef ASM_OFFSET_H\n");
+       output("#define ASM_OFFSET_H 1\n"); 
+
+       // task struct entries needed by entry.S
+#define ENTRY(entry) outconst("#define tsk_" #entry " %0", offsetof(struct task_struct, entry))
+       ENTRY(state);
+       ENTRY(flags); 
+       ENTRY(thread); 
+#undef ENTRY
+#define ENTRY(entry) outconst("#define threadinfo_" #entry " %0", offsetof(struct thread_info, entry))
+       ENTRY(flags);
+       ENTRY(addr_limit);
+       ENTRY(preempt_count);
+#undef ENTRY
+#define ENTRY(entry) outconst("#define pda_" #entry " %0", offsetof(struct x8664_pda, entry))
+       ENTRY(kernelstack); 
+       ENTRY(oldrsp); 
+       ENTRY(pcurrent); 
+       ENTRY(irqrsp);
+       ENTRY(irqcount);
+       ENTRY(irqstack); 
+       ENTRY(pgd_quick);
+       ENTRY(pmd_quick);
+       ENTRY(pte_quick);
+       ENTRY(pgtable_cache_sz);
+       ENTRY(cpunumber);
+       ENTRY(irqstackptr);
+       ENTRY(me);
+       ENTRY(__softirq_pending); 
+       ENTRY(__local_irq_count);
+       ENTRY(__local_bh_count);
+       ENTRY(__ksoftirqd_task);
+#undef ENTRY
+       output("#ifdef __ASSEMBLY__"); 
+#define CONST(t) outconst("#define " #t " %0", t)
+       CONST(TASK_SIZE);
+       CONST(SIGCHLD); 
+       CONST(CLONE_VFORK); 
+       CONST(CLONE_VM); 
+#undef CONST
+       output("#endif"); 
+
+       output("#endif\n"); 
+
+       return(0); 
+} 
+
diff --git a/arch/x86_64/tools/offset.sed b/arch/x86_64/tools/offset.sed
new file mode 100644 (file)
index 0000000..0bf9f83
--- /dev/null
@@ -0,0 +1,7 @@
+/---/ {
+       s/---//
+       s/\$//
+       s/^     //
+       s/^ //
+       p
+}
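
Taken together, offset.c and offset.sed generate include/asm-x86_64/offset.h: the compiler emits each outconst() marker into the assembly with the real constant substituted, and the sed script above then strips the "---" marker and the "$" prefix. The result is a plain header; the numbers below are purely illustrative, the real values depend on the structure layouts:

    /* Auto generated by arch/../tools/offset.c at <build date>. Do not edit. */
    #ifndef ASM_OFFSET_H
    #define ASM_OFFSET_H 1
    #define tsk_state 0             /* illustrative value only */
    #define threadinfo_flags 16     /* illustrative value only */
    #define pda_kernelstack 0       /* illustrative value only */
    #endif
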
diff --git a/arch/x86_64/vmlinux.lds b/arch/x86_64/vmlinux.lds
new file mode 100644 (file)
index 0000000..f0a39f4
--- /dev/null
@@ -0,0 +1,112 @@
+/* ld script to make x86-64 Linux kernel
+ * Written by Martin Mares <mj@atrey.karlin.mff.cuni.cz>;
+ */
+OUTPUT_FORMAT("elf64-x86-64", "elf64-x86-64", "elf64-x86-64")
+OUTPUT_ARCH(i386)
+ENTRY(_start)
+SECTIONS
+{
+  . = 0xffffffff80100000;
+  _text = .;                   /* Text and read-only data */
+  .text : {
+       *(.text)
+       *(.fixup)
+       *(.gnu.warning)
+       } = 0x9090
+  .text.lock : { *(.text.lock) }       /* out-of-line lock text */
+
+  _etext = .;                  /* End of text section */
+
+  .rodata : { *(.rodata) *(.rodata.*) }
+  .kstrtab : { *(.kstrtab) }
+
+  . = ALIGN(16);               /* Exception table */
+  __start___ex_table = .;
+  __ex_table : { *(__ex_table) }
+  __stop___ex_table = .;
+
+  __start___ksymtab = .;       /* Kernel symbol table */
+  __ksymtab : { *(__ksymtab) }
+  __stop___ksymtab = .;
+
+  .data : {                    /* Data */
+       *(.data)
+       CONSTRUCTORS
+       }
+
+  _edata = .;                  /* End of data section */
+
+  __bss_start = .;             /* BSS */
+  .bss : {
+       *(.bss)
+       }
+  __bss_end = .;
+
+  . = ALIGN(64);
+  .data.cacheline_aligned : { *(.data.cacheline_aligned) }
+
+  .vsyscall_0 -10*1024*1024: AT ((LOADADDR(.data.cacheline_aligned) + SIZEOF(.data.cacheline_aligned) + 4095) & ~(4095)) { *(.vsyscall_0) }
+  __vsyscall_0 = LOADADDR(.vsyscall_0);
+  . = ALIGN(64);
+  .vxtime_sequence : AT ((LOADADDR(.vsyscall_0) + SIZEOF(.vsyscall_0) + 63) & ~(63)) { *(.vxtime_sequence) }
+  vxtime_sequence = LOADADDR(.vxtime_sequence);
+  .last_tsc_low : AT (LOADADDR(.vxtime_sequence) + SIZEOF(.vxtime_sequence)) { *(.last_tsc_low) }
+  last_tsc_low = LOADADDR(.last_tsc_low);
+  .delay_at_last_interrupt : AT (LOADADDR(.last_tsc_low) + SIZEOF(.last_tsc_low)) { *(.delay_at_last_interrupt) }
+  delay_at_last_interrupt = LOADADDR(.delay_at_last_interrupt);
+  .fast_gettimeoffset_quotient : AT (LOADADDR(.delay_at_last_interrupt) + SIZEOF(.delay_at_last_interrupt)) { *(.fast_gettimeoffset_quotient) }
+  fast_gettimeoffset_quotient = LOADADDR(.fast_gettimeoffset_quotient);
+  .wall_jiffies : AT (LOADADDR(.fast_gettimeoffset_quotient) + SIZEOF(.fast_gettimeoffset_quotient)) { *(.wall_jiffies) }
+  wall_jiffies = LOADADDR(.wall_jiffies);
+  .sys_tz : AT (LOADADDR(.wall_jiffies) + SIZEOF(.wall_jiffies)) { *(.sys_tz) }
+  sys_tz = LOADADDR(.sys_tz);
+  .jiffies : AT (LOADADDR(.sys_tz) + SIZEOF(.sys_tz)) { *(.jiffies) }
+  jiffies = LOADADDR(.jiffies);
+  . = ALIGN(16);
+  .xtime : AT ((LOADADDR(.jiffies) + SIZEOF(.jiffies) + 15) & ~(15)) { *(.xtime) }
+  xtime = LOADADDR(.xtime);
+  .vsyscall_1 ADDR(.vsyscall_0) + 1024: AT (LOADADDR(.vsyscall_0) + 1024) { *(.vsyscall_1) }
+  . = LOADADDR(.vsyscall_0) + 4096;
+
+  . = ALIGN(8192);             /* init_task */
+  .data.init_task : { *(.data.init_task) }
+
+  . = ALIGN(4096);             /* Init code and data */
+  __init_begin = .;
+  .text.init : { *(.text.init) }
+  .data.init : { *(.data.init) }
+  . = ALIGN(16);
+  __setup_start = .;
+  .setup.init : { *(.setup.init) }
+  __setup_end = .;
+  __initcall_start = .;
+  .initcall.init : {
+       *(.initcall1.init) 
+       *(.initcall2.init) 
+       *(.initcall3.init) 
+       *(.initcall4.init) 
+       *(.initcall5.init) 
+       *(.initcall6.init) 
+       *(.initcall7.init)
+  }
+  __initcall_end = .;
+  . = ALIGN(4096);
+  __init_end = .;
+
+  _end = . ;
+
+  /* Sections to be discarded */
+  /DISCARD/ : {
+       *(.data.exit)
+       *(.exitcall.exit)
+       }
+
+  /* Stabs debugging sections.  */
+  .stab 0 : { *(.stab) }
+  .stabstr 0 : { *(.stabstr) }
+  .stab.excl 0 : { *(.stab.excl) }
+  .stab.exclstr 0 : { *(.stab.exclstr) }
+  .stab.index 0 : { *(.stab.index) }
+  .stab.indexstr 0 : { *(.stab.indexstr) }
+  .comment 0 : { *(.comment) }
+}
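
Variables land in the special sections named in this script (.jiffies, .xtime, .data.cacheline_aligned, the .vsyscall_* blocks) by being declared with a section attribute in C; the linker then places them at the fixed addresses computed above so the vsyscall page can read them without entering the kernel. A sketch of the declaration side, with made-up names (the real declarations live in the timer and vsyscall code elsewhere in this patch):

    #define __section_jiffies   __attribute__((__section__(".jiffies")))
    #define __cacheline_aligned __attribute__((__aligned__(64), \
                                    __section__(".data.cacheline_aligned")))

    unsigned long __section_jiffies jiffies;         /* sketch only */
    static long frequently_read __cacheline_aligned; /* sketch only */
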
diff --git a/include/asm-x86_64/a.out.h b/include/asm-x86_64/a.out.h
new file mode 100644 (file)
index 0000000..83c98f8
--- /dev/null
@@ -0,0 +1,30 @@
+#ifndef __X8664_A_OUT_H__
+#define __X8664_A_OUT_H__
+
+
+/* Note: a.out is not supported in 64bit mode. This is just here to 
+   still let some old things compile. */ 
+
+struct exec
+{
+  unsigned long a_info;                /* Use macros N_MAGIC, etc for access */
+  unsigned a_text;             /* length of text, in bytes */
+  unsigned a_data;             /* length of data, in bytes */
+  unsigned a_bss;              /* length of uninitialized data area for file, in bytes */
+  unsigned a_syms;             /* length of symbol table data in file, in bytes */
+  unsigned a_entry;            /* start address */
+  unsigned a_trsize;           /* length of relocation info for text, in bytes */
+  unsigned a_drsize;           /* length of relocation info for data, in bytes */
+};
+
+#define N_TRSIZE(a)    ((a).a_trsize)
+#define N_DRSIZE(a)    ((a).a_drsize)
+#define N_SYMSIZE(a)   ((a).a_syms)
+
+#ifdef __KERNEL__
+
+#define STACK_TOP      TASK_SIZE
+
+#endif
+
+#endif /* __A_OUT_GNU_H__ */
diff --git a/include/asm-x86_64/apic.h b/include/asm-x86_64/apic.h
new file mode 100644 (file)
index 0000000..f21e721
--- /dev/null
@@ -0,0 +1,103 @@
+#ifndef __ASM_APIC_H
+#define __ASM_APIC_H
+
+#include <linux/config.h>
+#include <linux/pm.h>
+#include <asm/apicdef.h>
+#include <asm/system.h>
+
+#ifdef CONFIG_X86_LOCAL_APIC
+
+#define APIC_DEBUG 0
+
+#if APIC_DEBUG
+#define Dprintk(x...) printk(x)
+#else
+#define Dprintk(x...)
+#endif
+
+/*
+ * Basic functions accessing APICs.
+ */
+
+static __inline void apic_write(unsigned long reg, unsigned long v)
+{
+       *((volatile unsigned long *)(APIC_BASE+reg)) = v;
+}
+
+static __inline void apic_write_atomic(unsigned long reg, unsigned long v)
+{
+       xchg((volatile unsigned long *)(APIC_BASE+reg), v);
+}
+
+static __inline unsigned long apic_read(unsigned long reg)
+{
+       return *((volatile unsigned long *)(APIC_BASE+reg));
+}
+
+static __inline__ void apic_wait_icr_idle(void)
+{
+       do { } while ( apic_read( APIC_ICR ) & APIC_ICR_BUSY );
+}
+
+#ifdef CONFIG_X86_GOOD_APIC
+# define FORCE_READ_AROUND_WRITE 0
+# define apic_read_around(x)
+# define apic_write_around(x,y) apic_write((x),(y))
+#else
+# define FORCE_READ_AROUND_WRITE 1
+# define apic_read_around(x) apic_read(x)
+# define apic_write_around(x,y) apic_write_atomic((x),(y))
+#endif
+
+static inline void ack_APIC_irq(void)
+{
+       /*
+        * ack_APIC_irq() actually gets compiled as a single instruction:
+        * - a single rmw on Pentium/82489DX
+        * - a single write on P6+ cores (CONFIG_X86_GOOD_APIC)
+        * ... yummie.
+        */
+
+       /* Docs say use 0 for future compatibility */
+       apic_write_around(APIC_EOI, 0);
+}
+
+extern int get_maxlvt(void);
+extern void clear_local_APIC(void);
+extern void connect_bsp_APIC (void);
+extern void disconnect_bsp_APIC (void);
+extern void disable_local_APIC (void);
+extern int verify_local_APIC (void);
+extern void cache_APIC_registers (void);
+extern void sync_Arb_IDs (void);
+extern void init_bsp_APIC (void);
+extern void setup_local_APIC (void);
+extern void init_apic_mappings (void);
+extern void smp_local_timer_interrupt (struct pt_regs * regs);
+extern void setup_APIC_clocks (void);
+extern void setup_apic_nmi_watchdog (void);
+extern inline void nmi_watchdog_tick (struct pt_regs * regs);
+extern int APIC_init_uniprocessor (void);
+extern void disable_APIC_timer(void);
+extern void enable_APIC_timer(void);
+
+extern struct pm_dev *apic_pm_register(pm_dev_t, unsigned long, pm_callback);
+extern void apic_pm_unregister(struct pm_dev*);
+
+extern unsigned int apic_timer_irqs [NR_CPUS];
+extern int check_nmi_watchdog (void);
+
+extern unsigned int nmi_watchdog;
+#define NMI_NONE       0
+#define NMI_IO_APIC    1
+#define NMI_LOCAL_APIC 2
+#define NMI_INVALID    3
+
+#endif /* CONFIG_X86_LOCAL_APIC */
+
+#define clustered_apic_mode 0
+#define esr_disable 0
+extern unsigned boot_cpu_id;
+
+#endif /* __ASM_APIC_H */
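
The accessors above are combined in the obvious way by the APIC setup code; for example, masking the local APIC timer LVT entry (roughly what disable_APIC_timer() does) looks like the following sketch, with register names taken from <asm/apicdef.h> below:

    static inline void mask_lapic_timer(void)        /* illustrative helper */
    {
            unsigned long v = apic_read(APIC_LVTT);
            apic_write_around(APIC_LVTT, v | APIC_LVT_MASKED);
    }
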
diff --git a/include/asm-x86_64/apicdef.h b/include/asm-x86_64/apicdef.h
new file mode 100644 (file)
index 0000000..f855a7d
--- /dev/null
@@ -0,0 +1,363 @@
+#ifndef __ASM_APICDEF_H
+#define __ASM_APICDEF_H
+
+/*
+ * Constants for various Intel APICs. (local APIC, IOAPIC, etc.)
+ *
+ * Alan Cox <Alan.Cox@linux.org>, 1995.
+ * Ingo Molnar <mingo@redhat.com>, 1999, 2000
+ */
+
+#define                APIC_DEFAULT_PHYS_BASE  0xfee00000
+#define                APIC_ID         0x20
+#define                        APIC_ID_MASK            (0x0F<<24)
+#define                        GET_APIC_ID(x)          (((x)>>24)&0x0F)
+#define                APIC_LVR        0x30
+#define                        APIC_LVR_MASK           0xFF00FF
+#define                        GET_APIC_VERSION(x)     ((x)&0xFF)
+#define                        GET_APIC_MAXLVT(x)      (((x)>>16)&0xFF)
+#define                        APIC_INTEGRATED(x)      ((x)&0xF0)
+#define                APIC_TASKPRI    0x80
+#define                        APIC_TPRI_MASK          0xFF
+#define                APIC_ARBPRI     0x90
+#define                        APIC_ARBPRI_MASK        0xFF
+#define                APIC_PROCPRI    0xA0
+#define                APIC_EOI        0xB0
+#define                        APIC_EIO_ACK            0x0             /* Write this to the EOI register */
+#define                APIC_RRR        0xC0
+#define                APIC_LDR        0xD0
+#define                        APIC_LDR_MASK           (0xFF<<24)
+#define                        GET_APIC_LOGICAL_ID(x)  (((x)>>24)&0xFF)
+#define                        SET_APIC_LOGICAL_ID(x)  (((x)<<24))
+#define                        APIC_ALL_CPUS           0xFF
+#define                APIC_DFR        0xE0
+#define                APIC_SPIV       0xF0
+#define                        APIC_SPIV_FOCUS_DISABLED        (1<<9)
+#define                        APIC_SPIV_APIC_ENABLED          (1<<8)
+#define                APIC_ISR        0x100
+#define                APIC_TMR        0x180
+#define        APIC_IRR        0x200
+#define        APIC_ESR        0x280
+#define                        APIC_ESR_SEND_CS        0x00001
+#define                        APIC_ESR_RECV_CS        0x00002
+#define                        APIC_ESR_SEND_ACC       0x00004
+#define                        APIC_ESR_RECV_ACC       0x00008
+#define                        APIC_ESR_SENDILL        0x00020
+#define                        APIC_ESR_RECVILL        0x00040
+#define                        APIC_ESR_ILLREGA        0x00080
+#define                APIC_ICR        0x300
+#define                        APIC_DEST_SELF          0x40000
+#define                        APIC_DEST_ALLINC        0x80000
+#define                        APIC_DEST_ALLBUT        0xC0000
+#define                        APIC_ICR_RR_MASK        0x30000
+#define                        APIC_ICR_RR_INVALID     0x00000
+#define                        APIC_ICR_RR_INPROG      0x10000
+#define                        APIC_ICR_RR_VALID       0x20000
+#define                        APIC_INT_LEVELTRIG      0x08000
+#define                        APIC_INT_ASSERT         0x04000
+#define                        APIC_ICR_BUSY           0x01000
+#define                        APIC_DEST_LOGICAL       0x00800
+#define                        APIC_DM_FIXED           0x00000
+#define                        APIC_DM_LOWEST          0x00100
+#define                        APIC_DM_SMI             0x00200
+#define                        APIC_DM_REMRD           0x00300
+#define                        APIC_DM_NMI             0x00400
+#define                        APIC_DM_INIT            0x00500
+#define                        APIC_DM_STARTUP         0x00600
+#define                        APIC_DM_EXTINT          0x00700
+#define                        APIC_VECTOR_MASK        0x000FF
+#define                APIC_ICR2       0x310
+#define                        GET_APIC_DEST_FIELD(x)  (((x)>>24)&0xFF)
+#define                        SET_APIC_DEST_FIELD(x)  ((x)<<24)
+#define                APIC_LVTT       0x320
+#define                APIC_LVTPC      0x340
+#define                APIC_LVT0       0x350
+#define                        APIC_LVT_TIMER_BASE_MASK        (0x3<<18)
+#define                        GET_APIC_TIMER_BASE(x)          (((x)>>18)&0x3)
+#define                        SET_APIC_TIMER_BASE(x)          (((x)<<18))
+#define                        APIC_TIMER_BASE_CLKIN           0x0
+#define                        APIC_TIMER_BASE_TMBASE          0x1
+#define                        APIC_TIMER_BASE_DIV             0x2
+#define                        APIC_LVT_TIMER_PERIODIC         (1<<17)
+#define                        APIC_LVT_MASKED                 (1<<16)
+#define                        APIC_LVT_LEVEL_TRIGGER          (1<<15)
+#define                        APIC_LVT_REMOTE_IRR             (1<<14)
+#define                        APIC_INPUT_POLARITY             (1<<13)
+#define                        APIC_SEND_PENDING               (1<<12)
+#define                        GET_APIC_DELIVERY_MODE(x)       (((x)>>8)&0x7)
+#define                        SET_APIC_DELIVERY_MODE(x,y)     (((x)&~0x700)|((y)<<8))
+#define                                APIC_MODE_FIXED         0x0
+#define                                APIC_MODE_NMI           0x4
+#define                                APIC_MODE_EXINT         0x7
+#define        APIC_LVT1       0x360
+#define                APIC_LVTERR     0x370
+#define                APIC_TMICT      0x380
+#define                APIC_TMCCT      0x390
+#define                APIC_TDCR       0x3E0
+#define                        APIC_TDR_DIV_TMBASE     (1<<2)
+#define                        APIC_TDR_DIV_1          0xB
+#define                        APIC_TDR_DIV_2          0x0
+#define                        APIC_TDR_DIV_4          0x1
+#define                        APIC_TDR_DIV_8          0x2
+#define                        APIC_TDR_DIV_16         0x3
+#define                        APIC_TDR_DIV_32         0x8
+#define                        APIC_TDR_DIV_64         0x9
+#define                        APIC_TDR_DIV_128        0xA
+
+#define APIC_BASE (fix_to_virt(FIX_APIC_BASE))
+
+#define MAX_IO_APICS 8
+
+/*
+ * the local APIC register structure, memory mapped. Not terribly well
+ * tested, but we might eventually use this one in the future - the
+ * reason we cannot use it right now is the P5 APIC: it has an
+ * erratum whereby it cannot handle 8-bit reads and writes, only 32-bit ones ...
+ */
+#define u32 unsigned int
+
+#define lapic ((volatile struct local_apic *)APIC_BASE)
+
+struct local_apic {
+
+/*000*/        struct { u32 __reserved[4]; } __reserved_01;
+
+/*010*/        struct { u32 __reserved[4]; } __reserved_02;
+
+/*020*/        struct { /* APIC ID Register */
+               u32   __reserved_1      : 24,
+                       phys_apic_id    :  4,
+                       __reserved_2    :  4;
+               u32 __reserved[3];
+       } id;
+
+/*030*/        const
+       struct { /* APIC Version Register */
+               u32   version           :  8,
+                       __reserved_1    :  8,
+                       max_lvt         :  8,
+                       __reserved_2    :  8;
+               u32 __reserved[3];
+       } version;
+
+/*040*/        struct { u32 __reserved[4]; } __reserved_03;
+
+/*050*/        struct { u32 __reserved[4]; } __reserved_04;
+
+/*060*/        struct { u32 __reserved[4]; } __reserved_05;
+
+/*070*/        struct { u32 __reserved[4]; } __reserved_06;
+
+/*080*/        struct { /* Task Priority Register */
+               u32   priority  :  8,
+                       __reserved_1    : 24;
+               u32 __reserved_2[3];
+       } tpr;
+
+/*090*/        const
+       struct { /* Arbitration Priority Register */
+               u32   priority  :  8,
+                       __reserved_1    : 24;
+               u32 __reserved_2[3];
+       } apr;
+
+/*0A0*/        const
+       struct { /* Processor Priority Register */
+               u32   priority  :  8,
+                       __reserved_1    : 24;
+               u32 __reserved_2[3];
+       } ppr;
+
+/*0B0*/        struct { /* End Of Interrupt Register */
+               u32   eoi;
+               u32 __reserved[3];
+       } eoi;
+
+/*0C0*/        struct { u32 __reserved[4]; } __reserved_07;
+
+/*0D0*/        struct { /* Logical Destination Register */
+               u32   __reserved_1      : 24,
+                       logical_dest    :  8;
+               u32 __reserved_2[3];
+       } ldr;
+
+/*0E0*/        struct { /* Destination Format Register */
+               u32   __reserved_1      : 28,
+                       model           :  4;
+               u32 __reserved_2[3];
+       } dfr;
+
+/*0F0*/        struct { /* Spurious Interrupt Vector Register */
+               u32     spurious_vector :  8,
+                       apic_enabled    :  1,
+                       focus_cpu       :  1,
+                       __reserved_2    : 22;
+               u32 __reserved_3[3];
+       } svr;
+
+/*100*/        struct { /* In Service Register */
+/*170*/                u32 bitfield;
+               u32 __reserved[3];
+       } isr [8];
+
+/*180*/        struct { /* Trigger Mode Register */
+/*1F0*/                u32 bitfield;
+               u32 __reserved[3];
+       } tmr [8];
+
+/*200*/        struct { /* Interrupt Request Register */
+/*270*/                u32 bitfield;
+               u32 __reserved[3];
+       } irr [8];
+
+/*280*/        union { /* Error Status Register */
+               struct {
+                       u32   send_cs_error                     :  1,
+                               receive_cs_error                :  1,
+                               send_accept_error               :  1,
+                               receive_accept_error            :  1,
+                               __reserved_1                    :  1,
+                               send_illegal_vector             :  1,
+                               receive_illegal_vector          :  1,
+                               illegal_register_address        :  1,
+                               __reserved_2                    : 24;
+                       u32 __reserved_3[3];
+               } error_bits;
+               struct {
+                       u32 errors;
+                       u32 __reserved_3[3];
+               } all_errors;
+       } esr;
+
+/*290*/        struct { u32 __reserved[4]; } __reserved_08;
+
+/*2A0*/        struct { u32 __reserved[4]; } __reserved_09;
+
+/*2B0*/        struct { u32 __reserved[4]; } __reserved_10;
+
+/*2C0*/        struct { u32 __reserved[4]; } __reserved_11;
+
+/*2D0*/        struct { u32 __reserved[4]; } __reserved_12;
+
+/*2E0*/        struct { u32 __reserved[4]; } __reserved_13;
+
+/*2F0*/        struct { u32 __reserved[4]; } __reserved_14;
+
+/*300*/        struct { /* Interrupt Command Register 1 */
+               u32   vector                    :  8,
+                       delivery_mode           :  3,
+                       destination_mode        :  1,
+                       delivery_status         :  1,
+                       __reserved_1            :  1,
+                       level                   :  1,
+                       trigger                 :  1,
+                       __reserved_2            :  2,
+                       shorthand               :  2,
+                       __reserved_3            :  12;
+               u32 __reserved_4[3];
+       } icr1;
+
+/*310*/        struct { /* Interrupt Command Register 2 */
+               union {
+                       u32   __reserved_1      : 24,
+                               phys_dest       :  4,
+                               __reserved_2    :  4;
+                       u32   __reserved_3      : 24,
+                               logical_dest    :  8;
+               } dest;
+               u32 __reserved_4[3];
+       } icr2;
+
+/*320*/        struct { /* LVT - Timer */
+               u32   vector            :  8,
+                       __reserved_1    :  4,
+                       delivery_status :  1,
+                       __reserved_2    :  3,
+                       mask            :  1,
+                       timer_mode      :  1,
+                       __reserved_3    : 14;
+               u32 __reserved_4[3];
+       } lvt_timer;
+
+/*330*/        struct { u32 __reserved[4]; } __reserved_15;
+
+/*340*/        struct { /* LVT - Performance Counter */
+               u32   vector            :  8,
+                       delivery_mode   :  3,
+                       __reserved_1    :  1,
+                       delivery_status :  1,
+                       __reserved_2    :  3,
+                       mask            :  1,
+                       __reserved_3    : 15;
+               u32 __reserved_4[3];
+       } lvt_pc;
+
+/*350*/        struct { /* LVT - LINT0 */
+               u32   vector            :  8,
+                       delivery_mode   :  3,
+                       __reserved_1    :  1,
+                       delivery_status :  1,
+                       polarity        :  1,
+                       remote_irr      :  1,
+                       trigger         :  1,
+                       mask            :  1,
+                       __reserved_2    : 15;
+               u32 __reserved_3[3];
+       } lvt_lint0;
+
+/*360*/        struct { /* LVT - LINT1 */
+               u32   vector            :  8,
+                       delivery_mode   :  3,
+                       __reserved_1    :  1,
+                       delivery_status :  1,
+                       polarity        :  1,
+                       remote_irr      :  1,
+                       trigger         :  1,
+                       mask            :  1,
+                       __reserved_2    : 15;
+               u32 __reserved_3[3];
+       } lvt_lint1;
+
+/*370*/        struct { /* LVT - Error */
+               u32   vector            :  8,
+                       __reserved_1    :  4,
+                       delivery_status :  1,
+                       __reserved_2    :  3,
+                       mask            :  1,
+                       __reserved_3    : 15;
+               u32 __reserved_4[3];
+       } lvt_error;
+
+/*380*/        struct { /* Timer Initial Count Register */
+               u32   initial_count;
+               u32 __reserved_2[3];
+       } timer_icr;
+
+/*390*/        const
+       struct { /* Timer Current Count Register */
+               u32   curr_count;
+               u32 __reserved_2[3];
+       } timer_ccr;
+
+/*3A0*/        struct { u32 __reserved[4]; } __reserved_16;
+
+/*3B0*/        struct { u32 __reserved[4]; } __reserved_17;
+
+/*3C0*/        struct { u32 __reserved[4]; } __reserved_18;
+
+/*3D0*/        struct { u32 __reserved[4]; } __reserved_19;
+
+/*3E0*/        struct { /* Timer Divide Configuration Register */
+               u32   divisor           :  4,
+                       __reserved_1    : 28;
+               u32 __reserved_2[3];
+       } timer_dcr;
+
+/*3F0*/        struct { u32 __reserved[4]; } __reserved_20;
+
+} __attribute__ ((packed));
+
+#undef u32
+
+#endif
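
As an illustration of how these register definitions pair with the accessors from <asm/apic.h>, this is roughly the sequence get_maxlvt() uses to read the local APIC version register and decide how many LVT entries exist (integrated APICs report it, 82489DXs do not):

    unsigned int ver = apic_read(APIC_LVR);
    int maxlvt = APIC_INTEGRATED(GET_APIC_VERSION(ver)) ? GET_APIC_MAXLVT(ver) : 2;
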
diff --git a/include/asm-x86_64/atomic.h b/include/asm-x86_64/atomic.h
new file mode 100644 (file)
index 0000000..3dcac33
--- /dev/null
@@ -0,0 +1,206 @@
+#ifndef __ARCH_X86_64_ATOMIC__
+#define __ARCH_X86_64_ATOMIC__
+
+#include <linux/config.h>
+
+/* atomic_t should be 32 bit signed type */
+
+/*
+ * Atomic operations that C can't guarantee us.  Useful for
+ * resource counting etc..
+ */
+
+#ifdef CONFIG_SMP
+#define LOCK "lock ; "
+#else
+#define LOCK ""
+#endif
+
+/*
+ * Make sure gcc doesn't try to be clever and move things around
+ * on us. We need to use _exactly_ the address the user gave us,
+ * not some alias that contains the same information.
+ */
+typedef struct { volatile int counter; } atomic_t;
+
+#define ATOMIC_INIT(i) { (i) }
+
+/**
+ * atomic_read - read atomic variable
+ * @v: pointer of type atomic_t
+ * 
+ * Atomically reads the value of @v.  Note that the guaranteed
+ * useful range of an atomic_t is only 24 bits.
+ */ 
+#define atomic_read(v)         ((v)->counter)
+
+/**
+ * atomic_set - set atomic variable
+ * @v: pointer of type atomic_t
+ * @i: required value
+ * 
+ * Atomically sets the value of @v to @i.  Note that the guaranteed
+ * useful range of an atomic_t is only 24 bits.
+ */ 
+#define atomic_set(v,i)                (((v)->counter) = (i))
+
+/**
+ * atomic_add - add integer to atomic variable
+ * @i: integer value to add
+ * @v: pointer of type atomic_t
+ * 
+ * Atomically adds @i to @v.  Note that the guaranteed useful range
+ * of an atomic_t is only 24 bits.
+ */
+static __inline__ void atomic_add(int i, atomic_t *v)
+{
+       __asm__ __volatile__(
+               LOCK "addl %1,%0"
+               :"=m" (v->counter)
+               :"ir" (i), "m" (v->counter));
+}
+
+/**
+ * atomic_sub - subtract integer from atomic variable
+ * @i: integer value to subtract
+ * @v: pointer of type atomic_t
+ * 
+ * Atomically subtracts @i from @v.  Note that the guaranteed
+ * useful range of an atomic_t is only 24 bits.
+ */
+static __inline__ void atomic_sub(int i, atomic_t *v)
+{
+       __asm__ __volatile__(
+               LOCK "subl %1,%0"
+               :"=m" (v->counter)
+               :"ir" (i), "m" (v->counter));
+}
+
+/**
+ * atomic_sub_and_test - subtract value from variable and test result
+ * @i: integer value to subtract
+ * @v: pointer of type atomic_t
+ * 
+ * Atomically subtracts @i from @v and returns
+ * true if the result is zero, or false for all
+ * other cases.  Note that the guaranteed
+ * useful range of an atomic_t is only 24 bits.
+ */
+static __inline__ int atomic_sub_and_test(int i, atomic_t *v)
+{
+       unsigned char c;
+
+       __asm__ __volatile__(
+               LOCK "subl %2,%0; sete %1"
+               :"=m" (v->counter), "=qm" (c)
+               :"ir" (i), "m" (v->counter) : "memory");
+       return c;
+}
+
+/**
+ * atomic_inc - increment atomic variable
+ * @v: pointer of type atomic_t
+ * 
+ * Atomically increments @v by 1.  Note that the guaranteed
+ * useful range of an atomic_t is only 24 bits.
+ */ 
+static __inline__ void atomic_inc(atomic_t *v)
+{
+       __asm__ __volatile__(
+               LOCK "incl %0"
+               :"=m" (v->counter)
+               :"m" (v->counter));
+}
+
+/**
+ * atomic_dec - decrement atomic variable
+ * @v: pointer of type atomic_t
+ * 
+ * Atomically decrements @v by 1.  Note that the guaranteed
+ * useful range of an atomic_t is only 24 bits.
+ */ 
+static __inline__ void atomic_dec(atomic_t *v)
+{
+       __asm__ __volatile__(
+               LOCK "decl %0"
+               :"=m" (v->counter)
+               :"m" (v->counter));
+}
+
+/**
+ * atomic_dec_and_test - decrement and test
+ * @v: pointer of type atomic_t
+ * 
+ * Atomically decrements @v by 1 and
+ * returns true if the result is 0, or false for all other
+ * cases.  Note that the guaranteed
+ * useful range of an atomic_t is only 24 bits.
+ */ 
+static __inline__ int atomic_dec_and_test(atomic_t *v)
+{
+       unsigned char c;
+
+       __asm__ __volatile__(
+               LOCK "decl %0; sete %1"
+               :"=m" (v->counter), "=qm" (c)
+               :"m" (v->counter) : "memory");
+       return c != 0;
+}
+
+/**
+ * atomic_inc_and_test - increment and test 
+ * @v: pointer of type atomic_t
+ * 
+ * Atomically increments @v by 1
+ * and returns true if the result is zero, or false for all
+ * other cases.  Note that the guaranteed
+ * useful range of an atomic_t is only 24 bits.
+ */ 
+static __inline__ int atomic_inc_and_test(atomic_t *v)
+{
+       unsigned char c;
+
+       __asm__ __volatile__(
+               LOCK "incl %0; sete %1"
+               :"=m" (v->counter), "=qm" (c)
+               :"m" (v->counter) : "memory");
+       return c != 0;
+}
+
+/**
+ * atomic_add_negative - add and test if negative
+ * @v: pointer of type atomic_t
+ * @i: integer value to add
+ * 
+ * Atomically adds @i to @v and returns true
+ * if the result is negative, or false when
+ * result is greater than or equal to zero.  Note that the guaranteed
+ * useful range of an atomic_t is only 24 bits.
+ */ 
+static __inline__ int atomic_add_negative(int i, atomic_t *v)
+{
+       unsigned char c;
+
+       __asm__ __volatile__(
+               LOCK "addl %2,%0; sets %1"
+               :"=m" (v->counter), "=qm" (c)
+               :"ir" (i), "m" (v->counter) : "memory");
+       return c;
+}
+
+/* These are x86-specific, used by some header files */
+#define atomic_clear_mask(mask, addr) \
+__asm__ __volatile__(LOCK "andl %0,%1" \
+: : "r" (~(mask)),"m" (*addr) : "memory")
+
+#define atomic_set_mask(mask, addr) \
+__asm__ __volatile__(LOCK "orl %0,%1" \
+: : "r" ((unsigned)mask),"m" (*addr) : "memory")
+
+/* Atomic operations are already serializing on x86 */
+#define smp_mb__before_atomic_dec()    barrier()
+#define smp_mb__after_atomic_dec()     barrier()
+#define smp_mb__before_atomic_inc()    barrier()
+#define smp_mb__after_atomic_inc()     barrier()
+
+#endif
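
A typical consumer of these primitives is a reference count; the pattern below is purely illustrative (the object and its free routine are made up), but shows why the atomic decrement-and-test matters:

    struct obj {
            atomic_t refcnt;
            /* payload */
    };

    static void obj_get(struct obj *o)
    {
            atomic_inc(&o->refcnt);
    }

    static void obj_put(struct obj *o)
    {
            if (atomic_dec_and_test(&o->refcnt))
                    kfree(o);       /* last reference gone: nobody else can see it */
    }
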
diff --git a/include/asm-x86_64/bitops.h b/include/asm-x86_64/bitops.h
new file mode 100644 (file)
index 0000000..bf2ef10
--- /dev/null
@@ -0,0 +1,465 @@
+#ifndef _X86_64_BITOPS_H
+#define _X86_64_BITOPS_H
+
+/*
+ * Copyright 1992, Linus Torvalds.
+ */
+
+#include <linux/config.h>
+
+/*
+ * These have to be done with inline assembly: that way the bit-setting
+ * is guaranteed to be atomic. All bit operations return 0 if the bit
+ * was cleared before the operation and != 0 if it was not.
+ *
+ * bit 0 is the LSB of addr; bit 32 is the LSB of (addr+1).
+ */
+
+#ifdef CONFIG_SMP
+#define LOCK_PREFIX "lock ; "
+#else
+#define LOCK_PREFIX ""
+#endif
+
+#define ADDR (*(volatile long *) addr)
+
+/**
+ * set_bit - Atomically set a bit in memory
+ * @nr: the bit to set
+ * @addr: the address to start counting from
+ *
+ * This function is atomic and may not be reordered.  See __set_bit()
+ * if you do not require the atomic guarantees.
+ */
+static __inline__ void set_bit(int nr, volatile void * addr)
+{
+       __asm__ __volatile__( LOCK_PREFIX
+               "btsl %1,%0"
+               :"=m" (ADDR)
+               :"dIr" (nr));
+}
+
+/**
+ * __set_bit - Set a bit in memory
+ * @nr: the bit to set
+ * @addr: the address to start counting from
+ *
+ * Unlike set_bit(), this function is non-atomic and may be reordered.
+ * If it's called on the same region of memory simultaneously, the effect
+ * may be that only one operation succeeds.
+ */
+static __inline__ void __set_bit(int nr, volatile void * addr)
+{
+       __asm__(
+               "btsl %1,%0"
+               :"=m" (ADDR)
+               :"dIr" (nr));
+}
+
+/**
+ * clear_bit - Clears a bit in memory
+ * @nr: Bit to clear
+ * @addr: Address to start counting from
+ *
+ * clear_bit() is atomic and may not be reordered.  However, it does
+ * not contain a memory barrier, so if it is used for locking purposes,
+ * you should call smp_mb__before_clear_bit() and/or smp_mb__after_clear_bit()
+ * in order to ensure changes are visible on other processors.
+ */
+static __inline__ void clear_bit(int nr, volatile void * addr)
+{
+       __asm__ __volatile__( LOCK_PREFIX
+               "btrl %1,%0"
+               :"=m" (ADDR)
+               :"dIr" (nr));
+}
+
+static __inline__ void __clear_bit(int nr, volatile void * addr)
+{
+       __asm__ __volatile__(
+               "btrl %1,%0"
+               :"=m" (ADDR)
+               :"Ir" (nr));
+}
+#define smp_mb__before_clear_bit()     barrier()
+#define smp_mb__after_clear_bit()      barrier()
+
+/**
+ * __change_bit - Toggle a bit in memory
+ * @nr: the bit to change
+ * @addr: the address to start counting from
+ *
+ * Unlike change_bit(), this function is non-atomic and may be reordered.
+ * If it's called on the same region of memory simultaneously, the effect
+ * may be that only one operation succeeds.
+ */
+static __inline__ void __change_bit(int nr, volatile void * addr)
+{
+       __asm__ __volatile__(
+               "btcl %1,%0"
+               :"=m" (ADDR)
+               :"dIr" (nr));
+}
+
+/**
+ * change_bit - Toggle a bit in memory
+ * @nr: Bit to change
+ * @addr: Address to start counting from
+ *
+ * change_bit() is atomic and may not be reordered.
+ * Note that @nr may be almost arbitrarily large; this function is not
+ * restricted to acting on a single-word quantity.
+ */
+static __inline__ void change_bit(int nr, volatile void * addr)
+{
+       __asm__ __volatile__( LOCK_PREFIX
+               "btcl %1,%0"
+               :"=m" (ADDR)
+               :"dIr" (nr));
+}
+
+/**
+ * test_and_set_bit - Set a bit and return its old value
+ * @nr: Bit to set
+ * @addr: Address to count from
+ *
+ * This operation is atomic and cannot be reordered.  
+ * It also implies a memory barrier.
+ */
+static __inline__ int test_and_set_bit(int nr, volatile void * addr)
+{
+       int oldbit;
+
+       __asm__ __volatile__( LOCK_PREFIX
+               "btsl %2,%1\n\tsbbl %0,%0"
+               :"=r" (oldbit),"=m" (ADDR)
+               :"dIr" (nr) : "memory");
+       return oldbit;
+}
+
+/**
+ * __test_and_set_bit - Set a bit and return its old value
+ * @nr: Bit to set
+ * @addr: Address to count from
+ *
+ * This operation is non-atomic and can be reordered.  
+ * If two examples of this operation race, one can appear to succeed
+ * but actually fail.  You must protect multiple accesses with a lock.
+ */
+static __inline__ int __test_and_set_bit(int nr, volatile void * addr)
+{
+       int oldbit;
+
+       __asm__(
+               "btsl %2,%1\n\tsbbl %0,%0"
+               :"=r" (oldbit),"=m" (ADDR)
+               :"dIr" (nr));
+       return oldbit;
+}
+
+/**
+ * test_and_clear_bit - Clear a bit and return its old value
+ * @nr: Bit to clear
+ * @addr: Address to count from
+ *
+ * This operation is atomic and cannot be reordered.  
+ * It also implies a memory barrier.
+ */
+static __inline__ int test_and_clear_bit(int nr, volatile void * addr)
+{
+       int oldbit;
+
+       __asm__ __volatile__( LOCK_PREFIX
+               "btrl %2,%1\n\tsbbl %0,%0"
+               :"=r" (oldbit),"=m" (ADDR)
+               :"dIr" (nr) : "memory");
+       return oldbit;
+}
+
+/**
+ * __test_and_clear_bit - Clear a bit and return its old value
+ * @nr: Bit to clear
+ * @addr: Address to count from
+ *
+ * This operation is non-atomic and can be reordered.  
+ * If two examples of this operation race, one can appear to succeed
+ * but actually fail.  You must protect multiple accesses with a lock.
+ */
+static __inline__ int __test_and_clear_bit(int nr, volatile void * addr)
+{
+       int oldbit;
+
+       __asm__(
+               "btrl %2,%1\n\tsbbl %0,%0"
+               :"=r" (oldbit),"=m" (ADDR)
+               :"dIr" (nr));
+       return oldbit;
+}
+
+/* WARNING: non atomic and it can be reordered! */
+static __inline__ int __test_and_change_bit(int nr, volatile void * addr)
+{
+       int oldbit;
+
+       __asm__ __volatile__(
+               "btcl %2,%1\n\tsbbl %0,%0"
+               :"=r" (oldbit),"=m" (ADDR)
+               :"dIr" (nr) : "memory");
+       return oldbit;
+}
+
+/**
+ * test_and_change_bit - Change a bit and return its old value
+ * @nr: Bit to change
+ * @addr: Address to count from
+ *
+ * This operation is atomic and cannot be reordered.  
+ * It also implies a memory barrier.
+ */
+static __inline__ int test_and_change_bit(int nr, volatile void * addr)
+{
+       int oldbit;
+
+       __asm__ __volatile__( LOCK_PREFIX
+               "btcl %2,%1\n\tsbbl %0,%0"
+               :"=r" (oldbit),"=m" (ADDR)
+               :"dIr" (nr) : "memory");
+       return oldbit;
+}
+
+#if 0 /* Fool kernel-doc since it doesn't do macros yet */
+/**
+ * test_bit - Determine whether a bit is set
+ * @nr: bit number to test
+ * @addr: Address to start counting from
+ */
+static int test_bit(int nr, const volatile void * addr);
+#endif
+
+static __inline__ int constant_test_bit(int nr, const volatile void * addr)
+{
+       return ((1UL << (nr & 31)) & (((const volatile unsigned int *) addr)[nr >> 5])) != 0;
+}
+
+static __inline__ int variable_test_bit(int nr, volatile void * addr)
+{
+       int oldbit;
+
+       __asm__ __volatile__(
+               "btl %2,%1\n\tsbbl %0,%0"
+               :"=r" (oldbit)
+               :"m" (ADDR),"dIr" (nr));
+       return oldbit;
+}
+
+#define test_bit(nr,addr) \
+(__builtin_constant_p(nr) ? \
+ constant_test_bit((nr),(addr)) : \
+ variable_test_bit((nr),(addr)))
+
+/**
+ * find_first_zero_bit - find the first zero bit in a memory region
+ * @addr: The address to start the search at
+ * @size: The maximum size to search
+ *
+ * Returns the bit-number of the first zero bit, not the number of the byte
+ * containing a bit.
+ */
+static __inline__ int find_first_zero_bit(void * addr, unsigned size)
+{
+       int d0, d1, d2;
+       int res;
+
+       if (!size)
+               return 0;
+       __asm__ __volatile__(
+               "movl $-1,%%eax\n\t"
+               "xorq %%rdx,%%rdx\n\t"
+               "repe; scasl\n\t"
+               "je 1f\n\t"
+               "xorl -4(%%rdi),%%eax\n\t"
+               "subq $4,%%rdi\n\t"
+               "bsfl %%eax,%%edx\n"
+               "1:\tsubq %%rbx,%%rdi\n\t"
+               "shlq $3,%%rdi\n\t"
+               "addq %%rdi,%%rdx"
+               :"=d" (res), "=&c" (d0), "=&D" (d1), "=&a" (d2)
+               :"1" ((size + 31) >> 5), "2" (addr), "b" (addr) : "memory");
+       return res;
+}
+
+/**
+ * find_first_bit - find the first set bit in a memory region
+ * @addr: The address to start the search at
+ * @size: The maximum size to search
+ *
+ * Returns the bit-number of the first set bit, not the number of the byte
+ * containing a bit.
+ */
+static __inline__ int find_first_bit(void * addr, unsigned size)
+{
+       int d0, d1;
+       int res;
+
+       /* This looks at memory. Mark it volatile to tell gcc not to move it around */
+       /* Work in 32bit for now */ 
+       __asm__ __volatile__(
+               "xorl %%eax,%%eax\n\t"
+               "repe; scasl\n\t"
+               "jz 1f\n\t"
+               "leaq -4(%%rdi),%%rdi\n\t"
+               "bsfl (%%rdi),%%eax\n"
+               "1:\tsubq %%rbx,%%rdi\n\t"
+               "shlq $3,%%rdi\n\t"
+               "addq %%rdi,%%rax"
+               :"=a" (res), "=&c" (d0), "=&D" (d1)
+               :"1" ((size + 31) >> 5), "2" (addr), "b" (addr));
+       return res;
+}
+
+/**
+ * find_next_zero_bit - find the first zero bit in a memory region
+ * @addr: The address to base the search on
+ * @offset: The bitnumber to start searching at
+ * @size: The maximum size to search
+ */
+static __inline__ int find_next_zero_bit (void * addr, int size, int offset)
+{
+       unsigned int * p = ((unsigned int *) addr) + (offset >> 5);
+       int set = 0, bit = offset & 31, res;
+       
+       if (bit) {
+               /*
+                * Look for zero in the first 32 bits.
+                */
+               __asm__("bsfl %1,%0\n\t"
+                       "jne 1f\n\t"
+                       "movl $32, %0\n"
+                       "1:"
+                       : "=r" (set)
+                       : "r" (~(*p >> bit)));
+               if (set < (32 - bit))
+                       return set + offset;
+               set = 32 - bit;
+               p++;
+       }
+       /*
+        * No zero yet, search remaining full words for a zero
+        */
+       res = find_first_zero_bit (p, size - 32 * (p - (unsigned int *) addr));
+       return (offset + set + res);
+}
+
+/**
+ * find_next_bit - find the first set bit in a memory region
+ * @addr: The address to base the search on
+ * @offset: The bitnumber to start searching at
+ * @size: The maximum size to search
+ */
+static __inline__ int find_next_bit (void * addr, int size, int offset)
+{
+       unsigned long * p = ((unsigned long *) addr) + (offset >> 6);
+       unsigned long set = 0, bit = offset & 63, res;
+
+       if (bit) {
+               /*
+                * Look for nonzero in the first 64 bits:
+                */
+               __asm__("bsfq %1,%0\n\t"
+                       "jne 1f\n\t"
+                       "movq $64, %0\n"
+                       "1:"
+                       : "=r" (set)
+                       : "r" (*p >> bit));
+               if (set < (64 - bit))
+                       return set + offset;
+               set = 64 - bit;
+               p++;
+       }
+       /*
+        * No set bit yet, search remaining full words for a bit
+        */
+       res = find_first_bit (p, size - 64 * (p - (unsigned long *) addr));
+       return (offset + set + res);
+}
+
+/**
+ * ffz - find first zero in word.
+ * @word: The word to search
+ *
+ * Undefined if no zero exists, so code should check against ~0UL first.
+ */
+static __inline__ unsigned long ffz(unsigned long word)
+{
+       __asm__("bsfq %1,%0"
+               :"=r" (word)
+               :"r" (~word));
+       return word;
+}
+
+/**
+ * __ffs - find first bit in word.
+ * @word: The word to search
+ *
+ * Undefined if no bit exists, so code should check against 0 first.
+ */
+static __inline__ unsigned long __ffs(unsigned long word)
+{
+       __asm__("bsfq %1,%0"
+               :"=r" (word)
+               :"rm" (word));
+       return word;
+}
+
+#ifdef __KERNEL__
+
+/**
+ * ffs - find first bit set
+ * @x: the word to search
+ *
+ * This is defined the same way as
+ * the libc and compiler builtin ffs routines, therefore
+ * differs in spirit from the above ffz (man ffs).
+ */
+static __inline__ int ffs(int x)
+{
+       int r;
+
+       __asm__("bsfl %1,%0\n\t"
+               "jnz 1f\n\t"
+               "movl $-1,%0\n"
+               "1:" : "=r" (r) : "g" (x));
+       return r+1;
+}
+
+/**
+ * hweightN - returns the hamming weight of a N-bit word
+ * @x: the word to weigh
+ *
+ * The Hamming Weight of a number is the total number of bits set in it.
+ */
+
+#define hweight32(x) generic_hweight32(x)
+#define hweight16(x) generic_hweight16(x)
+#define hweight8(x) generic_hweight8(x)
+
+#endif /* __KERNEL__ */
+
+#ifdef __KERNEL__
+
+#define ext2_set_bit                 __test_and_set_bit
+#define ext2_clear_bit               __test_and_clear_bit
+#define ext2_test_bit                test_bit
+#define ext2_find_first_zero_bit     find_first_zero_bit
+#define ext2_find_next_zero_bit      find_next_zero_bit
+
+/* Bitmap functions for the minix filesystem.  */
+#define minix_test_and_set_bit(nr,addr) __test_and_set_bit(nr,addr)
+#define minix_set_bit(nr,addr) __set_bit(nr,addr)
+#define minix_test_and_clear_bit(nr,addr) __test_and_clear_bit(nr,addr)
+#define minix_test_bit(nr,addr) test_bit(nr,addr)
+#define minix_find_first_zero_bit(addr,size) find_first_zero_bit(addr,size)
+
+#endif /* __KERNEL__ */
+
+#endif /* _X86_64_BITOPS_H */
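
A small illustration of how the search and atomic bit helpers above combine: a toy ID allocator over a fixed bitmap (MAX_IDS and ids[] are invented for this sketch). find_first_zero_bit() finds a candidate and test_and_set_bit() claims it atomically, retrying if another CPU won the race:

    #define MAX_IDS 128
    static unsigned long ids[MAX_IDS / (8 * sizeof(unsigned long))];

    static int alloc_id(void)
    {
            int id;

            do {
                    id = find_first_zero_bit(ids, MAX_IDS);
                    if (id >= MAX_IDS)
                            return -1;              /* bitmap full */
            } while (test_and_set_bit(id, ids));    /* raced: bit taken, retry */
            return id;
    }
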
diff --git a/include/asm-x86_64/boot.h b/include/asm-x86_64/boot.h
new file mode 100644 (file)
index 0000000..96b228e
--- /dev/null
@@ -0,0 +1,15 @@
+#ifndef _LINUX_BOOT_H
+#define _LINUX_BOOT_H
+
+/* Don't touch these, unless you really know what you're doing. */
+#define DEF_INITSEG    0x9000
+#define DEF_SYSSEG     0x1000
+#define DEF_SETUPSEG   0x9020
+#define DEF_SYSSIZE    0x7F00
+
+/* Internal svga startup constants */
+#define NORMAL_VGA     0xffff          /* 80x25 mode */
+#define EXTENDED_VGA   0xfffe          /* 80x50 mode */
+#define ASK_VGA                0xfffd          /* ask for it at bootup */
+
+#endif
diff --git a/include/asm-x86_64/bootsetup.h b/include/asm-x86_64/bootsetup.h
new file mode 100644 (file)
index 0000000..e1fac9b
--- /dev/null
@@ -0,0 +1,34 @@
+
+#ifndef _X86_64_BOOTSETUP_H
+#define _X86_64_BOOTSETUP_H 1
+
+extern char x86_boot_params[2048];
+
+/*
+ * This is set up by the setup-routine at boot-time
+ */
+#define PARAM  ((unsigned char *)x86_boot_params)
+#define SCREEN_INFO (*(struct screen_info *) (PARAM+0))
+#define EXT_MEM_K (*(unsigned short *) (PARAM+2))
+#define ALT_MEM_K (*(unsigned int *) (PARAM+0x1e0))
+#define E820_MAP_NR (*(char*) (PARAM+E820NR))
+#define E820_MAP    ((struct e820entry *) (PARAM+E820MAP))
+#define APM_BIOS_INFO (*(struct apm_bios_info *) (PARAM+0x40))
+#define DRIVE_INFO (*(struct drive_info_struct *) (PARAM+0x80))
+#define SYS_DESC_TABLE (*(struct sys_desc_table_struct*)(PARAM+0xa0))
+#define MOUNT_ROOT_RDONLY (*(unsigned short *) (PARAM+0x1F2))
+#define RAMDISK_FLAGS (*(unsigned short *) (PARAM+0x1F8))
+#define ORIG_ROOT_DEV (*(unsigned short *) (PARAM+0x1FC))
+#define AUX_DEVICE_INFO (*(unsigned char *) (PARAM+0x1FF))
+#define LOADER_TYPE (*(unsigned char *) (PARAM+0x210))
+#define KERNEL_START (*(unsigned int *) (PARAM+0x214))
+#define INITRD_START (*(unsigned int *) (PARAM+0x218))
+#define INITRD_SIZE (*(unsigned int *) (PARAM+0x21c))
+#define COMMAND_LINE saved_command_line
+#define COMMAND_LINE_SIZE 256
+
+#define RAMDISK_IMAGE_START_MASK       0x07FF
+#define RAMDISK_PROMPT_FLAG            0x8000
+#define RAMDISK_LOAD_FLAG              0x4000  
+
+#endif
diff --git a/include/asm-x86_64/bugs.h b/include/asm-x86_64/bugs.h
new file mode 100644 (file)
index 0000000..af6e9ee
--- /dev/null
@@ -0,0 +1,42 @@
+/*
+ *  include/asm-x86_64/bugs.h
+ *
+ *  Copyright (C) 1994  Linus Torvalds
+ *  Copyright (C) 2000  SuSE
+ *
+ * This is included by init/main.c to check for architecture-dependent bugs.
+ *
+ * Needs:
+ *     void check_bugs(void);
+ */
+
+#include <linux/config.h>
+#include <asm/processor.h>
+#include <asm/i387.h>
+
+static inline void check_fpu(void)
+{
+       extern void __bad_fxsave_alignment(void);
+       if (offsetof(struct task_struct, thread.i387.fxsave) & 15)
+               __bad_fxsave_alignment();
+       printk(KERN_INFO "Enabling fast FPU save and restore... ");
+       set_in_cr4(X86_CR4_OSFXSR);
+       printk("done.\n");
+       printk(KERN_INFO "Enabling unmasked SIMD FPU exception support... ");
+       set_in_cr4(X86_CR4_OSXMMEXCPT);
+       printk("done.\n");
+}
+
+/*
+ * If we configured ourselves for FXSR, we'd better have it.
+ */
+
+static void __init check_bugs(void)
+{
+       identify_cpu(&boot_cpu_data);
+       check_fpu();
+#if !defined(CONFIG_SMP)
+       printk("CPU: ");
+       print_cpu_info(&boot_cpu_data);
+#endif
+}
diff --git a/include/asm-x86_64/byteorder.h b/include/asm-x86_64/byteorder.h
new file mode 100644 (file)
index 0000000..daef6af
--- /dev/null
@@ -0,0 +1,32 @@
+#ifndef _X86_64_BYTEORDER_H
+#define _X86_64_BYTEORDER_H
+
+#include <asm/types.h>
+
+#ifdef __GNUC__
+
+static __inline__ __const__ __u64 ___arch__swab64(__u64 x)
+{
+       __asm__("bswapq %0" : "=r" (x) : "0" (x));
+       return x;
+}
+
+static __inline__ __const__ __u32 ___arch__swab32(__u32 x)
+{
+       __asm__("bswapl %0" : "=r" (x) : "0" (x));
+       return x;
+}
+
+/* Do not define swab16.  Gcc is smart enough to recognize the "C" version and
+   convert it into a rotation or exchange.  */
+
+#define __arch__swab32(x) ___arch__swab32(x)
+#define __arch__swab64(x) ___arch__swab64(x)
+
+#endif /* __GNUC__ */
+
+#define __BYTEORDER_HAS_U64__
+
+#include <linux/byteorder/little_endian.h>
+
+#endif /* _X86_64_BYTEORDER_H */
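
Through <linux/byteorder/little_endian.h> these arch helpers back the usual cpu_to_be32()/be64_to_cpu() family. For illustration, on this little-endian architecture:

    __u32 a = ___arch__swab32(0x11223344);              /* == 0x44332211 */
    __u64 b = ___arch__swab64(0x1122334455667788ULL);   /* == 0x8877665544332211ULL */
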
diff --git a/include/asm-x86_64/cache.h b/include/asm-x86_64/cache.h
new file mode 100644 (file)
index 0000000..b4a2401
--- /dev/null
@@ -0,0 +1,13 @@
+/*
+ * include/asm-x8664/cache.h
+ */
+#ifndef __ARCH_X8664_CACHE_H
+#define __ARCH_X8664_CACHE_H
+
+#include <linux/config.h>
+
+/* L1 cache line size */
+#define L1_CACHE_SHIFT (CONFIG_X86_L1_CACHE_SHIFT)
+#define L1_CACHE_BYTES (1 << L1_CACHE_SHIFT)
+
+#endif
diff --git a/include/asm-x86_64/calling.h b/include/asm-x86_64/calling.h
new file mode 100644 (file)
index 0000000..443ad51
--- /dev/null
@@ -0,0 +1,100 @@
+/* Some macros to handle stack frames */ 
+
+       .macro SAVE_ARGS        
+       pushq %rdi
+       pushq %rsi
+       pushq %rdx
+       pushq %rcx
+       pushq %rax
+       pushq %r8
+       pushq %r9
+       pushq %r10
+       pushq %r11
+       .endm
+
+       .macro RESTORE_ARGS
+       popq %r11
+       popq %r10
+       popq %r9
+       popq %r8
+       popq %rax
+       popq %rcx       
+       popq %rdx       
+       popq %rsi       
+       popq %rdi       
+       .endm   
+
+       .macro LOAD_ARGS offset
+       movq \offset(%rsp),%r11
+       movq \offset+8(%rsp),%r10
+       movq \offset+16(%rsp),%r9
+       movq \offset+24(%rsp),%r8
+       movq \offset+40(%rsp),%rcx
+       movq \offset+48(%rsp),%rdx
+       movq \offset+56(%rsp),%rsi
+       movq \offset+64(%rsp),%rdi
+       movq \offset+72(%rsp),%rax
+       .endm
+                       
+       .macro SAVE_REST
+       pushq %rbx
+       pushq %rbp
+       pushq %r12
+       pushq %r13
+       pushq %r14
+       pushq %r15
+       .endm           
+
+       .macro RESTORE_REST
+       popq %r15
+       popq %r14
+       popq %r13
+       popq %r12
+       popq %rbp
+       popq %rbx
+       .endm
+               
+       .macro SAVE_ALL
+       SAVE_ARGS
+       SAVE_REST
+       .endm
+               
+       .macro RESTORE_ALL
+       RESTORE_REST
+       RESTORE_ARGS
+       .endm
+
+
+R15 = 0
+R14 = 8
+R13 = 16
+R12 = 24
+RBP = 32
+RBX = 40
+/* arguments: interrupts/non tracing syscalls only save up to here */
+R11 = 48
+R10 = 56       
+R9 = 64
+R8 = 72
+RAX = 80
+RCX = 88
+RDX = 96
+RSI = 104
+RDI = 112
+ORIG_RAX = 120       /* = ERROR */ 
+/* end of arguments */         
+/* cpu exception frame or undefined in case of fast syscall. */
+RIP = 128
+CS = 136
+EFLAGS = 144
+RSP = 152
+SS = 160
+ARGOFFSET = R11
+
+       .macro SYSRET32
+       .byte 0x0f,0x07
+       .endm
+
+       .macro SYSRET64
+       .byte 0x48,0x0f,0x07
+       .endm
diff --git a/include/asm-x86_64/checksum.h b/include/asm-x86_64/checksum.h
new file mode 100644 (file)
index 0000000..ba7cad8
--- /dev/null
@@ -0,0 +1,158 @@
+#ifndef _X86_64_CHECKSUM_H
+#define _X86_64_CHECKSUM_H
+
+
+/*
+ *     This is a version of ip_compute_csum() optimized for IP headers,
+ *     which always checksum on 4 octet boundaries.
+ *
+ *     By Jorge Cwik <jorge@laser.satlink.net>, adapted for linux by
+ *     Arnt Gulbrandsen.
+ */
+static inline unsigned short ip_fast_csum(unsigned char * iph,
+                                         unsigned int ihl) {
+       unsigned int sum;
+
+       __asm__ __volatile__(
+"\n        movl (%1), %0"
+"\n        subl $4, %2"
+"\n        jbe 2f"
+"\n        addl 4(%1), %0"
+"\n        adcl 8(%1), %0"
+"\n        adcl 12(%1), %0"
+"\n1:      adcl 16(%1), %0"
+"\n        lea 4(%1), %1"
+"\n        decl %2"
+"\n        jne 1b"
+"\n        adcl $0, %0"
+"\n        movl %0, %2"
+"\n        shrl $16, %0"
+"\n        addw %w2, %w0"
+"\n        adcl $0, %0"
+"\n        notl %0"
+"\n2:"
+       /* Since the input registers which are loaded with iph and ihl
+          are modified, we must also specify them as outputs, or gcc
+          will assume they contain their original values. */
+       : "=r" (sum), "=r" (iph), "=r" (ihl)
+       : "1" (iph), "2" (ihl));
+       return(sum);
+}
+
+
+
+/*
+ *     Fold a partial checksum. Note this works on a 32bit unfolded checksum. Make sure
+ *     to not mix with 64bit checksums!
+ */
+
+static inline unsigned int csum_fold(unsigned int sum)
+{
+       __asm__(
+"\n            addl %1,%0"
+"\n            adcl $0xffff,%0"
+               : "=r" (sum)
+               : "r" (sum << 16), "0" (sum & 0xffff0000)
+       );
+       return (~sum) >> 16;
+}
+
+
+
+
+static inline unsigned long csum_tcpudp_nofold(unsigned saddr,
+                                                  unsigned daddr,
+                                                  unsigned short len,
+                                                  unsigned short proto,
+                                                  unsigned int sum) 
+{
+    __asm__(
+"\n    addl %1, %0"
+"\n    adcl %2, %0"
+"\n    adcl %3, %0"
+"\n    adcl $0, %0"
+       : "=r" (sum)
+       : "g" (daddr), "g"(saddr), "g"((ntohs(len)<<16)+proto*256), "0"(sum));
+    return sum;
+}
+
+/*
+ * computes the checksum of the TCP/UDP pseudo-header
+ * returns a 16-bit checksum, already complemented
+ */
+static inline unsigned short int csum_tcpudp_magic(unsigned long saddr,
+                                                  unsigned long daddr,
+                                                  unsigned short len,
+                                                  unsigned short proto,
+                                                  unsigned int sum) 
+{
+       return csum_fold(csum_tcpudp_nofold(saddr,daddr,len,proto,sum));
+}
+
+
+/*
+ * computes the checksum of a memory block at buff, length len,
+ * and adds in "sum" (32-bit)
+ *
+ * returns a 32-bit number suitable for feeding into itself
+ * or csum_tcpudp_magic
+ *
+ * this function must be called with even lengths, except
+ * for the last fragment, which may be odd
+ *
+ * it's best to have buff aligned on a 32-bit boundary
+ */
+extern unsigned int csum_partial(const unsigned char * buff, int len, unsigned int sum);
+
+/*
+ * the same as csum_partial, but copies from src while it
+ * checksums
+ *
+ * here even more important to align src and dst on a 32-bit (or even
+ * better 64-bit) boundary
+ */
+unsigned int csum_partial_copy(const char *src, char *dst, int len, unsigned int sum);
+
+/*
+ * this is a new version of the above that records errors it finds in *errp,
+ * but continues and zeros the rest of the buffer.
+ */
+unsigned int csum_partial_copy_from_user(const char *src, char *dst, int len, unsigned int sum, int *errp);
+
+unsigned int csum_partial_copy_nocheck(const char *src, char *dst, int len, unsigned int sum);
+
+
+/*
+ * this routine is used for miscellaneous IP-like checksums, mainly
+ * in icmp.c
+ */
+
+extern unsigned short ip_compute_csum(unsigned char * buff, int len);
+
+#define _HAVE_ARCH_IPV6_CSUM
+static __inline__ unsigned short int csum_ipv6_magic(struct in6_addr *saddr,
+                                                    struct in6_addr *daddr,
+                                                    __u32 len,
+                                                    unsigned short proto,
+                                                    unsigned int sum) 
+{
+       __asm__(
+"\n            addl 0(%1), %0"
+"\n            adcl 4(%1), %0"
+"\n            adcl 8(%1), %0"
+"\n            adcl 12(%1), %0"
+"\n            adcl 0(%2), %0"
+"\n            adcl 4(%2), %0"
+"\n            adcl 8(%2), %0"
+"\n            adcl 12(%2), %0"
+"\n            adcl %3, %0"
+"\n            adcl %4, %0"
+"\n            adcl $0, %0"
+               : "=&r" (sum)
+               : "r" (saddr), "r" (daddr), 
+                 "r"(htonl(len)), "r"(htonl(proto)), "0"(sum));
+
+       return csum_fold(sum);
+}
+
+#endif
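
As an illustrative aside (not part of the patch): csum_fold() above collapses an unfolded 32-bit ones'-complement sum into the final inverted 16-bit Internet checksum. A portable sketch of the same computation, using a made-up two-word input:

#include <stdint.h>
#include <stdio.h>

static uint16_t fold(uint32_t sum)
{
        sum = (sum & 0xffff) + (sum >> 16);   /* fold carries into the low 16 bits */
        sum = (sum & 0xffff) + (sum >> 16);   /* a second pass absorbs the last carry */
        return (uint16_t)~sum;
}

int main(void)
{
        /* partial sum of the 16-bit words 0x4500 and 0x003c, as at the start of an IP header */
        uint32_t sum = 0x4500 + 0x003c;

        printf("folded checksum: 0x%04x\n", fold(sum));
        return 0;
}
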
diff --git a/include/asm-x86_64/cpufeature.h b/include/asm-x86_64/cpufeature.h
new file mode 100644 (file)
index 0000000..7d9f90e
--- /dev/null
@@ -0,0 +1,73 @@
+/*
+ * cpufeature.h
+ *
+ * Defines x86 CPU feature bits
+ */
+
+#ifndef __ASM_X8664_CPUFEATURE_H
+#define __ASM_X8664_CPUFEATURE_H
+
+/* Sample usage: CPU_FEATURE_P(cpu.x86_capability, FPU) */
+#define CPU_FEATURE_P(CAP, FEATURE) test_bit(X86_FEATURE_##FEATURE, CAP)
+
+#define NCAPINTS       4       /* Currently we have 4 32-bit words worth of info */
+
+/* Intel-defined CPU features, CPUID level 0x00000001, word 0 */
+#define X86_FEATURE_FPU                (0*32+ 0) /* Onboard FPU */
+#define X86_FEATURE_VME                (0*32+ 1) /* Virtual Mode Extensions */
+#define X86_FEATURE_DE         (0*32+ 2) /* Debugging Extensions */
+#define X86_FEATURE_PSE        (0*32+ 3) /* Page Size Extensions */
+#define X86_FEATURE_TSC                (0*32+ 4) /* Time Stamp Counter */
+#define X86_FEATURE_MSR                (0*32+ 5) /* Model-Specific Registers, RDMSR, WRMSR */
+#define X86_FEATURE_PAE                (0*32+ 6) /* Physical Address Extensions */
+#define X86_FEATURE_MCE                (0*32+ 7) /* Machine Check Architecture */
+#define X86_FEATURE_CX8                (0*32+ 8) /* CMPXCHG8 instruction */
+#define X86_FEATURE_APIC       (0*32+ 9) /* Onboard APIC */
+#define X86_FEATURE_SEP                (0*32+11) /* SYSENTER/SYSEXIT */
+#define X86_FEATURE_MTRR       (0*32+12) /* Memory Type Range Registers */
+#define X86_FEATURE_PGE                (0*32+13) /* Page Global Enable */
+#define X86_FEATURE_MCA                (0*32+14) /* Machine Check Architecture */
+#define X86_FEATURE_CMOV       (0*32+15) /* CMOV instruction (FCMOVCC and FCOMI too if FPU present) */
+#define X86_FEATURE_PAT                (0*32+16) /* Page Attribute Table */
+#define X86_FEATURE_PSE36      (0*32+17) /* 36-bit PSEs */
+#define X86_FEATURE_PN         (0*32+18) /* Processor serial number */
+#define X86_FEATURE_CLFLSH     (0*32+19) /* Supports the CLFLUSH instruction */
+#define X86_FEATURE_DTES       (0*32+21) /* Debug Trace Store */
+#define X86_FEATURE_ACPI       (0*32+22) /* ACPI via MSR */
+#define X86_FEATURE_MMX                (0*32+23) /* Multimedia Extensions */
+#define X86_FEATURE_FXSR       (0*32+24) /* FXSAVE and FXRSTOR instructions (fast save and restore */
+                                         /* of FPU context), and CR4.OSFXSR available */
+#define X86_FEATURE_XMM                (0*32+25) /* Streaming SIMD Extensions */
+#define X86_FEATURE_XMM2       (0*32+26) /* Streaming SIMD Extensions-2 */
+#define X86_FEATURE_SELFSNOOP  (0*32+27) /* CPU self snoop */
+#define X86_FEATURE_ACC                (0*32+29) /* Automatic clock control */
+#define X86_FEATURE_IA64       (0*32+30) /* IA-64 processor */
+
+/* AMD-defined CPU features, CPUID level 0x80000001, word 1 */
+/* Don't duplicate feature flags which are redundant with Intel! */
+#define X86_FEATURE_SYSCALL    (1*32+11) /* SYSCALL/SYSRET */
+#define X86_FEATURE_MMXEXT     (1*32+22) /* AMD MMX extensions */
+#define X86_FEATURE_LM         (1*32+29) /* Long Mode (x86-64) */
+#define X86_FEATURE_3DNOWEXT   (1*32+30) /* AMD 3DNow! extensions */
+#define X86_FEATURE_3DNOW      (1*32+31) /* 3DNow! */
+
+/* Transmeta-defined CPU features, CPUID level 0x80860001, word 2 */
+#define X86_FEATURE_RECOVERY   (2*32+ 0) /* CPU in recovery mode */
+#define X86_FEATURE_LONGRUN    (2*32+ 1) /* Longrun power control */
+#define X86_FEATURE_LRTI       (2*32+ 3) /* LongRun table interface */
+
+/* Other features, Linux-defined mapping, word 3 */
+/* This range is used for feature bits which conflict or are synthesized */
+#define X86_FEATURE_CXMMX      (3*32+ 0) /* Cyrix MMX extensions */
+#define X86_FEATURE_K6_MTRR    (3*32+ 1) /* AMD K6 nonstandard MTRRs */
+#define X86_FEATURE_CYRIX_ARR  (3*32+ 2) /* Cyrix ARRs (= MTRRs) */
+#define X86_FEATURE_CENTAUR_MCR        (3*32+ 3) /* Centaur MCRs (= MTRRs) */
+
+#endif /* __ASM_X8664_CPUFEATURE_H */
+
+/* 
+ * Local Variables:
+ * mode:c
+ * comment-column:42
+ * End:
+ */
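
As an illustrative aside (not part of the patch): a feature number of the form word*32 + bit is just an index into the NCAPINTS-word capability array, so testing a feature is a word lookup plus a bit test. A small user-space sketch with hypothetical capability contents:

#include <stdint.h>
#include <stdio.h>

#define NCAPINTS        4
#define X86_FEATURE_FPU (0*32 + 0)
#define X86_FEATURE_LM  (1*32 + 29)

static int has_feature(const uint32_t caps[NCAPINTS], int feature)
{
        return (caps[feature / 32] >> (feature % 32)) & 1;
}

int main(void)
{
        uint32_t caps[NCAPINTS] = { 0 };

        caps[1] |= 1u << 29;            /* pretend CPUID reported Long Mode */
        printf("LM=%d FPU=%d\n",
               has_feature(caps, X86_FEATURE_LM),
               has_feature(caps, X86_FEATURE_FPU));
        return 0;
}
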
diff --git a/include/asm-x86_64/current.h b/include/asm-x86_64/current.h
new file mode 100644 (file)
index 0000000..267e84f
--- /dev/null
@@ -0,0 +1,29 @@
+#ifndef _X86_64_CURRENT_H
+#define _X86_64_CURRENT_H
+
+#if !defined(__ASSEMBLY__) 
+struct task_struct;
+
+#include <asm/pda.h>
+
+static inline struct task_struct *get_current(void) 
+{ 
+       struct task_struct *t = read_pda(pcurrent); 
+       return t;
+} 
+
+
+
+#define current get_current()
+
+#else
+
+#ifndef ASM_OFFSET_H
+#include <asm/offset.h> 
+#endif
+
+#define GET_CURRENT(reg) movq %gs:(pda_pcurrent),reg
+
+#endif
+
+#endif /* !(_X86_64_CURRENT_H) */
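
As an illustrative aside (not part of the patch): unlike i386, where current is derived from the kernel stack pointer, here it is a plain field read out of the per-CPU PDA, which the real code reaches through read_pda() and the %gs segment base. A conceptual user-space model with stand-in structure contents:

#include <stdio.h>

struct task_struct { const char *comm; };

/* stand-in for the real per-CPU PDA reached via the %gs segment base */
struct pda { struct task_struct *pcurrent; };

static struct pda cpu_pda[2];

static struct task_struct *current_on(int cpu)
{
        return cpu_pda[cpu].pcurrent;
}

int main(void)
{
        struct task_struct init_task = { "swapper" };

        cpu_pda[0].pcurrent = &init_task;
        printf("current on cpu0: %s\n", current_on(0)->comm);
        return 0;
}
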
diff --git a/include/asm-x86_64/debugreg.h b/include/asm-x86_64/debugreg.h
new file mode 100644 (file)
index 0000000..81c8f09
--- /dev/null
@@ -0,0 +1,65 @@
+#ifndef _X86_64_DEBUGREG_H
+#define _X86_64_DEBUGREG_H
+
+
+/* Indicate the register numbers for a number of the specific
+   debug registers.  Registers 0-3 contain the addresses we wish to trap on */
+#define DR_FIRSTADDR 0        /* u_debugreg[DR_FIRSTADDR] */
+#define DR_LASTADDR 3         /* u_debugreg[DR_LASTADDR]  */
+
+#define DR_STATUS 6           /* u_debugreg[DR_STATUS]     */
+#define DR_CONTROL 7          /* u_debugreg[DR_CONTROL] */
+
+/* Define a few things for the status register.  We can use this to determine
+   which debugging register was responsible for the trap.  The other bits
+   are either reserved or not of interest to us. */
+
+#define DR_TRAP0       (0x1)           /* db0 */
+#define DR_TRAP1       (0x2)           /* db1 */
+#define DR_TRAP2       (0x4)           /* db2 */
+#define DR_TRAP3       (0x8)           /* db3 */
+
+#define DR_STEP                (0x4000)        /* single-step */
+#define DR_SWITCH      (0x8000)        /* task switch */
+
+/* Now define a bunch of things for manipulating the control register.
+   The top two bytes of the control register consist of 4 fields of 4
+   bits - each field corresponds to one of the four debug registers,
+   and indicates what types of access we trap on, and how large the data
+   field is that we are looking at */
+
+#define DR_CONTROL_SHIFT 16 /* Skip this many bits in ctl register */
+#define DR_CONTROL_SIZE 4   /* 4 control bits per register */
+
+#define DR_RW_EXECUTE (0x0)   /* Settings for the access types to trap on */
+#define DR_RW_WRITE (0x1)
+#define DR_RW_READ (0x3)
+
+#define DR_LEN_1 (0x0) /* Settings for data length to trap on */
+#define DR_LEN_2 (0x4)
+#define DR_LEN_4 (0xC)
+#define DR_LEN_8 (0x8)
+
+/* The low byte of the control register determines which registers are
+   enabled.  There are 4 fields of two bits each.  One bit is "local", meaning
+   that the processor will reset the bit after a task switch, and the other
+   is "global", meaning that we have to reset the bit explicitly.  With Linux
+   you can use either one, since we explicitly zero the register when we enter
+   kernel mode. */
+
+#define DR_LOCAL_ENABLE_SHIFT 0    /* Extra shift to the local enable bit */
+#define DR_GLOBAL_ENABLE_SHIFT 1   /* Extra shift to the global enable bit */
+#define DR_ENABLE_SIZE 2           /* 2 enable bits per register */
+
+#define DR_LOCAL_ENABLE_MASK (0x55)  /* Set  local bits for all 4 regs */
+#define DR_GLOBAL_ENABLE_MASK (0xAA) /* Set global bits for all 4 regs */
+
+/* The second byte of the control register has a few special things.
+   We can slow the instruction pipeline for instructions coming via the
+   gdt or the ldt if we want to.  I am not sure why this is an advantage. */
+
+#define DR_CONTROL_RESERVED (0xFC00) /* Reserved by Intel */
+#define DR_LOCAL_SLOWDOWN (0x100)   /* Local slow the pipeline */
+#define DR_GLOBAL_SLOWDOWN (0x200)  /* Global slow the pipeline */
+
+#endif
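
As an illustrative aside (not part of the patch): the field layout described above makes composing a DR7 value mechanical. A small sketch that builds the control word for a 4-byte write watchpoint in debug register 0:

#include <stdio.h>

#define DR_CONTROL_SHIFT      16
#define DR_CONTROL_SIZE       4
#define DR_ENABLE_SIZE        2
#define DR_LOCAL_ENABLE_SHIFT 0
#define DR_RW_WRITE           0x1
#define DR_LEN_4              0xC

int main(void)
{
        int regnum = 0;                  /* watchpoint in debug register 0 */
        unsigned long dr7 = 0;

        /* the type/length field for register 0 occupies bits 16-19 */
        dr7 |= (unsigned long)(DR_RW_WRITE | DR_LEN_4)
                << (DR_CONTROL_SHIFT + regnum * DR_CONTROL_SIZE);
        /* its local-enable bit is bit 0 */
        dr7 |= 1ul << (DR_LOCAL_ENABLE_SHIFT + regnum * DR_ENABLE_SIZE);

        printf("dr7 = %#lx\n", dr7);     /* prints 0xd0001 */
        return 0;
}
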
diff --git a/include/asm-x86_64/delay.h b/include/asm-x86_64/delay.h
new file mode 100644 (file)
index 0000000..04cbfde
--- /dev/null
@@ -0,0 +1,20 @@
+#ifndef _X8664_DELAY_H
+#define _X8664_DELAY_H
+
+/*
+ * Copyright (C) 1993 Linus Torvalds
+ *
+ * Delay routines calling functions in arch/x86_64/lib/delay.c
+ */
+extern void __bad_udelay(void);
+
+extern void __udelay(unsigned long usecs);
+extern void __const_udelay(unsigned long usecs);
+extern void __delay(unsigned long loops);
+
+#define udelay(n) (__builtin_constant_p(n) ? \
+       ((n) > 20000 ? __bad_udelay() : __const_udelay((n) * 0x10c6ul)) : \
+       __udelay(n))
+
+#endif /* defined(_X8664_DELAY_H) */
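
As an illustrative aside (not part of the patch): udelay() relies on __builtin_constant_p so that constant arguments are pre-scaled at compile time (0x10c6 is roughly 2^32 / 1,000,000) and absurdly large constants turn into a link error via __bad_udelay(). A user-space sketch of the same dispatch idea, with stand-in helper functions and without the link-error guard:

#include <stdio.h>

static void const_path(unsigned long loops) { printf("constant path, pre-scaled loops: %lu\n", loops); }
static void var_path(unsigned long usecs)   { printf("runtime path, usecs: %lu\n", usecs); }

#define my_udelay(n) (__builtin_constant_p(n) ? \
        const_path((n) * 0x10c6ul) : var_path(n))

int main(void)
{
        unsigned long t = 150;

        my_udelay(10);          /* constant: resolved at compile time */
        my_udelay(t);           /* variable: dispatched at run time */
        return 0;
}
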
diff --git a/include/asm-x86_64/desc.h b/include/asm-x86_64/desc.h
new file mode 100644 (file)
index 0000000..42fcd43
--- /dev/null
@@ -0,0 +1,187 @@
+/* Written 2000 by Andi Kleen */ 
+#ifndef __ARCH_DESC_H
+#define __ARCH_DESC_H
+
+#include <linux/threads.h>
+#include <asm/ldt.h>
+
+#ifndef __ASSEMBLY__
+
+/* Keep this synchronized with kernel/head.S */
+#define TSS_START (7 * 8)
+#define LDT_START (TSS_START + NR_CPUS*16) 
+
+#define __TSS(n)  (TSS_START + (n)*16)
+#define __LDT(n)  (LDT_START + (n)*16)
+
+extern __u8 tss_start[]; 
+extern __u8 gdt_table[];
+extern __u8 ldt_start[];
+extern __u8 gdt_end[];
+
+enum { 
+       GATE_INTERRUPT = 0xE, 
+       GATE_TRAP = 0xF,        
+       GATE_CALL = 0xC,
+};     
+
+// 16byte gate
+struct gate_struct {          
+       u16 offset_low;
+       u16 segment; 
+       unsigned ist : 3, zero0 : 5, type : 5, dpl : 2, p : 1;
+       u16 offset_middle;
+       u32 offset_high;
+       u32 zero1; 
+} __attribute__((packed));
+
+#define PTR_LOW(x) ((unsigned long)(x) & 0xFFFF) 
+#define PTR_MIDDLE(x) (((unsigned long)(x) >> 16) & 0xFFFF)
+#define PTR_HIGH(x) ((unsigned long)(x) >> 32)
+
+// 8 byte segment descriptor
+struct desc_struct { 
+       u16 limit0;
+       u16 base0;
+       unsigned base1 : 8, type : 4, s : 1, dpl : 2, p : 1;
+       unsigned limit : 4, avl : 1, l : 1, d : 1, g : 1, base2 : 8;
+} __attribute__((packed)); 
+
+enum { 
+       DESC_TSS = 0x9,
+       DESC_LDT = 0x2,
+       TSSLIMIT = 0x67,        
+}; 
+
+// LDT or TSS descriptor in the GDT. 16 bytes.
+struct ldttss_desc { 
+       u16 limit0;
+       u16 base0;
+       unsigned base1 : 8, type : 5, dpl : 2, p : 1;
+       unsigned limit1 : 4, zero0 : 3, g : 1, base2 : 8;
+       u32 base3;
+       u32 zero1; 
+} __attribute__((packed)); 
+
+struct desc_ptr {
+       unsigned short size;
+       unsigned long address;
+} __attribute__((packed)) ;
+
+/* FIXME: these should use more generic register classes */
+#define load_TR(n) asm volatile("ltr %%ax"::"a" (__TSS(n)))
+#define __load_LDT(n) asm volatile("lldt %%ax"::"a" (__LDT(n)))
+
+/*
+ * This is the ldt that every process will get unless we need
+ * something other than this.
+ */
+extern struct desc_struct default_ldt[];
+extern struct gate_struct idt_table[]; 
+
+static inline void _set_gate(void *adr, unsigned type, unsigned long func, unsigned dpl, unsigned ist)  
+{
+       struct gate_struct s;   
+       s.offset_low = PTR_LOW(func); 
+       s.segment = __KERNEL_CS;
+       s.ist = ist; 
+       s.p = 1;
+       s.dpl = dpl; 
+       s.zero0 = 0;
+       s.zero1 = 0; 
+       s.type = type; 
+       s.offset_middle = PTR_MIDDLE(func); 
+       s.offset_high = PTR_HIGH(func); 
+       /* does not need to be atomic because it is only done once at setup time */ 
+       memcpy(adr, &s, 16); 
+} 
+
+static inline void set_intr_gate(int nr, void *func) 
+{ 
+       _set_gate(&idt_table[nr], GATE_INTERRUPT, (unsigned long) func, 3, 0); 
+} 
+
+static inline void set_intr_gate_ist(int nr, void *func, unsigned ist) 
+{ 
+       _set_gate(&idt_table[nr], GATE_INTERRUPT, (unsigned long) func, 3, ist); 
+} 
+
+static inline void set_system_gate(int nr, void *func) 
+{ 
+       _set_gate(&idt_table[nr], GATE_INTERRUPT, (unsigned long) func, 3, 0); 
+} 
+
+static inline void set_trap_gate(int nr, void *func) 
+{ 
+       _set_gate(&idt_table[nr], GATE_TRAP, (unsigned long) func, 0, 0); 
+} 
+
+static inline void set_call_gate(void *adr, void *func) 
+{ 
+       _set_gate(adr, GATE_CALL, (unsigned long) func, 3, 0); 
+} 
+
+static inline void set_priv_gate(int nr, void *func) 
+{ 
+       _set_gate(&idt_table[nr], GATE_TRAP, (unsigned long) func, 0, 0);       
+} 
+
+static inline void set_tssldt_descriptor(void *ptr, unsigned long tss, unsigned type, 
+                                        unsigned size) 
+{ 
+       struct ldttss_desc d;
+       memset(&d,0,sizeof(d)); 
+       d.limit0 = size & 0xFFFF;
+       d.base0 = PTR_LOW(tss); 
+       d.base1 = PTR_MIDDLE(tss) & 0xFF; 
+       d.type = type;
+       d.p = 1; 
+       d.limit1 = 0xF;
+       d.base2 = (PTR_MIDDLE(tss) >> 8) & 0xFF; 
+       d.base3 = PTR_HIGH(tss); 
+       memcpy(ptr, &d, 16); 
+}
+
+static inline void set_tss_desc(unsigned n, void *addr)
+{ 
+       set_tssldt_descriptor((__u8*)gdt_table + __TSS(n), (unsigned long)addr, DESC_TSS, 
+                             TSSLIMIT); 
+} 
+
+static inline void set_ldt_desc(unsigned n, void *addr, int size)
+{ 
+       set_tssldt_descriptor((__u8*)gdt_table + __LDT(n), (unsigned long)addr, DESC_LDT, size); 
+} 
+
+
+#ifndef MINIKERNEL
+extern inline void clear_LDT(void)
+{
+       int cpu = smp_processor_id();
+       set_ldt_desc(cpu, &default_ldt[0], 5);
+       __load_LDT(cpu);
+}
+
+
+/*
+ * load one particular LDT into the current CPU
+ */
+extern inline void load_LDT (struct mm_struct *mm)
+{
+       int cpu = smp_processor_id();
+       void *segments = mm->context.segments;
+       int count = LDT_ENTRIES;
+
+       if (!segments) {
+               segments = &default_ldt[0];
+               count = 5;
+       }
+               
+       set_ldt_desc(cpu, segments, count);
+       __load_LDT(cpu);
+}
+#endif
+
+#endif /* !__ASSEMBLY__ */
+
+#endif
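
As an illustrative aside (not part of the patch): a 64-bit handler address no longer fits the 32-bit offset of a legacy gate, which is why the 16-byte gate format and the PTR_LOW/PTR_MIDDLE/PTR_HIGH helpers exist. A quick check that the three pieces reassemble losslessly, using a hypothetical address:

#include <assert.h>
#include <stdint.h>

#define PTR_LOW(x)    ((uint64_t)(x) & 0xFFFF)
#define PTR_MIDDLE(x) (((uint64_t)(x) >> 16) & 0xFFFF)
#define PTR_HIGH(x)   ((uint64_t)(x) >> 32)

int main(void)
{
        uint64_t addr = 0xffffffff80101234ULL;   /* hypothetical handler address */
        uint64_t rebuilt = PTR_LOW(addr)
                         | (PTR_MIDDLE(addr) << 16)
                         | (PTR_HIGH(addr)   << 32);

        assert(rebuilt == addr);
        return 0;
}
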
diff --git a/include/asm-x86_64/div64.h b/include/asm-x86_64/div64.h
new file mode 100644 (file)
index 0000000..2c94d07
--- /dev/null
@@ -0,0 +1,14 @@
+#ifndef __X86_64_DIV64
+#define __X86_64_DIV64
+
+/*
+ * Hey, we're already 64-bit, no
+ * need to play games..
+ */
+#define do_div(n,base) ({ \
+       int __res; \
+       __res = ((unsigned long) (n)) % (unsigned) (base); \
+       (n) = ((unsigned long) (n)) / (unsigned) (base); \
+       __res; })
+
+#endif
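
As an illustrative aside (not part of the patch): do_div() divides its first argument in place and evaluates to the remainder; since this architecture has native 64-bit division, it is plain C. A usage sketch:

#include <stdio.h>

#define do_div(n,base) ({ \
        int __res; \
        __res = ((unsigned long) (n)) % (unsigned) (base); \
        (n) = ((unsigned long) (n)) / (unsigned) (base); \
        __res; })

int main(void)
{
        unsigned long ns = 1000000007UL;
        int rem = do_div(ns, 1000);      /* ns becomes 1000000, rem becomes 7 */

        printf("quotient=%lu remainder=%d\n", ns, rem);
        return 0;
}
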
diff --git a/include/asm-x86_64/dma.h b/include/asm-x86_64/dma.h
new file mode 100644 (file)
index 0000000..16fa3a0
--- /dev/null
@@ -0,0 +1,298 @@
+/* $Id: dma.h,v 1.1.1.1 2001/04/19 20:00:38 ak Exp $
+ * linux/include/asm/dma.h: Defines for using and allocating dma channels.
+ * Written by Hennus Bergman, 1992.
+ * High DMA channel support & info by Hannu Savolainen
+ * and John Boyd, Nov. 1992.
+ */
+
+#ifndef _ASM_DMA_H
+#define _ASM_DMA_H
+
+#include <linux/config.h>
+#include <linux/spinlock.h>    /* And spinlocks */
+#include <asm/io.h>            /* need byte IO */
+#include <linux/delay.h>
+
+
+#ifdef HAVE_REALLY_SLOW_DMA_CONTROLLER
+#define dma_outb       outb_p
+#else
+#define dma_outb       outb
+#endif
+
+#define dma_inb                inb
+
+/*
+ * NOTES about DMA transfers:
+ *
+ *  controller 1: channels 0-3, byte operations, ports 00-1F
+ *  controller 2: channels 4-7, word operations, ports C0-DF
+ *
+ *  - ALL registers are 8 bits only, regardless of transfer size
+ *  - channel 4 is not used - cascades 1 into 2.
+ *  - channels 0-3 are byte - addresses/counts are for physical bytes
+ *  - channels 5-7 are word - addresses/counts are for physical words
+ *  - transfers must not cross physical 64K (0-3) or 128K (5-7) boundaries
+ *  - transfer count loaded to registers is 1 less than actual count
+ *  - controller 2 offsets are all even (2x offsets for controller 1)
+ *  - page registers for 5-7 don't use data bit 0, represent 128K pages
+ *  - page registers for 0-3 use bit 0, represent 64K pages
+ *
+ * DMA transfers are limited to the lower 16MB of _physical_ memory.  
+ * Note that addresses loaded into registers must be _physical_ addresses,
+ * not logical addresses (which may differ if paging is active).
+ *
+ *  Address mapping for channels 0-3:
+ *
+ *   A23 ... A16 A15 ... A8  A7 ... A0    (Physical addresses)
+ *    |  ...  |   |  ... |   |  ... |
+ *    |  ...  |   |  ... |   |  ... |
+ *    |  ...  |   |  ... |   |  ... |
+ *   P7  ...  P0  A7 ... A0  A7 ... A0   
+ * |    Page    | Addr MSB | Addr LSB |   (DMA registers)
+ *
+ *  Address mapping for channels 5-7:
+ *
+ *   A23 ... A17 A16 A15 ... A9 A8 A7 ... A1 A0    (Physical addresses)
+ *    |  ...  |   \   \   ... \  \  \  ... \  \
+ *    |  ...  |    \   \   ... \  \  \  ... \  (not used)
+ *    |  ...  |     \   \   ... \  \  \  ... \
+ *   P7  ...  P1 (0) A7 A6  ... A0 A7 A6 ... A0   
+ * |      Page      |  Addr MSB   |  Addr LSB  |   (DMA registers)
+ *
+ * Again, channels 5-7 transfer _physical_ words (16 bits), so addresses
+ * and counts _must_ be word-aligned (the lowest address bit is _ignored_ at
+ * the hardware level, so odd-byte transfers aren't possible).
+ *
+ * Transfer count (_not # bytes_) is limited to 64K, represented as actual
+ * count - 1 : 64K => 0xFFFF, 1 => 0x0000.  Thus, count is always 1 or more,
+ * and up to 128K bytes may be transferred on channels 5-7 in one operation. 
+ *
+ */
+
+#define MAX_DMA_CHANNELS       8
+
+/* The maximum address that we can perform a DMA transfer to on this platform */
+#define MAX_DMA_ADDRESS      (PAGE_OFFSET+0x1000000)
+
+/* 8237 DMA controllers */
+#define IO_DMA1_BASE   0x00    /* 8 bit slave DMA, channels 0..3 */
+#define IO_DMA2_BASE   0xC0    /* 16 bit master DMA, ch 4(=slave input)..7 */
+
+/* DMA controller registers */
+#define DMA1_CMD_REG           0x08    /* command register (w) */
+#define DMA1_STAT_REG          0x08    /* status register (r) */
+#define DMA1_REQ_REG            0x09    /* request register (w) */
+#define DMA1_MASK_REG          0x0A    /* single-channel mask (w) */
+#define DMA1_MODE_REG          0x0B    /* mode register (w) */
+#define DMA1_CLEAR_FF_REG      0x0C    /* clear pointer flip-flop (w) */
+#define DMA1_TEMP_REG           0x0D    /* Temporary Register (r) */
+#define DMA1_RESET_REG         0x0D    /* Master Clear (w) */
+#define DMA1_CLR_MASK_REG       0x0E    /* Clear Mask */
+#define DMA1_MASK_ALL_REG       0x0F    /* all-channels mask (w) */
+
+#define DMA2_CMD_REG           0xD0    /* command register (w) */
+#define DMA2_STAT_REG          0xD0    /* status register (r) */
+#define DMA2_REQ_REG            0xD2    /* request register (w) */
+#define DMA2_MASK_REG          0xD4    /* single-channel mask (w) */
+#define DMA2_MODE_REG          0xD6    /* mode register (w) */
+#define DMA2_CLEAR_FF_REG      0xD8    /* clear pointer flip-flop (w) */
+#define DMA2_TEMP_REG           0xDA    /* Temporary Register (r) */
+#define DMA2_RESET_REG         0xDA    /* Master Clear (w) */
+#define DMA2_CLR_MASK_REG       0xDC    /* Clear Mask */
+#define DMA2_MASK_ALL_REG       0xDE    /* all-channels mask (w) */
+
+#define DMA_ADDR_0              0x00    /* DMA address registers */
+#define DMA_ADDR_1              0x02
+#define DMA_ADDR_2              0x04
+#define DMA_ADDR_3              0x06
+#define DMA_ADDR_4              0xC0
+#define DMA_ADDR_5              0xC4
+#define DMA_ADDR_6              0xC8
+#define DMA_ADDR_7              0xCC
+
+#define DMA_CNT_0               0x01    /* DMA count registers */
+#define DMA_CNT_1               0x03
+#define DMA_CNT_2               0x05
+#define DMA_CNT_3               0x07
+#define DMA_CNT_4               0xC2
+#define DMA_CNT_5               0xC6
+#define DMA_CNT_6               0xCA
+#define DMA_CNT_7               0xCE
+
+#define DMA_PAGE_0              0x87    /* DMA page registers */
+#define DMA_PAGE_1              0x83
+#define DMA_PAGE_2              0x81
+#define DMA_PAGE_3              0x82
+#define DMA_PAGE_5              0x8B
+#define DMA_PAGE_6              0x89
+#define DMA_PAGE_7              0x8A
+
+#define DMA_MODE_READ  0x44    /* I/O to memory, no autoinit, increment, single mode */
+#define DMA_MODE_WRITE 0x48    /* memory to I/O, no autoinit, increment, single mode */
+#define DMA_MODE_CASCADE 0xC0   /* pass thru DREQ->HRQ, DACK<-HLDA only */
+
+#define DMA_AUTOINIT   0x10
+
+
+extern spinlock_t  dma_spin_lock;
+
+static __inline__ unsigned long claim_dma_lock(void)
+{
+       unsigned long flags;
+       spin_lock_irqsave(&dma_spin_lock, flags);
+       return flags;
+}
+
+static __inline__ void release_dma_lock(unsigned long flags)
+{
+       spin_unlock_irqrestore(&dma_spin_lock, flags);
+}
+
+/* enable/disable a specific DMA channel */
+static __inline__ void enable_dma(unsigned int dmanr)
+{
+       if (dmanr<=3)
+               dma_outb(dmanr,  DMA1_MASK_REG);
+       else
+               dma_outb(dmanr & 3,  DMA2_MASK_REG);
+}
+
+static __inline__ void disable_dma(unsigned int dmanr)
+{
+       if (dmanr<=3)
+               dma_outb(dmanr | 4,  DMA1_MASK_REG);
+       else
+               dma_outb((dmanr & 3) | 4,  DMA2_MASK_REG);
+}
+
+/* Clear the 'DMA Pointer Flip Flop'.
+ * Write 0 for LSB/MSB, 1 for MSB/LSB access.
+ * Use this once to initialize the FF to a known state.
+ * After that, keep track of it. :-)
+ * --- In order to do that, the DMA routines below should ---
+ * --- only be used while holding the DMA lock ! ---
+ */
+static __inline__ void clear_dma_ff(unsigned int dmanr)
+{
+       if (dmanr<=3)
+               dma_outb(0,  DMA1_CLEAR_FF_REG);
+       else
+               dma_outb(0,  DMA2_CLEAR_FF_REG);
+}
+
+/* set mode (above) for a specific DMA channel */
+static __inline__ void set_dma_mode(unsigned int dmanr, char mode)
+{
+       if (dmanr<=3)
+               dma_outb(mode | dmanr,  DMA1_MODE_REG);
+       else
+               dma_outb(mode | (dmanr&3),  DMA2_MODE_REG);
+}
+
+/* Set only the page register bits of the transfer address.
+ * This is used for successive transfers when we know the contents of
+ * the lower 16 bits of the DMA current address register, but a 64k boundary
+ * may have been crossed.
+ */
+static __inline__ void set_dma_page(unsigned int dmanr, char pagenr)
+{
+       switch(dmanr) {
+               case 0:
+                       dma_outb(pagenr, DMA_PAGE_0);
+                       break;
+               case 1:
+                       dma_outb(pagenr, DMA_PAGE_1);
+                       break;
+               case 2:
+                       dma_outb(pagenr, DMA_PAGE_2);
+                       break;
+               case 3:
+                       dma_outb(pagenr, DMA_PAGE_3);
+                       break;
+               case 5:
+                       dma_outb(pagenr & 0xfe, DMA_PAGE_5);
+                       break;
+               case 6:
+                       dma_outb(pagenr & 0xfe, DMA_PAGE_6);
+                       break;
+               case 7:
+                       dma_outb(pagenr & 0xfe, DMA_PAGE_7);
+                       break;
+       }
+}
+
+
+/* Set transfer address & page bits for specific DMA channel.
+ * Assumes dma flipflop is clear.
+ */
+static __inline__ void set_dma_addr(unsigned int dmanr, unsigned int a)
+{
+       set_dma_page(dmanr, a>>16);
+       if (dmanr <= 3)  {
+           dma_outb( a & 0xff, ((dmanr&3)<<1) + IO_DMA1_BASE );
+            dma_outb( (a>>8) & 0xff, ((dmanr&3)<<1) + IO_DMA1_BASE );
+       }  else  {
+           dma_outb( (a>>1) & 0xff, ((dmanr&3)<<2) + IO_DMA2_BASE );
+           dma_outb( (a>>9) & 0xff, ((dmanr&3)<<2) + IO_DMA2_BASE );
+       }
+}
+
+
+/* Set transfer size (max 64k for DMA1..3, 128k for DMA5..7) for
+ * a specific DMA channel.
+ * You must ensure the parameters are valid.
+ * NOTE: from a manual: "the number of transfers is one more
+ * than the initial word count"! This is taken into account.
+ * Assumes dma flip-flop is clear.
+ * NOTE 2: "count" represents _bytes_ and must be even for channels 5-7.
+ */
+static __inline__ void set_dma_count(unsigned int dmanr, unsigned int count)
+{
+        count--;
+       if (dmanr <= 3)  {
+           dma_outb( count & 0xff, ((dmanr&3)<<1) + 1 + IO_DMA1_BASE );
+           dma_outb( (count>>8) & 0xff, ((dmanr&3)<<1) + 1 + IO_DMA1_BASE );
+        } else {
+           dma_outb( (count>>1) & 0xff, ((dmanr&3)<<2) + 2 + IO_DMA2_BASE );
+           dma_outb( (count>>9) & 0xff, ((dmanr&3)<<2) + 2 + IO_DMA2_BASE );
+        }
+}
+
+
+/* Get DMA residue count. After a DMA transfer, this
+ * should return zero. Reading this while a DMA transfer is
+ * still in progress will return unpredictable results.
+ * If called before the channel has been used, it may return 1.
+ * Otherwise, it returns the number of _bytes_ left to transfer.
+ *
+ * Assumes DMA flip-flop is clear.
+ */
+static __inline__ int get_dma_residue(unsigned int dmanr)
+{
+       unsigned int io_port = (dmanr<=3)? ((dmanr&3)<<1) + 1 + IO_DMA1_BASE
+                                        : ((dmanr&3)<<2) + 2 + IO_DMA2_BASE;
+
+       /* using short to get 16-bit wrap around */
+       unsigned short count;
+
+       count = 1 + dma_inb(io_port);
+       count += dma_inb(io_port) << 8;
+       
+       return (dmanr<=3)? count : (count<<1);
+}
+
+
+/* These are in kernel/dma.c: */
+extern int request_dma(unsigned int dmanr, const char * device_id);    /* reserve a DMA channel */
+extern void free_dma(unsigned int dmanr);      /* release it again */
+
+/* From PCI */
+
+#ifdef CONFIG_PCI
+extern int isa_dma_bridge_buggy;
+#else
+#define isa_dma_bridge_buggy   (0)
+#endif
+
+#endif /* _ASM_DMA_H */
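
As an illustrative aside (not part of the patch): the comment block above boils down to splitting a sub-16MB physical address into an 8-bit page register plus a 16-bit base address, and programming the count as N-1, which is what set_dma_addr()/set_dma_count() do for the 8-bit channels. A user-space sketch of that split with made-up values (word channels 5-7 additionally shift the address and count right by one):

#include <stdio.h>

int main(void)
{
        unsigned int addr  = 0x123456;   /* hypothetical physical address below 16MB */
        unsigned int count = 4096;       /* transfer size in bytes */

        unsigned char  page    = addr >> 16;        /* page register, P23..P16 */
        unsigned short addr16  = addr & 0xffff;     /* base address, A15..A0   */
        unsigned short hwcount = count - 1;         /* the controller counts N-1 */

        printf("page=%#x addr=%#x count=%#x\n", page, addr16, hwcount);
        return 0;
}
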
diff --git a/include/asm-x86_64/e820.h b/include/asm-x86_64/e820.h
new file mode 100644 (file)
index 0000000..40e3a86
--- /dev/null
@@ -0,0 +1,40 @@
+/*
+ * structures and definitions for the int 15, ax=e820 memory map
+ * scheme.
+ *
+ * In a nutshell, arch/x86_64/boot/setup.S populates a scratch table
+ * in the empty_zero_block that contains a list of usable address/size
+ * pairs.   In arch/x86_64/kernel/setup.c, this information is
+ * transferred into the e820map, and in arch/i386/x86_64/init.c, that
+ * new information is used to mark pages reserved or not.
+ *
+ */
+#ifndef __E820_HEADER
+#define __E820_HEADER
+
+#define E820MAP        0x2d0           /* our map */
+#define E820MAX        32              /* number of entries in E820MAP */
+#define E820NR 0x1e8           /* # entries in E820MAP */
+
+#define E820_RAM       1
+#define E820_RESERVED  2
+#define E820_ACPI      3 /* usable as RAM once ACPI tables have been read */
+#define E820_NVS       4
+
+#define HIGH_MEMORY    (1024*1024)
+
+#ifndef __ASSEMBLY__
+
+struct e820map {
+    int nr_map;
+    struct e820entry {
+       u64 addr __attribute__((packed));       /* start of memory segment */
+       u64 size __attribute__((packed));       /* size of memory segment */
+       u32 type __attribute__((packed));       /* type of memory segment */
+    } map[E820MAX];
+};
+
+extern struct e820map e820;
+#endif/*!__ASSEMBLY__*/
+
+#endif/*__E820_HEADER*/
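
As an illustrative aside (not part of the patch): once the boot-time table has been copied into struct e820map, consumers simply walk nr_map entries and act on the type field. A user-space sketch that sums the E820_RAM ranges of a hypothetical map:

#include <stdint.h>
#include <stdio.h>

#define E820MAX       32
#define E820_RAM      1
#define E820_RESERVED 2

struct e820entry { uint64_t addr, size; uint32_t type; };
struct e820map   { int nr_map; struct e820entry map[E820MAX]; };

int main(void)
{
        /* hypothetical map, loosely shaped like a PC memory layout */
        struct e820map e820 = { .nr_map = 3, .map = {
                { 0x0000000, 0x009f000, E820_RAM      },
                { 0x00f0000, 0x0010000, E820_RESERVED },
                { 0x0100000, 0x7f00000, E820_RAM      },
        } };
        uint64_t ram = 0;
        int i;

        for (i = 0; i < e820.nr_map; i++)
                if (e820.map[i].type == E820_RAM)
                        ram += e820.map[i].size;
        printf("usable RAM: %llu bytes\n", (unsigned long long)ram);
        return 0;
}
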
diff --git a/include/asm-x86_64/elf.h b/include/asm-x86_64/elf.h
new file mode 100644 (file)
index 0000000..e04c867
--- /dev/null
@@ -0,0 +1,120 @@
+#ifndef __ASM_X86_64_ELF_H
+#define __ASM_X86_64_ELF_H
+
+/*
+ * ELF register definitions..
+ */
+
+#include <asm/ptrace.h>
+#include <asm/user.h>
+
+typedef unsigned long elf_greg_t;
+
+#define ELF_NGREG (sizeof (struct user_regs_struct) / sizeof(elf_greg_t))
+typedef elf_greg_t elf_gregset_t[ELF_NGREG];
+
+typedef struct user_i387_struct elf_fpregset_t;
+typedef struct user_fxsr_struct elf_fpxregset_t;
+
+/*
+ * This is used to ensure we don't load something for the wrong architecture.
+ */
+#define elf_check_arch(x) \
+       ((x)->e_machine == EM_X8664)
+
+/*
+ * These are used to set parameters in the core dumps.
+ */
+#define ELF_CLASS      ELFCLASS64
+#define ELF_DATA       ELFDATA2LSB
+#define ELF_ARCH       EM_X8664
+
+/* SVR4/i386 ABI (pages 3-31, 3-32) says that when the program starts %edx
+   contains a pointer to a function which might be registered using `atexit'.
+   This provides a means for the dynamic linker to call DT_FINI functions for
+   shared libraries that have been loaded before the code runs.
+
+   A value of 0 tells us we have no such handler.
+
+   We might as well make sure everything else is cleared too (except for %rsp),
+   just to make things more deterministic.
+ */
+#define ELF_PLAT_INIT(_r)      do { \
+       struct task_struct *cur = current; \
+       (_r)->rbx = 0; (_r)->rcx = 0; (_r)->rdx = 0; \
+       (_r)->rsi = 0; (_r)->rdi = 0; (_r)->rbp = 0; \
+       (_r)->rax = 0;                          \
+       (_r)->r8 = 0;                           \
+       (_r)->r9 = 0;                           \
+       (_r)->r10 = 0;                          \
+       (_r)->r11 = 0;                          \
+       (_r)->r12 = 0;                          \
+       (_r)->r13 = 0;                          \
+       (_r)->r14 = 0;                          \
+       (_r)->r15 = 0;                          \
+        cur->thread.fs = 0; cur->thread.gs = 0; \
+       cur->thread.fsindex = 0; cur->thread.gsindex = 0; \
+        cur->thread.ds = 0; cur->thread.es = 0;  \
+} while (0)
+
+#define USE_ELF_CORE_DUMP
+#define ELF_EXEC_PAGESIZE      4096
+
+/* This is the location that an ET_DYN program is loaded if exec'ed.  Typical
+   use of this is to invoke "./ld.so someprog" to test out a new version of
+   the loader.  We need to make sure that it is out of the way of the program
+   that it will "exec", and that there is sufficient room for the brk.  */
+
+#define ELF_ET_DYN_BASE         (2 * TASK_SIZE / 3)
+
+/* regs is struct pt_regs, pr_reg is elf_gregset_t (which is
+   now struct_user_regs, they are different) */
+
+#define ELF_CORE_COPY_REGS(pr_reg, regs)               \
+       (pr_reg)[0] = (regs)->r15;                              \
+       (pr_reg)[1] = (regs)->r14;                              \
+       (pr_reg)[2] = (regs)->r13;                              \
+       (pr_reg)[3] = (regs)->r12;                              \
+       (pr_reg)[4] = (regs)->rbp;                              \
+       (pr_reg)[5] = (regs)->rbx;                              \
+       (pr_reg)[6] = (regs)->r11;                              \
+       (pr_reg)[7] = (regs)->r10;                              \
+       (pr_reg)[8] = (regs)->r9;                               \
+       (pr_reg)[9] = (regs)->r8;                               \
+       (pr_reg)[10] = (regs)->rax;                             \
+       (pr_reg)[11] = (regs)->rcx;                             \
+       (pr_reg)[12] = (regs)->rdx;                             \
+       (pr_reg)[13] = (regs)->rsi;                             \
+       (pr_reg)[14] = (regs)->rdi;                             \
+       (pr_reg)[15] = (regs)->orig_rax;                        \
+       (pr_reg)[16] = (regs)->rip;                     \
+       (pr_reg)[17] = (regs)->cs;                      \
+       (pr_reg)[18] = (regs)->eflags;                  \
+       (pr_reg)[19] = (regs)->rsp;                     \
+       (pr_reg)[20] = (regs)->ss;                      \
+       rdmsrl(MSR_FS_BASE, (pr_reg)[21]);              \
+       rdmsrl(MSR_KERNEL_GS_BASE, (pr_reg)[22]);
+
+/* This yields a mask that user programs can use to figure out what
+   instruction set this CPU supports.  This could be done in user space,
+   but it's not easy, and we've already done it here.  */
+
+#define ELF_HWCAP      (boot_cpu_data.x86_capability[0])
+
+/* This yields a string that ld.so will use to load implementation
+   specific libraries for optimization.  This is more specific in
+   intent than poking at uname or /proc/cpuinfo.
+
+   For the moment, we have only optimizations for the Intel generations,
+   but that could change... */
+
+/* I'm not sure if we can use '-' here */
+#define ELF_PLATFORM  ("x86_64")
+
+#ifdef __KERNEL__
+extern void set_personality_64bit(void);
+#define SET_PERSONALITY(ex, ibcs2) set_personality_64bit()
+       
+#endif
+
+#endif
diff --git a/include/asm-x86_64/errno.h b/include/asm-x86_64/errno.h
new file mode 100644 (file)
index 0000000..1c5ff49
--- /dev/null
@@ -0,0 +1,132 @@
+#ifndef _X8664_ERRNO_H
+#define _X8664_ERRNO_H
+
+#define        EPERM            1      /* Operation not permitted */
+#define        ENOENT           2      /* No such file or directory */
+#define        ESRCH            3      /* No such process */
+#define        EINTR            4      /* Interrupted system call */
+#define        EIO              5      /* I/O error */
+#define        ENXIO            6      /* No such device or address */
+#define        E2BIG            7      /* Arg list too long */
+#define        ENOEXEC          8      /* Exec format error */
+#define        EBADF            9      /* Bad file number */
+#define        ECHILD          10      /* No child processes */
+#define        EAGAIN          11      /* Try again */
+#define        ENOMEM          12      /* Out of memory */
+#define        EACCES          13      /* Permission denied */
+#define        EFAULT          14      /* Bad address */
+#define        ENOTBLK         15      /* Block device required */
+#define        EBUSY           16      /* Device or resource busy */
+#define        EEXIST          17      /* File exists */
+#define        EXDEV           18      /* Cross-device link */
+#define        ENODEV          19      /* No such device */
+#define        ENOTDIR         20      /* Not a directory */
+#define        EISDIR          21      /* Is a directory */
+#define        EINVAL          22      /* Invalid argument */
+#define        ENFILE          23      /* File table overflow */
+#define        EMFILE          24      /* Too many open files */
+#define        ENOTTY          25      /* Not a typewriter */
+#define        ETXTBSY         26      /* Text file busy */
+#define        EFBIG           27      /* File too large */
+#define        ENOSPC          28      /* No space left on device */
+#define        ESPIPE          29      /* Illegal seek */
+#define        EROFS           30      /* Read-only file system */
+#define        EMLINK          31      /* Too many links */
+#define        EPIPE           32      /* Broken pipe */
+#define        EDOM            33      /* Math argument out of domain of func */
+#define        ERANGE          34      /* Math result not representable */
+#define        EDEADLK         35      /* Resource deadlock would occur */
+#define        ENAMETOOLONG    36      /* File name too long */
+#define        ENOLCK          37      /* No record locks available */
+#define        ENOSYS          38      /* Function not implemented */
+#define        ENOTEMPTY       39      /* Directory not empty */
+#define        ELOOP           40      /* Too many symbolic links encountered */
+#define        EWOULDBLOCK     EAGAIN  /* Operation would block */
+#define        ENOMSG          42      /* No message of desired type */
+#define        EIDRM           43      /* Identifier removed */
+#define        ECHRNG          44      /* Channel number out of range */
+#define        EL2NSYNC        45      /* Level 2 not synchronized */
+#define        EL3HLT          46      /* Level 3 halted */
+#define        EL3RST          47      /* Level 3 reset */
+#define        ELNRNG          48      /* Link number out of range */
+#define        EUNATCH         49      /* Protocol driver not attached */
+#define        ENOCSI          50      /* No CSI structure available */
+#define        EL2HLT          51      /* Level 2 halted */
+#define        EBADE           52      /* Invalid exchange */
+#define        EBADR           53      /* Invalid request descriptor */
+#define        EXFULL          54      /* Exchange full */
+#define        ENOANO          55      /* No anode */
+#define        EBADRQC         56      /* Invalid request code */
+#define        EBADSLT         57      /* Invalid slot */
+
+#define        EDEADLOCK       EDEADLK
+
+#define        EBFONT          59      /* Bad font file format */
+#define        ENOSTR          60      /* Device not a stream */
+#define        ENODATA         61      /* No data available */
+#define        ETIME           62      /* Timer expired */
+#define        ENOSR           63      /* Out of streams resources */
+#define        ENONET          64      /* Machine is not on the network */
+#define        ENOPKG          65      /* Package not installed */
+#define        EREMOTE         66      /* Object is remote */
+#define        ENOLINK         67      /* Link has been severed */
+#define        EADV            68      /* Advertise error */
+#define        ESRMNT          69      /* Srmount error */
+#define        ECOMM           70      /* Communication error on send */
+#define        EPROTO          71      /* Protocol error */
+#define        EMULTIHOP       72      /* Multihop attempted */
+#define        EDOTDOT         73      /* RFS specific error */
+#define        EBADMSG         74      /* Not a data message */
+#define        EOVERFLOW       75      /* Value too large for defined data type */
+#define        ENOTUNIQ        76      /* Name not unique on network */
+#define        EBADFD          77      /* File descriptor in bad state */
+#define        EREMCHG         78      /* Remote address changed */
+#define        ELIBACC         79      /* Can not access a needed shared library */
+#define        ELIBBAD         80      /* Accessing a corrupted shared library */
+#define        ELIBSCN         81      /* .lib section in a.out corrupted */
+#define        ELIBMAX         82      /* Attempting to link in too many shared libraries */
+#define        ELIBEXEC        83      /* Cannot exec a shared library directly */
+#define        EILSEQ          84      /* Illegal byte sequence */
+#define        ERESTART        85      /* Interrupted system call should be restarted */
+#define        ESTRPIPE        86      /* Streams pipe error */
+#define        EUSERS          87      /* Too many users */
+#define        ENOTSOCK        88      /* Socket operation on non-socket */
+#define        EDESTADDRREQ    89      /* Destination address required */
+#define        EMSGSIZE        90      /* Message too long */
+#define        EPROTOTYPE      91      /* Protocol wrong type for socket */
+#define        ENOPROTOOPT     92      /* Protocol not available */
+#define        EPROTONOSUPPORT 93      /* Protocol not supported */
+#define        ESOCKTNOSUPPORT 94      /* Socket type not supported */
+#define        EOPNOTSUPP      95      /* Operation not supported on transport endpoint */
+#define        EPFNOSUPPORT    96      /* Protocol family not supported */
+#define        EAFNOSUPPORT    97      /* Address family not supported by protocol */
+#define        EADDRINUSE      98      /* Address already in use */
+#define        EADDRNOTAVAIL   99      /* Cannot assign requested address */
+#define        ENETDOWN        100     /* Network is down */
+#define        ENETUNREACH     101     /* Network is unreachable */
+#define        ENETRESET       102     /* Network dropped connection because of reset */
+#define        ECONNABORTED    103     /* Software caused connection abort */
+#define        ECONNRESET      104     /* Connection reset by peer */
+#define        ENOBUFS         105     /* No buffer space available */
+#define        EISCONN         106     /* Transport endpoint is already connected */
+#define        ENOTCONN        107     /* Transport endpoint is not connected */
+#define        ESHUTDOWN       108     /* Cannot send after transport endpoint shutdown */
+#define        ETOOMANYREFS    109     /* Too many references: cannot splice */
+#define        ETIMEDOUT       110     /* Connection timed out */
+#define        ECONNREFUSED    111     /* Connection refused */
+#define        EHOSTDOWN       112     /* Host is down */
+#define        EHOSTUNREACH    113     /* No route to host */
+#define        EALREADY        114     /* Operation already in progress */
+#define        EINPROGRESS     115     /* Operation now in progress */
+#define        ESTALE          116     /* Stale NFS file handle */
+#define        EUCLEAN         117     /* Structure needs cleaning */
+#define        ENOTNAM         118     /* Not a XENIX named type file */
+#define        ENAVAIL         119     /* No XENIX semaphores available */
+#define        EISNAM          120     /* Is a named type file */
+#define        EREMOTEIO       121     /* Remote I/O error */
+#define        EDQUOT          122     /* Quota exceeded */
+
+#define        ENOMEDIUM       123     /* No medium found */
+#define        EMEDIUMTYPE     124     /* Wrong medium type */
+
+#endif
diff --git a/include/asm-x86_64/fcntl.h b/include/asm-x86_64/fcntl.h
new file mode 100644 (file)
index 0000000..33363ea
--- /dev/null
@@ -0,0 +1,79 @@
+#ifndef _X86_64_FCNTL_H
+#define _X86_64_FCNTL_H
+
+/* open/fcntl - O_SYNC is only implemented on blocks devices and on files
+   located on an ext2 file system */
+#define O_ACCMODE         0003
+#define O_RDONLY            00
+#define O_WRONLY            01
+#define O_RDWR              02
+#define O_CREAT                   0100 /* not fcntl */
+#define O_EXCL            0200 /* not fcntl */
+#define O_NOCTTY          0400 /* not fcntl */
+#define O_TRUNC                  01000 /* not fcntl */
+#define O_APPEND         02000
+#define O_NONBLOCK       04000
+#define O_NDELAY       O_NONBLOCK
+#define O_SYNC          010000
+#define FASYNC          020000 /* fcntl, for BSD compatibility */
+#define O_DIRECT        040000 /* direct disk access hint */
+#define O_LARGEFILE    0100000
+#define O_DIRECTORY    0200000 /* must be a directory */
+#define O_NOFOLLOW     0400000 /* don't follow links */
+
+#define F_DUPFD                0       /* dup */
+#define F_GETFD                1       /* get close_on_exec */
+#define F_SETFD                2       /* set/clear close_on_exec */
+#define F_GETFL                3       /* get file->f_flags */
+#define F_SETFL                4       /* set file->f_flags */
+#define F_GETLK                5
+#define F_SETLK                6
+#define F_SETLKW       7
+
+#define F_SETOWN       8       /*  for sockets. */
+#define F_GETOWN       9       /*  for sockets. */
+#define F_SETSIG       10      /*  for sockets. */
+#define F_GETSIG       11      /*  for sockets. */
+
+/* for F_[GET|SET]FL */
+#define FD_CLOEXEC     1       /* actually anything with low bit set goes */
+
+/* for posix fcntl() and lockf() */
+#define F_RDLCK                0
+#define F_WRLCK                1
+#define F_UNLCK                2
+
+/* for old implementation of bsd flock () */
+#define F_EXLCK                4       /* or 3 */
+#define F_SHLCK                8       /* or 4 */
+
+/* for leases */
+#define F_INPROGRESS   16
+
+/* operations for bsd flock(), also used by the kernel implementation */
+#define LOCK_SH                1       /* shared lock */
+#define LOCK_EX                2       /* exclusive lock */
+#define LOCK_NB                4       /* or'd with one of the above to prevent
+                                  blocking */
+#define LOCK_UN                8       /* remove lock */
+
+#define LOCK_MAND      32      /* This is a mandatory flock */
+#define LOCK_READ      64      /* ... Which allows concurrent read operations */
+#define LOCK_WRITE     128     /* ... Which allows concurrent write operations */
+#define LOCK_RW                192     /* ... Which allows concurrent read & write ops */
+
+struct flock {
+       short  l_type;
+       short  l_whence;
+       off_t l_start;
+       off_t l_len;
+       pid_t  l_pid;
+};
+
+#define F_LINUX_SPECIFIC_BASE  1024
+
+#ifdef __KERNEL__
+#define flock64        flock
+#endif
+
+#endif /* !_X86_64_FCNTL_H */
diff --git a/include/asm-x86_64/fixmap.h b/include/asm-x86_64/fixmap.h
new file mode 100644 (file)
index 0000000..7241b90
--- /dev/null
@@ -0,0 +1,105 @@
+/*
+ * fixmap.h: compile-time virtual memory allocation
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 1998 Ingo Molnar
+ */
+
+#ifndef _ASM_FIXMAP_H
+#define _ASM_FIXMAP_H
+
+#include <linux/config.h>
+#include <linux/kernel.h>
+#include <asm/apicdef.h>
+#include <asm/page.h>
+#include <asm/vsyscall.h>
+
+/*
+ * Here we define all the compile-time 'special' virtual
+ * addresses. The point is to have a constant address at
+ * compile time, but to set the physical address only
+ * in the boot process. We allocate these special addresses
+ * from the end of the virtual address space backwards.
+ * Also this lets us do fail-safe vmalloc(), we
+ * can guarantee that these special addresses and
+ * vmalloc()-ed addresses never overlap.
+ *
+ * These 'compile-time allocated' memory buffers are
+ * fixed-size 4k pages (or larger if used with an increment
+ * higher than 1); use set_fixmap(idx,phys) to associate
+ * physical memory with fixmap indices.
+ *
+ * TLB entries of such buffers will not be flushed across
+ * task switches.
+ */
+
+/*
+ * on UP currently we will have no trace of the fixmap mechanism,
+ * no page table allocations, etc. This might change in the
+ * future, say framebuffers for the console driver(s) could be
+ * fix-mapped?
+ */
+enum fixed_addresses {
+       VSYSCALL_LAST_PAGE,
+       VSYSCALL_FIRST_PAGE = VSYSCALL_LAST_PAGE + ((VSYSCALL_END-VSYSCALL_START) >> PAGE_SHIFT) - 1,
+#ifdef CONFIG_X86_LOCAL_APIC
+       FIX_APIC_BASE,  /* local (CPU) APIC) -- required for SMP or not */
+#endif
+#ifdef CONFIG_X86_IO_APIC
+       FIX_IO_APIC_BASE_0,
+       FIX_IO_APIC_BASE_END = FIX_IO_APIC_BASE_0 + MAX_IO_APICS-1,
+#endif
+       __end_of_fixed_addresses
+};
+
+extern void __set_fixmap (enum fixed_addresses idx,
+                                       unsigned long phys, pgprot_t flags);
+
+#define set_fixmap(idx, phys) \
+               __set_fixmap(idx, phys, PAGE_KERNEL)
+/*
+ * Some hardware wants to get fixmapped without caching.
+ */
+#define set_fixmap_nocache(idx, phys) \
+               __set_fixmap(idx, phys, PAGE_KERNEL_NOCACHE)
+/*
+ * used by vmalloc.c.
+ *
+ * Leave one empty page between vmalloc'ed areas and
+ * the start of the fixmap, and leave one page empty
+ * at the top of mem..
+ */
+#define FIXADDR_TOP    (VSYSCALL_END-PAGE_SIZE)
+#define FIXADDR_SIZE   (__end_of_fixed_addresses << PAGE_SHIFT)
+#define FIXADDR_START  (FIXADDR_TOP - FIXADDR_SIZE)
+
+#define __fix_to_virt(x)       (FIXADDR_TOP - ((x) << PAGE_SHIFT))
+
+extern void __this_fixmap_does_not_exist(void);
+
+/*
+ * 'index to address' translation. If anyone tries to use the idx
+ * directly without translation, we catch the bug with a NULL-dereference
+ * kernel oops. Illegal ranges of incoming indices are caught too.
+ */
+extern inline unsigned long fix_to_virt(const unsigned int idx)
+{
+       /*
+        * this branch gets completely eliminated after inlining,
+        * except when someone tries to use fixaddr indices in an
+        * illegal way. (such as mixing up address types or using
+        * out-of-range indices).
+        *
+        * If it doesn't get removed, the linker will complain
+        * loudly with a reasonably clear error message..
+        */
+       if (idx >= __end_of_fixed_addresses)
+               __this_fixmap_does_not_exist();
+
+        return __fix_to_virt(idx);
+}
+
+#endif
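
As an illustrative aside (not part of the patch): fixmap indices grow downwards from FIXADDR_TOP in page-sized steps, so the index-to-address translation is a shift and a subtraction. A user-space sketch with a stand-in FIXADDR_TOP value:

#include <stdio.h>

#define PAGE_SHIFT  12
#define FIXADDR_TOP 0xffffffffff5ff000UL    /* stand-in value, not the kernel's */

#define fix_to_virt(x) (FIXADDR_TOP - ((unsigned long)(x) << PAGE_SHIFT))

int main(void)
{
        unsigned int idx;

        for (idx = 0; idx < 3; idx++)
                printf("fixmap index %u -> %#lx\n", idx, fix_to_virt(idx));
        return 0;
}
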
diff --git a/include/asm-x86_64/floppy.h b/include/asm-x86_64/floppy.h
new file mode 100644 (file)
index 0000000..cc1eacf
--- /dev/null
@@ -0,0 +1,286 @@
+/*
+ * Architecture specific parts of the Floppy driver
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 1995
+ */
+#ifndef __ASM_X86_64_FLOPPY_H
+#define __ASM_X86_64_FLOPPY_H
+
+#include <linux/vmalloc.h>
+
+
+/*
+ * The DMA channel used by the floppy controller cannot access data at
+ * addresses >= 16MB
+ *
+ * Went back to the 1MB limit, as some people had problems with the floppy
+ * driver otherwise. It doesn't matter much for performance anyway, as most
+ * floppy accesses go through the track buffer.
+ */
+#define _CROSS_64KB(a,s,vdma) \
+(!vdma && ((unsigned long)(a)/K_64 != ((unsigned long)(a) + (s) - 1) / K_64))
+
+#define CROSS_64KB(a,s) _CROSS_64KB(a,s,use_virtual_dma & 1)
+
+
+#define SW fd_routine[use_virtual_dma&1]
+#define CSW fd_routine[can_use_virtual_dma & 1]
+
+
+#define fd_inb(port)                   inb_p(port)
+#define fd_outb(port,value)            outb_p(port,value)
+
+#define fd_request_dma()        CSW._request_dma(FLOPPY_DMA,"floppy")
+#define fd_free_dma()           CSW._free_dma(FLOPPY_DMA)
+#define fd_enable_irq()         enable_irq(FLOPPY_IRQ)
+#define fd_disable_irq()        disable_irq(FLOPPY_IRQ)
+#define fd_free_irq()          free_irq(FLOPPY_IRQ, NULL)
+#define fd_get_dma_residue()    SW._get_dma_residue(FLOPPY_DMA)
+#define fd_dma_mem_alloc(size) SW._dma_mem_alloc(size)
+#define fd_dma_setup(addr, size, mode, io) SW._dma_setup(addr, size, mode, io)
+
+#define FLOPPY_CAN_FALLBACK_ON_NODMA
+
+static int virtual_dma_count;
+static int virtual_dma_residue;
+static char *virtual_dma_addr;
+static int virtual_dma_mode;
+static int doing_pdma;
+
+static void floppy_hardint(int irq, void *dev_id, struct pt_regs * regs)
+{
+       register unsigned char st;
+
+#undef TRACE_FLPY_INT
+
+#ifdef TRACE_FLPY_INT
+       static int calls=0;
+       static int bytes=0;
+       static int dma_wait=0;
+#endif
+       if(!doing_pdma) {
+               floppy_interrupt(irq, dev_id, regs);
+               return;
+       }
+
+#ifdef TRACE_FLPY_INT
+       if(!calls)
+               bytes = virtual_dma_count;
+#endif
+
+       {
+               register int lcount;
+               register char *lptr;
+
+               st = 1;
+               for(lcount=virtual_dma_count, lptr=virtual_dma_addr; 
+                   lcount; lcount--, lptr++) {
+                       st=inb(virtual_dma_port+4) & 0xa0 ;
+                       if(st != 0xa0) 
+                               break;
+                       if(virtual_dma_mode)
+                               outb_p(*lptr, virtual_dma_port+5);
+                       else
+                               *lptr = inb_p(virtual_dma_port+5);
+               }
+               virtual_dma_count = lcount;
+               virtual_dma_addr = lptr;
+               st = inb(virtual_dma_port+4);
+       }
+
+#ifdef TRACE_FLPY_INT
+       calls++;
+#endif
+       if(st == 0x20)
+               return;
+       if(!(st & 0x20)) {
+               virtual_dma_residue += virtual_dma_count;
+               virtual_dma_count=0;
+#ifdef TRACE_FLPY_INT
+               printk("count=%x, residue=%x calls=%d bytes=%d dma_wait=%d\n", 
+                      virtual_dma_count, virtual_dma_residue, calls, bytes,
+                      dma_wait);
+               calls = 0;
+               dma_wait=0;
+#endif
+               doing_pdma = 0;
+               floppy_interrupt(irq, dev_id, regs);
+               return;
+       }
+#ifdef TRACE_FLPY_INT
+       if(!virtual_dma_count)
+               dma_wait++;
+#endif
+}
+
+static void fd_disable_dma(void)
+{
+       if(! (can_use_virtual_dma & 1))
+               disable_dma(FLOPPY_DMA);
+       doing_pdma = 0;
+       virtual_dma_residue += virtual_dma_count;
+       virtual_dma_count=0;
+}
+
+static int vdma_request_dma(unsigned int dmanr, const char * device_id)
+{
+       return 0;
+}
+
+static void vdma_nop(unsigned int dummy)
+{
+}
+
+
+static int vdma_get_dma_residue(unsigned int dummy)
+{
+       return virtual_dma_count + virtual_dma_residue;
+}
+
+
+static int fd_request_irq(void)
+{
+       if(can_use_virtual_dma)
+               return request_irq(FLOPPY_IRQ, floppy_hardint,SA_INTERRUPT,
+                                                  "floppy", NULL);
+       else
+               return request_irq(FLOPPY_IRQ, floppy_interrupt,
+                                                  SA_INTERRUPT|SA_SAMPLE_RANDOM,
+                                                  "floppy", NULL);     
+
+}
+
+static unsigned long dma_mem_alloc(unsigned long size)
+{
+       return __get_dma_pages(GFP_KERNEL,get_order(size));
+}
+
+
+static unsigned long vdma_mem_alloc(unsigned long size)
+{
+       return (unsigned long) vmalloc(size);
+
+}
+
+#define nodma_mem_alloc(size) vdma_mem_alloc(size)
+
+static void _fd_dma_mem_free(unsigned long addr, unsigned long size)
+{
+       if((unsigned long) addr >= (unsigned long) high_memory)
+               return vfree((void *)addr);
+       else
+               free_pages(addr, get_order(size));              
+}
+
+#define fd_dma_mem_free(addr, size)  _fd_dma_mem_free(addr, size) 
+
+static void _fd_chose_dma_mode(char *addr, unsigned long size)
+{
+       if(can_use_virtual_dma == 2) {
+               if((unsigned long) addr >= (unsigned long) high_memory ||
+                  isa_virt_to_bus(addr) >= 0x1000000 ||
+                  _CROSS_64KB(addr, size, 0))
+                       use_virtual_dma = 1;
+               else
+                       use_virtual_dma = 0;
+       } else {
+               use_virtual_dma = can_use_virtual_dma & 1;
+       }
+}
+
+#define fd_chose_dma_mode(addr, size) _fd_chose_dma_mode(addr, size)
+
+
+static int vdma_dma_setup(char *addr, unsigned long size, int mode, int io)
+{
+       doing_pdma = 1;
+       virtual_dma_port = io;
+       virtual_dma_mode = (mode  == DMA_MODE_WRITE);
+       virtual_dma_addr = addr;
+       virtual_dma_count = size;
+       virtual_dma_residue = 0;
+       return 0;
+}
+
+static int hard_dma_setup(char *addr, unsigned long size, int mode, int io)
+{
+#ifdef FLOPPY_SANITY_CHECK
+       if (CROSS_64KB(addr, size)) {
+               printk("DMA crossing 64-K boundary %p-%p\n", addr, addr+size);
+               return -1;
+       }
+#endif
+       /* actual, physical DMA */
+       doing_pdma = 0;
+       clear_dma_ff(FLOPPY_DMA);
+       set_dma_mode(FLOPPY_DMA,mode);
+       set_dma_addr(FLOPPY_DMA,isa_virt_to_bus(addr));
+       set_dma_count(FLOPPY_DMA,size);
+       enable_dma(FLOPPY_DMA);
+       return 0;
+}
+
+struct fd_routine_l {
+       int (*_request_dma)(unsigned int dmanr, const char * device_id);
+       void (*_free_dma)(unsigned int dmanr);
+       int (*_get_dma_residue)(unsigned int dummy);
+       unsigned long (*_dma_mem_alloc) (unsigned long size);
+       int (*_dma_setup)(char *addr, unsigned long size, int mode, int io);
+} fd_routine[] = {
+       {
+               request_dma,
+               free_dma,
+               get_dma_residue,
+               dma_mem_alloc,
+               hard_dma_setup
+       },
+       {
+               vdma_request_dma,
+               vdma_nop,
+               vdma_get_dma_residue,
+               vdma_mem_alloc,
+               vdma_dma_setup
+       }
+};
+
+
+static int FDC1 = 0x3f0;
+static int FDC2 = -1;
+
+/*
+ * Floppy types are stored in the rtc's CMOS RAM and so rtc_lock
+ * is needed to prevent corrupted CMOS RAM in case "insmod floppy"
+ * coincides with another rtc CMOS user.               Paul G.
+ */
+#define FLOPPY0_TYPE   ({                              \
+       unsigned long flags;                            \
+       unsigned char val;                              \
+       spin_lock_irqsave(&rtc_lock, flags);            \
+       val = (CMOS_READ(0x10) >> 4) & 15;              \
+       spin_unlock_irqrestore(&rtc_lock, flags);       \
+       val;                                            \
+})
+
+#define FLOPPY1_TYPE   ({                              \
+       unsigned long flags;                            \
+       unsigned char val;                              \
+       spin_lock_irqsave(&rtc_lock, flags);            \
+       val = CMOS_READ(0x10) & 15;                     \
+       spin_unlock_irqrestore(&rtc_lock, flags);       \
+       val;                                            \
+})
+
+#define N_FDC 2
+#define N_DRIVE 8
+
+#define FLOPPY_MOTOR_MASK 0xf0
+
+#define AUTO_DMA
+
+#define EXTRA_FLOPPY_PARAMS
+
+#endif /* __ASM_X86_64_FLOPPY_H */
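The floppy code above falls back to "virtual DMA" when the real ISA DMA controller cannot be used: the interrupt handler itself moves bytes through the FDC data FIFO while the main status register reports the controller ready. A minimal sketch of that polling loop follows, for illustration only; fdc_inb()/fdc_outb() are hypothetical stand-ins for inb()/outb(), and the +4/+5 port offsets and 0xa0 status mask mirror floppy_hardint() above.

/*
 * Sketch: move up to 'count' bytes by programmed I/O instead of DMA.
 * Port +4 is the FDC main status register, port +5 the data FIFO;
 * 0xa0 means "FIFO ready for a non-DMA CPU transfer".
 */
static size_t pdma_transfer(unsigned int port, unsigned char *buf,
                            size_t count, int writing)
{
        size_t done = 0;

        while (done < count) {
                unsigned char st = fdc_inb(port + 4) & 0xa0;

                if (st != 0xa0)                 /* controller not ready */
                        break;
                if (writing)
                        fdc_outb(buf[done], port + 5);
                else
                        buf[done] = fdc_inb(port + 5);
                done++;
        }
        return done;                            /* bytes actually moved */
}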
diff --git a/include/asm-x86_64/hardirq.h b/include/asm-x86_64/hardirq.h
new file mode 100644 (file)
index 0000000..c132bc2
--- /dev/null
@@ -0,0 +1,93 @@
+#ifndef __ASM_HARDIRQ_H
+#define __ASM_HARDIRQ_H
+
+#include <linux/config.h>
+#include <linux/threads.h>
+#include <linux/irq.h>
+#include <asm/pda.h>
+
+#define __ARCH_IRQ_STAT 1
+
+/* Generate an lvalue for a pda member. softirq.c should really be fixed
+   to use special access macros instead; that would generate better code. */
+#define __IRQ_STAT(cpu,member) (read_pda(me)->member)
+
+typedef struct {
+       /* Empty. All the fields have moved to the PDA. */
+} irq_cpustat_t; 
+
+#include <linux/irq_cpustat.h> /* Standard mappings for irq_cpustat_t above */
+
+/*
+ * Are we in an interrupt context? Either doing bottom half
+ * or hardware interrupt processing?
+ */
+#define in_interrupt() \
+       ((read_pda(__local_irq_count) +  read_pda(__local_bh_count)) != 0)
+#define in_irq() (read_pda(__local_irq_count) != 0)
+
+#ifndef CONFIG_SMP
+
+#define hardirq_trylock(cpu)   (local_irq_count() == 0)
+#define hardirq_endlock(cpu)   do { } while (0)
+
+#define irq_enter(cpu, irq)    (local_irq_count()++)
+#define irq_exit(cpu, irq)     (local_irq_count()--)
+
+#define synchronize_irq()      barrier()
+
+#define release_irqlock(cpu)   do { } while (0)
+
+#else
+
+#include <asm/atomic.h>
+#include <asm/smp.h>
+
+extern unsigned char global_irq_holder;
+extern unsigned volatile long global_irq_lock; /* long for set_bit -RR */
+
+static inline int irqs_running (void)
+{
+       int i;
+
+       for (i = 0; i < smp_num_cpus; i++)
+               if (read_pda(__local_irq_count))
+                       return 1;
+       return 0;
+}
+
+static inline void release_irqlock(int cpu)
+{
+       /* if we didn't own the irq lock, just ignore.. */
+       if (global_irq_holder == (unsigned char) cpu) {
+               global_irq_holder = NO_PROC_ID;
+               clear_bit(0,&global_irq_lock);
+       }
+}
+
+static inline void irq_enter(int cpu, int irq)
+{
+       add_pda(__local_irq_count, 1);
+
+       while (test_bit(0,&global_irq_lock)) {
+               cpu_relax();
+       }
+}
+
+static inline void irq_exit(int cpu, int irq)
+{
+       sub_pda(__local_irq_count, 1);
+}
+
+static inline int hardirq_trylock(int cpu)
+{
+       return !read_pda(__local_irq_count) && !test_bit(0,&global_irq_lock);
+}
+
+#define hardirq_endlock(cpu)   do { } while (0)
+
+extern void synchronize_irq(void);
+
+#endif /* CONFIG_SMP */
+
+#endif /* __ASM_HARDIRQ_H */
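The hardirq.h above moves the per-CPU interrupt counters into the PDA and derives in_interrupt()/in_irq() from them. The classic consumer of in_interrupt() is allocation-context selection; a small sketch of that idiom follows (illustration only, using the 2.5-era kmalloc() flag convention):

#include <linux/slab.h>
#include <asm/hardirq.h>

/* Pick a safe allocation flag based on the PDA-derived context test. */
static void *ctx_alloc(size_t len)
{
        return kmalloc(len, in_interrupt() ? GFP_ATOMIC : GFP_KERNEL);
}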
diff --git a/include/asm-x86_64/hdreg.h b/include/asm-x86_64/hdreg.h
new file mode 100644 (file)
index 0000000..7de6fcf
--- /dev/null
@@ -0,0 +1,12 @@
+/*
+ *  linux/include/asm-x86_64/hdreg.h
+ *
+ *  Copyright (C) 1994-1996  Linus Torvalds & authors
+ */
+
+#ifndef __ASMx86_64_HDREG_H
+#define __ASMx86_64_HDREG_H
+
+typedef unsigned short ide_ioreg_t;
+
+#endif /* __ASMx86_64_HDREG_H */
diff --git a/include/asm-x86_64/hw_irq.h b/include/asm-x86_64/hw_irq.h
new file mode 100644 (file)
index 0000000..4182706
--- /dev/null
@@ -0,0 +1,210 @@
+#ifndef _ASM_HW_IRQ_H
+#define _ASM_HW_IRQ_H
+
+/*
+ *     linux/include/asm/hw_irq.h
+ *
+ *     (C) 1992, 1993 Linus Torvalds, (C) 1997 Ingo Molnar
+ *
+ *     moved some of the old arch/i386/kernel/irq.h to here. VY
+ *
+ *     IRQ/IPI changes taken from work by Thomas Radke
+ *     <tomsoft@informatik.tu-chemnitz.de>
+ *
+ *     hacked by Andi Kleen for x86-64.
+ * 
+ *  $Id: hw_irq.h,v 1.24 2001/09/14 20:55:03 vojtech Exp $
+ */
+
+#include <linux/config.h>
+#include <asm/atomic.h>
+#include <asm/irq.h>
+
+/*
+ * IDT vectors usable for external interrupt sources start
+ * at 0x20:
+ */
+#define FIRST_EXTERNAL_VECTOR  0x20
+
+#define IA32_SYSCALL_VECTOR    0x80
+
+
+/*
+ * Vectors 0x20-0x2f are used for ISA interrupts.
+ */
+
+/*
+ * Special IRQ vectors used by the SMP architecture, 0xf0-0xff
+ *
+ *  some of the following vectors are 'rare': they are merged
+ *  into a single vector (CALL_FUNCTION_VECTOR) to save vector space.
+ *  TLB, reschedule and local APIC vectors are performance-critical.
+ *
+ *  Vectors 0xf0-0xf9 are free (reserved for future Linux use).
+ */
+#define SPURIOUS_APIC_VECTOR   0xff
+#define ERROR_APIC_VECTOR      0xfe
+#define INVALIDATE_TLB_VECTOR  0xfd
+#define RESCHEDULE_VECTOR      0xfc
+#define TASK_MIGRATION_VECTOR  0xfb
+#define CALL_FUNCTION_VECTOR   0xfa
+
+/*
+ * Local APIC timer IRQ vector is on a different priority level,
+ * to work around the 'lost local interrupt if more than 2 IRQ
+ * sources per level' errata.
+ */
+#define LOCAL_TIMER_VECTOR     0xef
+
+/*
+ * First APIC vector available to drivers: (vectors 0x30-0xee)
+ * we start at 0x31 to spread out vectors evenly between priority
+ * levels. (0x80 is the syscall vector)
+ */
+#define FIRST_DEVICE_VECTOR    0x31
+#define FIRST_SYSTEM_VECTOR    0xef
+
+extern int irq_vector[NR_IRQS];
+#define IO_APIC_VECTOR(irq)    irq_vector[irq]
+
+/*
+ * Various low-level irq details needed by irq.c, process.c,
+ * time.c, io_apic.c and smp.c
+ *
+ * Interrupt entry/exit code at both C and assembly level
+ */
+
+extern void mask_irq(unsigned int irq);
+extern void unmask_irq(unsigned int irq);
+extern void disable_8259A_irq(unsigned int irq);
+extern void enable_8259A_irq(unsigned int irq);
+extern int i8259A_irq_pending(unsigned int irq);
+extern void make_8259A_irq(unsigned int irq);
+extern void init_8259A(int aeoi);
+extern void FASTCALL(send_IPI_self(int vector));
+extern void init_VISWS_APIC_irqs(void);
+extern void setup_IO_APIC(void);
+extern void disable_IO_APIC(void);
+extern void print_IO_APIC(void);
+extern int IO_APIC_get_PCI_irq_vector(int bus, int slot, int fn);
+extern void send_IPI(int dest, int vector);
+
+extern unsigned long io_apic_irqs;
+
+extern atomic_t irq_err_count;
+extern atomic_t irq_mis_count;
+
+extern char _stext, _etext;
+
+#define IO_APIC_IRQ(x) (((x) >= 16) || ((1<<(x)) & io_apic_irqs))
+
+#define __STR(x) #x
+#define STR(x) __STR(x)
+
+#include <asm/ptrace.h>
+
+#ifdef CONFIG_PREEMPT
+#define PREEMPT_LOCK \
+"      movq %rsp,%rdx ;"                               \
+"      andq $-8192,%rdx ;"                             \
+"      incl " __STR(threadinfo_preempt_count)"(%rdx) ;"
+#else
+#define PREEMPT_LOCK
+#endif
+
+/* IF:off, stack contains irq number on origrax */ 
+#define IRQ_ENTER                                                              \
+"      cld ;"                                                                  \
+"      pushq %rdi ;"                                                           \
+"      pushq %rsi ;"                                                           \
+"      pushq %rdx ;"                                                           \
+"      pushq %rcx ;"                                                           \
+"      pushq %rax ;"                                                           \
+"      pushq %r8 ;"                                                            \
+"      pushq %r9 ;"                                                            \
+"      pushq %r10 ;"                                                           \
+"      pushq %r11 ;"                                                           \
+       PREEMPT_LOCK                                                            \
+"      leaq -48(%rsp),%rdi     # arg1 for handler ;"                           \
+"      cmpq $ " __STR(__KERNEL_CS) ",88(%rsp)  # CS - ARGOFFSET ;"             \
+"      je 1f ;"                                                                \
+"      swapgs ;"                                                               \
+"1:    addl $1,%gs: " __STR(pda_irqcount) ";"                                  \
+"      movq %gs: " __STR(pda_irqstackptr) ",%rax ;"                            \
+"      cmoveq %rax,%rsp ;"     
+
+#define IRQ_NAME2(nr) nr##_interrupt(void)
+#define IRQ_NAME(nr) IRQ_NAME2(IRQ##nr)
+
+/*
+ *     SMP has a few special interrupts for IPI messages
+ */
+
+       /* there is a second layer of macros just to get the symbolic
+          name for the vector evaluated. This change is for RTLinux */
+#define BUILD_SMP_INTERRUPT(x,v) XBUILD_SMP_INTERRUPT(x,v)
+#define XBUILD_SMP_INTERRUPT(x,v)\
+asmlinkage void x(void); \
+asmlinkage void call_##x(void); \
+__asm__( \
+"\n"__ALIGN_STR"\n" \
+SYMBOL_NAME_STR(x) ":\n\t" \
+       "push $" #v "-256;" \
+       IRQ_ENTER \
+       "pushq %rdi ; " \
+       "call " SYMBOL_NAME_STR(smp_##x) " ; " \
+       "jmp ret_from_intr")
+
+#define BUILD_COMMON_IRQ()
+
+#define BUILD_IRQ(nr) \
+asmlinkage void IRQ_NAME(nr); \
+__asm__( \
+"\n"__ALIGN_STR "\n" \
+SYMBOL_NAME_STR(IRQ) #nr "_interrupt:\n\t" \
+       "push $" #nr "-256 ; " \
+       "jmp common_interrupt");
+
+extern unsigned long prof_cpu_mask;
+extern unsigned int * prof_buffer;
+extern unsigned long prof_len;
+extern unsigned long prof_shift;
+
+/*
+ * x86 profiling function, SMP safe. We might want to do this
+ * entirely in assembly?
+ */
+static inline void x86_do_profile (unsigned long rip)
+{
+       if (!prof_buffer)
+               return;
+
+       /*
+        * Only measure the CPUs specified by /proc/irq/prof_cpu_mask.
+        * (default is all CPUs.)
+        */
+       if (!((1<<smp_processor_id()) & prof_cpu_mask))
+               return;
+
+       rip -= (unsigned long) &_stext;
+       rip >>= prof_shift;
+       /*
+        * Don't ignore out-of-bounds RIP values silently;
+        * put them into the last histogram slot, so if
+        * present, they will show up as a sharp peak.
+        */
+       if (rip > prof_len-1)
+               rip = prof_len-1;
+       atomic_inc((atomic_t *)&prof_buffer[rip]);
+}
+
+#ifdef CONFIG_SMP /*more of this file should probably be ifdefed SMP */
+static inline void hw_resend_irq(struct hw_interrupt_type *h, unsigned int i) {
+       if (IO_APIC_IRQ(i))
+               send_IPI_self(IO_APIC_VECTOR(i));
+}
+#else
+static inline void hw_resend_irq(struct hw_interrupt_type *h, unsigned int i) {}
+#endif
+
+#endif /* _ASM_HW_IRQ_H */
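x86_do_profile() above maps a kernel RIP to a histogram bucket by subtracting _stext and shifting right by prof_shift, clamping anything outside the profiled text into the last slot. A standalone restatement of that arithmetic, with numbers invented for the example:

/*
 * With shift == 2 each bucket covers 4 bytes of text, so an instruction
 * 0x1234 bytes past _stext lands in bucket 0x1234 >> 2 = 0x48d; anything
 * past the end of the profiled region is folded into bucket len-1.
 */
static unsigned long profile_bucket(unsigned long rip, unsigned long stext,
                                    unsigned long shift, unsigned long len)
{
        rip -= stext;
        rip >>= shift;
        if (rip > len - 1)
                rip = len - 1;
        return rip;
}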
diff --git a/include/asm-x86_64/i387.h b/include/asm-x86_64/i387.h
new file mode 100644 (file)
index 0000000..fc7d62e
--- /dev/null
@@ -0,0 +1,87 @@
+/*
+ * include/asm-x86_64/i387.h
+ *
+ * Copyright (C) 1994 Linus Torvalds
+ *
+ * Pentium III FXSR, SSE support
+ * General FPU state handling cleanups
+ *     Gareth Hughes <gareth@valinux.com>, May 2000
+ */
+
+#ifndef __ASM_X86_64_I387_H
+#define __ASM_X86_64_I387_H
+
+#include <linux/sched.h>
+#include <linux/spinlock.h>
+#include <asm/processor.h>
+#include <asm/sigcontext.h>
+#include <asm/user.h>
+
+extern void init_fpu(void);
+
+/*
+ * FPU lazy state save handling...
+ */
+extern void save_fpu( struct task_struct *tsk );
+extern void save_init_fpu( struct task_struct *tsk );
+extern void restore_fpu( struct task_struct *tsk );
+
+extern void kernel_fpu_begin(void);
+#define kernel_fpu_end() do { stts(); preempt_enable(); } while(0)
+
+
+#define unlazy_fpu( tsk ) do { \
+       if (test_tsk_thread_flag(tsk, TIF_USEDFPU)) \
+               save_init_fpu( tsk ); \
+} while (0)
+
+#define clear_fpu( tsk )                                       \
+do {                                                           \
+       if (test_tsk_thread_flag(tsk, TIF_USEDFPU)) {           \
+               asm volatile("fwait");                          \
+               clear_tsk_thread_flag(tsk, TIF_USEDFPU);        \
+               stts();                                         \
+       }                                                       \
+} while (0)
+
+/*
+ * FPU state interaction...
+ */
+extern unsigned short get_fpu_cwd( struct task_struct *tsk );
+extern unsigned short get_fpu_swd( struct task_struct *tsk );
+extern unsigned short get_fpu_twd( struct task_struct *tsk );
+extern unsigned short get_fpu_mxcsr( struct task_struct *tsk );
+
+extern void set_fpu_cwd( struct task_struct *tsk, unsigned short cwd );
+extern void set_fpu_swd( struct task_struct *tsk, unsigned short swd );
+extern void set_fpu_twd( struct task_struct *tsk, unsigned short twd );
+extern void set_fpu_mxcsr( struct task_struct *tsk, unsigned short mxcsr );
+
+#define load_mxcsr( val ) do { \
+               unsigned long __mxcsr = ((unsigned long)(val) & 0xffbf); \
+               asm volatile( "ldmxcsr %0" : : "m" (__mxcsr) ); \
+} while (0)
+
+/*
+ * Signal frame handlers...
+ */
+extern int save_i387( struct _fpstate *buf );
+extern int restore_i387( struct _fpstate *buf );
+
+/*
+ * ptrace request handlers...
+ */
+extern int get_fpregs( struct user_i387_struct *buf,
+                      struct task_struct *tsk );
+extern int set_fpregs( struct task_struct *tsk,
+                      struct user_i387_struct *buf );
+
+/*
+ * FPU state for core dumps...
+ */
+extern int dump_fpu( struct pt_regs *regs,
+                    struct user_i387_struct *fpu );
+extern int dump_extended_fpu( struct pt_regs *regs,
+                             struct user_i387_struct *fpu );
+
+#endif /* __ASM_X86_64_I387_H */
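The lazy FPU interfaces declared above are meant for kernel code that wants to use the FP/SSE unit itself. A hedged sketch of the expected call pattern (the actual SSE work is elided; kernel_fpu_end() expands to stts() plus preempt_enable() as defined above):

#include <asm/i387.h>

static void do_sse_work(void)
{
        kernel_fpu_begin();     /* save the current task's lazy FPU state */
        /* ... FP/SSE instructions may be used here ... */
        kernel_fpu_end();       /* restore TS and re-enable preemption */
}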
diff --git a/include/asm-x86_64/ia32.h b/include/asm-x86_64/ia32.h
new file mode 100644 (file)
index 0000000..1d99fce
--- /dev/null
@@ -0,0 +1,274 @@
+#ifndef _ASM_X86_64_IA32_H
+#define _ASM_X86_64_IA32_H
+
+#include <linux/config.h>
+
+#ifdef CONFIG_IA32_EMULATION
+
+/*
+ * 32 bit structures for IA32 support.
+ */
+
+/* 32bit compatibility types */
+typedef unsigned int          __kernel_size_t32;
+typedef int                   __kernel_ssize_t32;
+typedef int                   __kernel_ptrdiff_t32;
+typedef int                   __kernel_time_t32;
+typedef int                   __kernel_clock_t32;
+typedef int                   __kernel_pid_t32;
+typedef unsigned short        __kernel_ipc_pid_t32;
+typedef unsigned short        __kernel_uid_t32;
+typedef unsigned short        __kernel_gid_t32;
+typedef unsigned short        __kernel_dev_t32;
+typedef unsigned int          __kernel_ino_t32;
+typedef unsigned short        __kernel_mode_t32;
+typedef unsigned short        __kernel_umode_t32;
+typedef short                 __kernel_nlink_t32;
+typedef int                   __kernel_daddr_t32;
+typedef int                   __kernel_off_t32;
+typedef unsigned int          __kernel_caddr_t32;
+typedef long                  __kernel_loff_t32;
+typedef __kernel_fsid_t               __kernel_fsid_t32;
+
+
+/* fcntl.h */
+struct flock32 {
+       short l_type;
+       short l_whence;
+       __kernel_off_t32 l_start;
+       __kernel_off_t32 l_len;
+       __kernel_pid_t32 l_pid;
+};
+
+
+struct ia32_flock64 {
+       short  l_type;
+       short  l_whence;
+       loff_t l_start;  /* unnatural alignment */
+       loff_t l_len;
+       pid_t  l_pid;
+} __attribute__((packed));
+
+#define F_GETLK64      12      /*  using 'struct flock64' */
+#define F_SETLK64      13
+#define F_SETLKW64     14
+
+
+
+/* sigcontext.h */
+/* The x86-64 port uses FXSAVE without prefix; thus a 32bit compatible
+   FXSAVE layout. The additional XMM registers are added, but they're
+   in currently unused space. Hopefully nobody else will use them. */
+#define _fpstate_ia32 _fpstate
+
+struct sigcontext_ia32 {
+       unsigned short gs, __gsh;
+       unsigned short fs, __fsh;
+       unsigned short es, __esh;
+       unsigned short ds, __dsh;
+       unsigned int edi;
+       unsigned int esi;
+       unsigned int ebp;
+       unsigned int esp;
+       unsigned int ebx;
+       unsigned int edx;
+       unsigned int ecx;
+       unsigned int eax;
+       unsigned int trapno;
+       unsigned int err;
+       unsigned int eip;
+       unsigned short cs, __csh;
+       unsigned int eflags;
+       unsigned int esp_at_signal;
+       unsigned short ss, __ssh;
+       unsigned int fpstate;           /* really (struct _fpstate_ia32 *) */
+       unsigned int oldmask;
+       unsigned int cr2;
+};
+
+/* signal.h */
+#define _IA32_NSIG            64
+#define _IA32_NSIG_BPW        32
+#define _IA32_NSIG_WORDS              (_IA32_NSIG / _IA32_NSIG_BPW)
+
+typedef struct {
+       unsigned int sig[_IA32_NSIG_WORDS];
+} sigset32_t;
+
+struct sigaction32 {
+       unsigned int  sa_handler;       /* Really a pointer, but need to deal 
+                                            with 32 bits */
+       unsigned int sa_flags;
+       unsigned int sa_restorer;       /* Another 32 bit pointer */
+       sigset32_t sa_mask;             /* A 32 bit mask */
+};
+
+typedef unsigned int old_sigset32_t;   /* at least 32 bits */
+
+struct old_sigaction32 {
+       unsigned int  sa_handler;       /* Really a pointer, but need to deal 
+                                            with 32 bits */
+       old_sigset32_t sa_mask;         /* A 32 bit mask */
+       unsigned int sa_flags;
+       unsigned int sa_restorer;       /* Another 32 bit pointer */
+};
+
+typedef struct sigaltstack_ia32 {
+       unsigned int    ss_sp;
+       int             ss_flags;
+       unsigned int    ss_size;
+} stack_ia32_t;
+
+struct ucontext_ia32 {
+       unsigned int      uc_flags;
+       unsigned int      uc_link;
+       stack_ia32_t      uc_stack;
+       struct sigcontext_ia32 uc_mcontext;
+       sigset32_t        uc_sigmask;   /* mask last for extensibility */
+};
+
+struct stat32 {
+       unsigned short st_dev;
+       unsigned short __pad1;
+       unsigned int st_ino;
+       unsigned short st_mode;
+       unsigned short st_nlink;
+       unsigned short st_uid;
+       unsigned short st_gid;
+       unsigned short st_rdev;
+       unsigned short __pad2;
+       unsigned int  st_size;
+       unsigned int  st_blksize;
+       unsigned int  st_blocks;
+       unsigned int  st_atime;
+       unsigned int  __unused1;
+       unsigned int  st_mtime;
+       unsigned int  __unused2;
+       unsigned int  st_ctime;
+       unsigned int  __unused3;
+       unsigned int  __unused4;
+       unsigned int  __unused5;
+};
+
+
+/* This matches struct stat64 in glibc2.2, hence the absolutely
+ * insane amounts of padding around dev_t's.
+ */
+struct stat64 {
+       unsigned long long      st_dev;
+       unsigned char           __pad0[4];
+
+#define STAT64_HAS_BROKEN_ST_INO       1
+       unsigned int            __st_ino;
+
+       unsigned int            st_mode;
+       unsigned int            st_nlink;
+
+       unsigned int            st_uid;
+       unsigned int            st_gid;
+
+       unsigned long long      st_rdev;
+       unsigned char           __pad3[4];
+
+       long long               st_size;
+       unsigned int            st_blksize;
+
+       long long               st_blocks;/* Number 512-byte blocks allocated. */
+
+       unsigned long long      st_atime;
+       unsigned long long      st_mtime;
+       unsigned long long      st_ctime;
+
+       unsigned long long      st_ino;
+} __attribute__((packed));
+
+
+struct statfs32 {
+       int f_type;
+       int f_bsize;
+       int f_blocks;
+       int f_bfree;
+       int f_bavail;
+       int f_files;
+       int f_ffree;
+       __kernel_fsid_t32 f_fsid;
+       int f_namelen;  /* SunOS ignores this field. */
+       int f_spare[6];
+};
+
+typedef union sigval32 {
+       int sival_int;
+       unsigned int sival_ptr;
+} sigval_t32;
+
+typedef struct siginfo32 {
+       int si_signo;
+       int si_errno;
+       int si_code;
+
+       union {
+               int _pad[((128/sizeof(int)) - 3)];
+
+               /* kill() */
+               struct {
+                       unsigned int _pid;      /* sender's pid */
+                       unsigned int _uid;      /* sender's uid */
+               } _kill;
+
+               /* POSIX.1b timers */
+               struct {
+                       unsigned int _timer1;
+                       unsigned int _timer2;
+               } _timer;
+
+               /* POSIX.1b signals */
+               struct {
+                       unsigned int _pid;      /* sender's pid */
+                       unsigned int _uid;      /* sender's uid */
+                       sigval_t32 _sigval;
+               } _rt;
+
+               /* SIGCHLD */
+               struct {
+                       unsigned int _pid;      /* which child */
+                       unsigned int _uid;      /* sender's uid */
+                       int _status;            /* exit code */
+                       __kernel_clock_t32 _utime;
+                       __kernel_clock_t32 _stime;
+               } _sigchld;
+
+               /* SIGILL, SIGFPE, SIGSEGV, SIGBUS */
+               struct {
+                       unsigned int _addr;     /* faulting insn/memory ref. */
+               } _sigfault;
+
+               /* SIGPOLL */
+               struct {
+                       int _band;      /* POLL_IN, POLL_OUT, POLL_MSG */
+                       int _fd;
+               } _sigpoll;
+       } _sifields;
+} siginfo_t32;
+
+
+struct ustat32 {
+       __u32   f_tfree;
+       __kernel_ino_t32                f_tinode;
+       char                    f_fname[6];
+       char                    f_fpack[6];
+};
+
+struct iovec32 { 
+       unsigned int iov_base; 
+       int iov_len; 
+};
+
+
+#ifdef __KERNEL__
+struct iovec *get_iovec32(struct iovec32 *iov32, struct iovec *iov_buf, u32 count, int type);
+#endif
+
+
+#endif /* CONFIG_IA32_EMULATION */
+#endif /* _ASM_X86_64_IA32_H */
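The fixed-width fields in the structures above exist so that 64-bit kernel values can be marshalled into the layouts 32-bit user space expects. As an illustration only (put_size32() is a hypothetical helper, not part of this patch), narrowing a 64-bit file size into struct stat32 looks roughly like this:

#include <linux/errno.h>

/* Copy a 64-bit size into the 32-bit field, failing if it cannot fit. */
static int put_size32(struct stat32 *dst, long long size)
{
        if (size != (long long)(unsigned int)size)
                return -EOVERFLOW;      /* too large for the old 32-bit stat */
        dst->st_size = size;
        return 0;
}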
diff --git a/include/asm-x86_64/ia32_unistd.h b/include/asm-x86_64/ia32_unistd.h
new file mode 100644 (file)
index 0000000..65ce612
--- /dev/null
@@ -0,0 +1,251 @@
+#ifndef _ASM_X86_64_IA32_UNISTD_H_
+#define _ASM_X86_64_IA32_UNISTD_H_
+
+/*
+ * This file contains the system call numbers of the ia32 port,
+ * this is for the kernel only.
+ */
+
+#define __NR_ia32_exit           1
+#define __NR_ia32_fork           2
+#define __NR_ia32_read           3
+#define __NR_ia32_write                  4
+#define __NR_ia32_open           5
+#define __NR_ia32_close                  6
+#define __NR_ia32_waitpid                7
+#define __NR_ia32_creat                  8
+#define __NR_ia32_link           9
+#define __NR_ia32_unlink                10
+#define __NR_ia32_execve                11
+#define __NR_ia32_chdir                 12
+#define __NR_ia32_time          13
+#define __NR_ia32_mknod                 14
+#define __NR_ia32_chmod                 15
+#define __NR_ia32_lchown                16
+#define __NR_ia32_break                 17
+#define __NR_ia32_oldstat               18
+#define __NR_ia32_lseek                 19
+#define __NR_ia32_getpid                20
+#define __NR_ia32_mount                 21
+#define __NR_ia32_umount                22
+#define __NR_ia32_setuid                23
+#define __NR_ia32_getuid                24
+#define __NR_ia32_stime                 25
+#define __NR_ia32_ptrace                26
+#define __NR_ia32_alarm                 27
+#define __NR_ia32_oldfstat              28
+#define __NR_ia32_pause                 29
+#define __NR_ia32_utime                 30
+#define __NR_ia32_stty          31
+#define __NR_ia32_gtty          32
+#define __NR_ia32_access                33
+#define __NR_ia32_nice          34
+#define __NR_ia32_ftime                 35
+#define __NR_ia32_sync          36
+#define __NR_ia32_kill          37
+#define __NR_ia32_rename                38
+#define __NR_ia32_mkdir                 39
+#define __NR_ia32_rmdir                 40
+#define __NR_ia32_dup           41
+#define __NR_ia32_pipe          42
+#define __NR_ia32_times                 43
+#define __NR_ia32_prof          44
+#define __NR_ia32_brk           45
+#define __NR_ia32_setgid                46
+#define __NR_ia32_getgid                47
+#define __NR_ia32_signal                48
+#define __NR_ia32_geteuid               49
+#define __NR_ia32_getegid               50
+#define __NR_ia32_acct          51
+#define __NR_ia32_umount2               52
+#define __NR_ia32_lock          53
+#define __NR_ia32_ioctl                 54
+#define __NR_ia32_fcntl                 55
+#define __NR_ia32_mpx           56
+#define __NR_ia32_setpgid               57
+#define __NR_ia32_ulimit                58
+#define __NR_ia32_oldolduname   59
+#define __NR_ia32_umask                 60
+#define __NR_ia32_chroot                61
+#define __NR_ia32_ustat                 62
+#define __NR_ia32_dup2          63
+#define __NR_ia32_getppid               64
+#define __NR_ia32_getpgrp               65
+#define __NR_ia32_setsid                66
+#define __NR_ia32_sigaction             67
+#define __NR_ia32_sgetmask              68
+#define __NR_ia32_ssetmask              69
+#define __NR_ia32_setreuid              70
+#define __NR_ia32_setregid              71
+#define __NR_ia32_sigsuspend            72
+#define __NR_ia32_sigpending            73
+#define __NR_ia32_sethostname   74
+#define __NR_ia32_setrlimit             75
+#define __NR_ia32_getrlimit             76     /* Back compatible 2Gig limited rlimit */
+#define __NR_ia32_getrusage             77
+#define __NR_ia32_gettimeofday  78
+#define __NR_ia32_settimeofday  79
+#define __NR_ia32_getgroups             80
+#define __NR_ia32_setgroups             81
+#define __NR_ia32_select                82
+#define __NR_ia32_symlink               83
+#define __NR_ia32_oldlstat              84
+#define __NR_ia32_readlink              85
+#define __NR_ia32_uselib                86
+#define __NR_ia32_swapon                87
+#define __NR_ia32_reboot                88
+#define __NR_ia32_readdir               89
+#define __NR_ia32_mmap          90
+#define __NR_ia32_munmap                91
+#define __NR_ia32_truncate              92
+#define __NR_ia32_ftruncate             93
+#define __NR_ia32_fchmod                94
+#define __NR_ia32_fchown                95
+#define __NR_ia32_getpriority   96
+#define __NR_ia32_setpriority   97
+#define __NR_ia32_profil                98
+#define __NR_ia32_statfs                99
+#define __NR_ia32_fstatfs              100
+#define __NR_ia32_ioperm               101
+#define __NR_ia32_socketcall           102
+#define __NR_ia32_syslog               103
+#define __NR_ia32_setitimer            104
+#define __NR_ia32_getitimer            105
+#define __NR_ia32_stat         106
+#define __NR_ia32_lstat                107
+#define __NR_ia32_fstat                108
+#define __NR_ia32_olduname             109
+#define __NR_ia32_iopl         110
+#define __NR_ia32_vhangup              111
+#define __NR_ia32_idle         112
+#define __NR_ia32_vm86old              113
+#define __NR_ia32_wait4                114
+#define __NR_ia32_swapoff              115
+#define __NR_ia32_sysinfo              116
+#define __NR_ia32_ipc          117
+#define __NR_ia32_fsync                118
+#define __NR_ia32_sigreturn            119
+#define __NR_ia32_clone                120
+#define __NR_ia32_setdomainname        121
+#define __NR_ia32_uname                122
+#define __NR_ia32_modify_ldt           123
+#define __NR_ia32_adjtimex             124
+#define __NR_ia32_mprotect             125
+#define __NR_ia32_sigprocmask  126
+#define __NR_ia32_create_module        127
+#define __NR_ia32_init_module  128
+#define __NR_ia32_delete_module        129
+#define __NR_ia32_get_kernel_syms      130
+#define __NR_ia32_quotactl             131
+#define __NR_ia32_getpgid              132
+#define __NR_ia32_fchdir               133
+#define __NR_ia32_bdflush              134
+#define __NR_ia32_sysfs                135
+#define __NR_ia32_personality  136
+#define __NR_ia32_afs_syscall  137 /* Syscall for Andrew File System */
+#define __NR_ia32_setfsuid             138
+#define __NR_ia32_setfsgid             139
+#define __NR_ia32__llseek              140
+#define __NR_ia32_getdents             141
+#define __NR_ia32__newselect           142
+#define __NR_ia32_flock                143
+#define __NR_ia32_msync                144
+#define __NR_ia32_readv                145
+#define __NR_ia32_writev               146
+#define __NR_ia32_getsid               147
+#define __NR_ia32_fdatasync            148
+#define __NR_ia32__sysctl              149
+#define __NR_ia32_mlock                150
+#define __NR_ia32_munlock              151
+#define __NR_ia32_mlockall             152
+#define __NR_ia32_munlockall           153
+#define __NR_ia32_sched_setparam               154
+#define __NR_ia32_sched_getparam               155
+#define __NR_ia32_sched_setscheduler           156
+#define __NR_ia32_sched_getscheduler           157
+#define __NR_ia32_sched_yield          158
+#define __NR_ia32_sched_get_priority_max       159
+#define __NR_ia32_sched_get_priority_min       160
+#define __NR_ia32_sched_rr_get_interval        161
+#define __NR_ia32_nanosleep            162
+#define __NR_ia32_mremap               163
+#define __NR_ia32_setresuid            164
+#define __NR_ia32_getresuid            165
+#define __NR_ia32_vm86         166
+#define __NR_ia32_query_module 167
+#define __NR_ia32_poll         168
+#define __NR_ia32_nfsservctl           169
+#define __NR_ia32_setresgid            170
+#define __NR_ia32_getresgid            171
+#define __NR_ia32_prctl              172
+#define __NR_ia32_rt_sigreturn 173
+#define __NR_ia32_rt_sigaction 174
+#define __NR_ia32_rt_sigprocmask       175
+#define __NR_ia32_rt_sigpending        176
+#define __NR_ia32_rt_sigtimedwait      177
+#define __NR_ia32_rt_sigqueueinfo      178
+#define __NR_ia32_rt_sigsuspend        179
+#define __NR_ia32_pread                180
+#define __NR_ia32_pwrite               181
+#define __NR_ia32_chown                182
+#define __NR_ia32_getcwd               183
+#define __NR_ia32_capget               184
+#define __NR_ia32_capset               185
+#define __NR_ia32_sigaltstack  186
+#define __NR_ia32_sendfile             187
+#define __NR_ia32_getpmsg              188     /* some people actually want streams */
+#define __NR_ia32_putpmsg              189     /* some people actually want streams */
+#define __NR_ia32_vfork                190
+#define __NR_ia32_ugetrlimit           191     /* SuS compliant getrlimit */
+#define __NR_ia32_mmap2                192
+#define __NR_ia32_truncate64           193
+#define __NR_ia32_ftruncate64  194
+#define __NR_ia32_stat64               195
+#define __NR_ia32_lstat64              196
+#define __NR_ia32_fstat64              197
+#define __NR_ia32_lchown32             198
+#define __NR_ia32_getuid32             199
+#define __NR_ia32_getgid32             200
+#define __NR_ia32_geteuid32            201
+#define __NR_ia32_getegid32            202
+#define __NR_ia32_setreuid32           203
+#define __NR_ia32_setregid32           204
+#define __NR_ia32_getgroups32  205
+#define __NR_ia32_setgroups32  206
+#define __NR_ia32_fchown32             207
+#define __NR_ia32_setresuid32  208
+#define __NR_ia32_getresuid32  209
+#define __NR_ia32_setresgid32  210
+#define __NR_ia32_getresgid32  211
+#define __NR_ia32_chown32              212
+#define __NR_ia32_setuid32             213
+#define __NR_ia32_setgid32             214
+#define __NR_ia32_setfsuid32           215
+#define __NR_ia32_setfsgid32           216
+#define __NR_ia32_pivot_root           217
+#define __NR_ia32_mincore              218
+#define __NR_ia32_madvise              219
+#define __NR_ia32_madvise1             219     /* delete when C lib stub is removed */
+#define __NR_ia32_getdents64           220
+#define __NR_ia32_fcntl64              221
+#define __NR_ia32_tuxcall              222
+#define __NR_ia32_security             223
+#define __NR_ia32_gettid               224
+#define __NR_ia32_readahead            225
+#define __NR_ia32_setxattr             226
+#define __NR_ia32_lsetxattr            227
+#define __NR_ia32_fsetxattr            228
+#define __NR_ia32_getxattr             229
+#define __NR_ia32_lgetxattr            230
+#define __NR_ia32_fgetxattr            231
+#define __NR_ia32_listxattr            232
+#define __NR_ia32_llistxattr           233
+#define __NR_ia32_flistxattr           234
+#define __NR_ia32_removexattr  235
+#define __NR_ia32_lremovexattr 236
+#define __NR_ia32_fremovexattr 237
+#define __NR_ia32_tkill                238
+
+#define IA32_NR_syscalls 240   /* must be greater than the biggest syscall number! */
+
+#endif /* _ASM_X86_64_IA32_UNISTD_H_ */
diff --git a/include/asm-x86_64/ide.h b/include/asm-x86_64/ide.h
new file mode 100644 (file)
index 0000000..6642abf
--- /dev/null
@@ -0,0 +1,128 @@
+/*
+ *  linux/include/asm-x86_64/ide.h
+ *
+ *  Copyright (C) 1994-1996  Linus Torvalds & authors
+ */
+
+/*
+ *  This file contains the x86_64 architecture specific IDE code.
+ */
+
+#ifndef __ASMi386_IDE_H
+#define __ASMi386_IDE_H
+
+#ifdef __KERNEL__
+
+#include <linux/config.h>
+
+#ifndef MAX_HWIFS
+# ifdef CONFIG_BLK_DEV_IDEPCI
+#define MAX_HWIFS      10
+# else
+#define MAX_HWIFS      6
+# endif
+#endif
+
+#define ide__sti()     __sti()
+
+static __inline__ int ide_default_irq(ide_ioreg_t base)
+{
+       switch (base) {
+               case 0x1f0: return 14;
+               case 0x170: return 15;
+               case 0x1e8: return 11;
+               case 0x168: return 10;
+               case 0x1e0: return 8;
+               case 0x160: return 12;
+               default:
+                       return 0;
+       }
+}
+
+static __inline__ ide_ioreg_t ide_default_io_base(int index)
+{
+       switch (index) {
+               case 0: return 0x1f0;
+               case 1: return 0x170;
+               case 2: return 0x1e8;
+               case 3: return 0x168;
+               case 4: return 0x1e0;
+               case 5: return 0x160;
+               default:
+                       return 0;
+       }
+}
+
+static __inline__ void ide_init_hwif_ports(hw_regs_t *hw, ide_ioreg_t data_port, ide_ioreg_t ctrl_port, int *irq)
+{
+       ide_ioreg_t reg = data_port;
+       int i;
+
+       for (i = IDE_DATA_OFFSET; i <= IDE_STATUS_OFFSET; i++) {
+               hw->io_ports[i] = reg;
+               reg += 1;
+       }
+       if (ctrl_port) {
+               hw->io_ports[IDE_CONTROL_OFFSET] = ctrl_port;
+       } else {
+               hw->io_ports[IDE_CONTROL_OFFSET] = hw->io_ports[IDE_DATA_OFFSET] + 0x206;
+       }
+       if (irq != NULL)
+               *irq = 0;
+       hw->io_ports[IDE_IRQ_OFFSET] = 0;
+}
+
+static __inline__ void ide_init_default_hwifs(void)
+{
+#ifndef CONFIG_BLK_DEV_IDEPCI
+       hw_regs_t hw;
+       int index;
+
+       for(index = 0; index < MAX_HWIFS; index++) {
+               ide_init_hwif_ports(&hw, ide_default_io_base(index), 0, NULL);
+               hw.irq = ide_default_irq(ide_default_io_base(index));
+               ide_register_hw(&hw, NULL);
+       }
+#endif /* CONFIG_BLK_DEV_IDEPCI */
+}
+
+typedef union {
+       unsigned all                    : 8;    /* all of the bits together */
+       struct {
+               unsigned head           : 4;    /* always zeros here */
+               unsigned unit           : 1;    /* drive select number, 0 or 1 */
+               unsigned bit5           : 1;    /* always 1 */
+               unsigned lba            : 1;    /* using LBA instead of CHS */
+               unsigned bit7           : 1;    /* always 1 */
+       } b;
+} select_t;
+
+typedef union {
+       unsigned all                    : 8;    /* all of the bits together */
+       struct {
+               unsigned bit0           : 1;
+               unsigned nIEN           : 1;    /* device INTRQ to host */
+               unsigned SRST           : 1;    /* host soft reset bit */
+               unsigned bit3           : 1;    /* ATA-2 thingy */
+               unsigned reserved456    : 3;
+               unsigned HOB            : 1;    /* 48-bit address ordering */
+       } b;
+} control_t;
+
+#define ide_request_irq(irq,hand,flg,dev,id)   request_irq((irq),(hand),(flg),(dev),(id))
+#define ide_free_irq(irq,dev_id)               free_irq((irq), (dev_id))
+#define ide_check_region(from,extent)          check_region((from), (extent))
+#define ide_request_region(from,extent,name)   request_region((from), (extent), (name))
+#define ide_release_region(from,extent)                release_region((from), (extent))
+
+/*
+ * The following are only needed on the m68k ports; they are no-ops here
+ */
+#define ide_ack_intr(hwif)             (1)
+#define ide_fix_driveid(id)            do {} while (0)
+#define ide_release_lock(lock)         do {} while (0)
+#define ide_get_lock(lock, hdlr, data) do {} while (0)
+
+#endif /* __KERNEL__ */
+
+#endif /* __ASMi386_IDE_H */
diff --git a/include/asm-x86_64/init.h b/include/asm-x86_64/init.h
new file mode 100644 (file)
index 0000000..17d2155
--- /dev/null
@@ -0,0 +1 @@
+#error "<asm/init.h> should never be used - use <linux/init.h> instead"
diff --git a/include/asm-x86_64/io.h b/include/asm-x86_64/io.h
new file mode 100644 (file)
index 0000000..e66654c
--- /dev/null
@@ -0,0 +1,270 @@
+#ifndef _ASM_IO_H
+#define _ASM_IO_H
+
+/*
+ * This file contains the definitions for the x86 IO instructions
+ * inb/inw/inl/outb/outw/outl and the "string versions" of the same
+ * (insb/insw/insl/outsb/outsw/outsl). You can also use "pausing"
+ * versions of the single-IO instructions (inb_p/inw_p/..).
+ *
+ * This file is not meant to be obfuscating: it's just complicated
+ * to (a) handle it all in a way that makes gcc able to optimize it
+ * as well as possible and (b) avoid writing the same thing
+ * over and over again with slight variations and possibly making a
+ * mistake somewhere.
+ */
+
+/*
+ * Thanks to James van Artsdalen for a better timing-fix than
+ * the two short jumps: using outb's to a nonexistent port seems
+ * to guarantee better timings even on fast machines.
+ *
+ * On the other hand, I'd like to be sure of a non-existent port:
+ * I feel a bit unsafe about using 0x80 (should be safe, though)
+ *
+ *             Linus
+ */
+
+ /*
+  *  Bit simplified and optimized by Jan Hubicka
+  *  Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999.
+  *
+  *  isa_memset_io, isa_memcpy_fromio, isa_memcpy_toio added,
+  *  isa_read[wl] and isa_write[wl] fixed
+  *  - Arnaldo Carvalho de Melo <acme@conectiva.com.br>
+  */
+
+#ifdef SLOW_IO_BY_JUMPING
+#define __SLOW_DOWN_IO "\njmp 1f\n1:\tjmp 1f\n1:"
+#else
+#define __SLOW_DOWN_IO "\noutb %%al,$0x80"
+#endif
+
+#ifdef REALLY_SLOW_IO
+#define __FULL_SLOW_DOWN_IO __SLOW_DOWN_IO __SLOW_DOWN_IO __SLOW_DOWN_IO __SLOW_DOWN_IO
+#else
+#define __FULL_SLOW_DOWN_IO __SLOW_DOWN_IO
+#endif
+
+/*
+ * Talk about misusing macros..
+ */
+#define __OUT1(s,x) \
+extern inline void out##s(unsigned x value, unsigned short port) {
+
+#define __OUT2(s,s1,s2) \
+__asm__ __volatile__ ("out" #s " %" s1 "0,%" s2 "1"
+
+#define __OUT(s,s1,x) \
+__OUT1(s,x) __OUT2(s,s1,"w") : : "a" (value), "Nd" (port)); } \
+__OUT1(s##_p,x) __OUT2(s,s1,"w") __FULL_SLOW_DOWN_IO : : "a" (value), "Nd" (port));} \
+
+#define __IN1(s) \
+extern inline RETURN_TYPE in##s(unsigned short port) { RETURN_TYPE _v;
+
+#define __IN2(s,s1,s2) \
+__asm__ __volatile__ ("in" #s " %" s2 "1,%" s1 "0"
+
+#define __IN(s,s1,i...) \
+__IN1(s) __IN2(s,s1,"w") : "=a" (_v) : "Nd" (port) ,##i ); return _v; } \
+__IN1(s##_p) __IN2(s,s1,"w") __FULL_SLOW_DOWN_IO : "=a" (_v) : "Nd" (port) ,##i ); return _v; } \
+
+#define __INS(s) \
+extern inline void ins##s(unsigned short port, void * addr, unsigned long count) \
+{ __asm__ __volatile__ ("rep ; ins" #s \
+: "=D" (addr), "=c" (count) : "d" (port),"0" (addr),"1" (count)); }
+
+#define __OUTS(s) \
+extern inline void outs##s(unsigned short port, const void * addr, unsigned long count) \
+{ __asm__ __volatile__ ("rep ; outs" #s \
+: "=S" (addr), "=c" (count) : "d" (port),"0" (addr),"1" (count)); }
+
+#define RETURN_TYPE unsigned char
+__IN(b,"")
+#undef RETURN_TYPE
+#define RETURN_TYPE unsigned short
+__IN(w,"")
+#undef RETURN_TYPE
+#define RETURN_TYPE unsigned int
+__IN(l,"")
+#undef RETURN_TYPE
+
+__OUT(b,"b",char)
+__OUT(w,"w",short)
+__OUT(l,,int)
+
+__INS(b)
+__INS(w)
+__INS(l)
+
+__OUTS(b)
+__OUTS(w)
+__OUTS(l)
+
+#define IO_SPACE_LIMIT 0xffff
+
+#ifdef __KERNEL__
+
+#include <linux/vmalloc.h>
+
+/*
+ * Temporary debugging check to catch old code using
+ * unmapped ISA addresses. Will be removed in 2.4.
+ */
+#ifdef CONFIG_IO_DEBUG
+  extern void *__io_virt_debug(unsigned long x, const char *file, int line);
+  #define __io_virt(x) __io_virt_debug((unsigned long)(x), __FILE__, __LINE__)
+#else
+  #define __io_virt(x) ((void *)(x))
+#endif
+
+/*
+ * Change virtual addresses to physical addresses and vv.
+ * These are pretty trivial
+ */
+extern inline unsigned long virt_to_phys(volatile void * address)
+{
+       return __pa(address);
+}
+
+extern inline void * phys_to_virt(unsigned long address)
+{
+       return __va(address);
+}
+
+/*
+ * Change "struct page" to physical address.
+ */
+#define page_to_phys(page)     ((page - mem_map) << PAGE_SHIFT)
+
+extern void * __ioremap(unsigned long offset, unsigned long size, unsigned long flags);
+
+extern inline void * ioremap (unsigned long offset, unsigned long size)
+{
+       return __ioremap(offset, size, 0);
+}
+
+/*
+ * This one maps high address device memory and turns off caching for that area.
+ * It's useful if some control registers are in such an area and write combining
+ * or read caching is not desirable:
+ */
+extern inline void * ioremap_nocache (unsigned long offset, unsigned long size)
+{
+        return __ioremap(offset, size, _PAGE_PCD);
+}
+
+extern void iounmap(void *addr);
+
+/*
+ * ISA I/O bus memory addresses are 1:1 with the physical address.
+ */
+#define isa_virt_to_bus virt_to_phys
+#define isa_page_to_bus page_to_phys
+#define isa_bus_to_virt phys_to_virt
+
+/*
+ * However PCI ones are not necessarily 1:1 and therefore these interfaces
+ * are forbidden in portable PCI drivers.
+ */
+extern unsigned long virt_to_bus_not_defined_use_pci_map(volatile void *addr);
+#define virt_to_bus virt_to_bus_not_defined_use_pci_map
+extern unsigned long bus_to_virt_not_defined_use_pci_map(volatile void *addr);
+#define bus_to_virt bus_to_virt_not_defined_use_pci_map
+
+/*
+ * readX/writeX() are used to access memory mapped devices. On some
+ * architectures the memory mapped IO stuff needs to be accessed
+ * differently. On the x86 architecture, we just read/write the
+ * memory location directly.
+ */
+
+#define readb(addr) (*(volatile unsigned char *) __io_virt(addr))
+#define readw(addr) (*(volatile unsigned short *) __io_virt(addr))
+#define readl(addr) (*(volatile unsigned int *) __io_virt(addr))
+#define __raw_readb readb
+#define __raw_readw readw
+#define __raw_readl readl
+
+#define writeb(b,addr) (*(volatile unsigned char *) __io_virt(addr) = (b))
+#define writew(b,addr) (*(volatile unsigned short *) __io_virt(addr) = (b))
+#define writel(b,addr) (*(volatile unsigned int *) __io_virt(addr) = (b))
+#define __raw_writeb writeb
+#define __raw_writew writew
+#define __raw_writel writel
+
+#define memset_io(a,b,c)       memset(__io_virt(a),(b),(c))
+#define memcpy_fromio(a,b,c)   memcpy((a),__io_virt(b),(c))
+#define memcpy_toio(a,b,c)     memcpy(__io_virt(a),(b),(c))
+
+/*
+ * ISA space is 'always mapped' on a typical x86 system, no need to
+ * explicitly ioremap() it. The fact that the ISA IO space is mapped
+ * to PAGE_OFFSET is pure coincidence - it does not mean ISA values
+ * are physical addresses. The following constant pointer can be
+ * used as the IO-area pointer (it can be iounmapped as well, so the
+ * analogy with PCI is quite large):
+ */
+#define __ISA_IO_base ((char *)(PAGE_OFFSET))
+
+#define isa_readb(a) readb(__ISA_IO_base + (a))
+#define isa_readw(a) readw(__ISA_IO_base + (a))
+#define isa_readl(a) readl(__ISA_IO_base + (a))
+#define isa_writeb(b,a) writeb(b,__ISA_IO_base + (a))
+#define isa_writew(w,a) writew(w,__ISA_IO_base + (a))
+#define isa_writel(l,a) writel(l,__ISA_IO_base + (a))
+#define isa_memset_io(a,b,c)           memset_io(__ISA_IO_base + (a),(b),(c))
+#define isa_memcpy_fromio(a,b,c)       memcpy_fromio((a),__ISA_IO_base + (b),(c))
+#define isa_memcpy_toio(a,b,c)         memcpy_toio(__ISA_IO_base + (a),(b),(c))
+
+
+/*
+ * Again, x86-64 does not require special functions for memory-mapped IO.
+ */
+
+#define eth_io_copy_and_sum(a,b,c,d)           eth_copy_and_sum((a),__io_virt(b),(c),(d))
+#define isa_eth_io_copy_and_sum(a,b,c,d)       eth_copy_and_sum((a),__io_virt(__ISA_IO_base + (b)),(c),(d))
+
+static inline int check_signature(unsigned long io_addr,
+       const unsigned char *signature, int length)
+{
+       int retval = 0;
+       do {
+               if (readb(io_addr) != *signature)
+                       goto out;
+               io_addr++;
+               signature++;
+               length--;
+       } while (length);
+       retval = 1;
+out:
+       return retval;
+}
+
+static inline int isa_check_signature(unsigned long io_addr,
+       const unsigned char *signature, int length)
+{
+       int retval = 0;
+       do {
+               if (isa_readb(io_addr) != *signature)
+                       goto out;
+               io_addr++;
+               signature++;
+               length--;
+       } while (length);
+       retval = 1;
+out:
+       return retval;
+}
+
+/* Nothing to do */
+
+#define dma_cache_inv(_start,_size)            do { } while (0)
+#define dma_cache_wback(_start,_size)          do { } while (0)
+#define dma_cache_wback_inv(_start,_size)      do { } while (0)
+
+#define flush_write_buffers() 
+
+#endif /* __KERNEL__ */
+
+#endif
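The interfaces above are intended to be used as in the following driver-style sketch; the physical base address and the register offsets are invented for the example:

#include <linux/kernel.h>
#include <linux/errno.h>
#include <asm/io.h>

static int probe_example(void)
{
        void *regs = ioremap(0xf0000000UL, 0x1000);     /* hypothetical device */

        if (!regs)
                return -ENOMEM;
        writel(1, regs + 0x10);                 /* hypothetical enable register */
        if (readl(regs + 0x14) == 0)            /* hypothetical status register */
                printk("example device not ready\n");
        iounmap(regs);
        return 0;
}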
diff --git a/include/asm-x86_64/io_apic.h b/include/asm-x86_64/io_apic.h
new file mode 100644 (file)
index 0000000..3e90f19
--- /dev/null
@@ -0,0 +1,148 @@
+#ifndef __ASM_IO_APIC_H
+#define __ASM_IO_APIC_H
+
+#include <linux/config.h>
+#include <asm/types.h>
+#include <asm/mpspec.h>
+
+/*
+ * Intel IO-APIC support for SMP and UP systems.
+ *
+ * Copyright (C) 1997, 1998, 1999, 2000 Ingo Molnar
+ */
+
+#ifdef CONFIG_X86_IO_APIC
+
+#define APIC_MISMATCH_DEBUG
+
+#define IO_APIC_BASE(idx) \
+               ((volatile int *)__fix_to_virt(FIX_IO_APIC_BASE_0 + idx))
+
+/*
+ * The structure of the IO-APIC:
+ */
+struct IO_APIC_reg_00 {
+       __u32   __reserved_2    : 24,
+               ID              :  4,
+               __reserved_1    :  4;
+} __attribute__ ((packed));
+
+struct IO_APIC_reg_01 {
+       __u32   version         :  8,
+               __reserved_2    :  7,
+               PRQ             :  1,
+               entries         :  8,
+               __reserved_1    :  8;
+} __attribute__ ((packed));
+
+struct IO_APIC_reg_02 {
+       __u32   __reserved_2    : 24,
+               arbitration     :  4,
+               __reserved_1    :  4;
+} __attribute__ ((packed));
+
+/*
+ * # of IO-APICs and # of IRQ routing registers
+ */
+extern int nr_ioapics;
+extern int nr_ioapic_registers[MAX_IO_APICS];
+
+enum ioapic_irq_destination_types {
+       dest_Fixed = 0,
+       dest_LowestPrio = 1,
+       dest_SMI = 2,
+       dest__reserved_1 = 3,
+       dest_NMI = 4,
+       dest_INIT = 5,
+       dest__reserved_2 = 6,
+       dest_ExtINT = 7
+};
+
+struct IO_APIC_route_entry {
+       __u32   vector          :  8,
+               delivery_mode   :  3,   /* 000: FIXED
+                                        * 001: lowest prio
+                                        * 111: ExtINT
+                                        */
+               dest_mode       :  1,   /* 0: physical, 1: logical */
+               delivery_status :  1,
+               polarity        :  1,
+               irr             :  1,
+               trigger         :  1,   /* 0: edge, 1: level */
+               mask            :  1,   /* 0: enabled, 1: disabled */
+               __reserved_2    : 15;
+
+       union {         struct { __u32
+                                       __reserved_1    : 24,
+                                       physical_dest   :  4,
+                                       __reserved_2    :  4;
+                       } physical;
+
+                       struct { __u32
+                                       __reserved_1    : 24,
+                                       logical_dest    :  8;
+                       } logical;
+       } dest;
+
+} __attribute__ ((packed));
+
+/*
+ * MP-BIOS irq configuration table structures:
+ */
+
+/* I/O APIC entries */
+extern struct mpc_config_ioapic mp_ioapics[MAX_IO_APICS];
+
+/* # of MP IRQ source entries */
+extern int mp_irq_entries;
+
+/* MP IRQ source entries */
+extern struct mpc_config_intsrc mp_irqs[MAX_IRQ_SOURCES];
+
+/* non-0 if default (table-less) MP configuration */
+extern int mpc_default_type;
+
+static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg)
+{
+       *IO_APIC_BASE(apic) = reg;
+       return *(IO_APIC_BASE(apic)+4);
+}
+
+static inline void io_apic_write(unsigned int apic, unsigned int reg, unsigned int value)
+{
+       *IO_APIC_BASE(apic) = reg;
+       *(IO_APIC_BASE(apic)+4) = value;
+}
+
+/*
+ * Re-write a value: to be used for read-modify-write
+ * cycles where the read already set up the index register.
+ */
+static inline void io_apic_modify(unsigned int apic, unsigned int value)
+{
+       *(IO_APIC_BASE(apic)+4) = value;
+}
+
+/*
+ * Synchronize the IO-APIC and the CPU by doing
+ * a dummy read from the IO-APIC
+ */
+static inline void io_apic_sync(unsigned int apic)
+{
+       (void) *(IO_APIC_BASE(apic)+4);
+}
+
+/* 1 if "noapic" boot option passed */
+extern int skip_ioapic_setup;
+
+/*
+ * If we use the IO-APIC for IRQ routing, disable automatic
+ * assignment of PCI IRQ's.
+ */
+#define io_apic_assign_pci_irqs (mp_irq_entries && !skip_ioapic_setup)
+
+#else  /* !CONFIG_X86_IO_APIC */
+#define io_apic_assign_pci_irqs 0
+#endif
+
+#endif
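io_apic_modify() above exists for read-modify-write cycles in which io_apic_read() has already loaded the IO-APIC index register. A sketch of that pattern, masking redirection entry 0 of the first IO-APIC (register 0x10 is the low dword of entry 0, and bit 16 is the mask bit, matching struct IO_APIC_route_entry above):

static void mask_first_entry_example(void)
{
        unsigned int reg = io_apic_read(0, 0x10);       /* also selects the index */

        reg |= 1 << 16;                                 /* set the mask bit */
        io_apic_modify(0, reg);                         /* no second index write needed */
}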
diff --git a/include/asm-x86_64/ioctl.h b/include/asm-x86_64/ioctl.h
new file mode 100644 (file)
index 0000000..609b663
--- /dev/null
@@ -0,0 +1,75 @@
+/* $Id: ioctl.h,v 1.2 2001/07/04 09:08:13 ak Exp $
+ *
+ * linux/ioctl.h for Linux by H.H. Bergman.
+ */
+
+#ifndef _ASMX8664_IOCTL_H
+#define _ASMX8664_IOCTL_H
+
+/* ioctl command encoding: 32 bits total, command in lower 16 bits,
+ * size of the parameter structure in the lower 14 bits of the
+ * upper 16 bits.
+ * Encoding the size of the parameter structure in the ioctl request
+ * is useful for catching programs compiled with old versions
+ * and to avoid overwriting user space outside the user buffer area.
+ * The highest 2 bits are reserved for indicating the ``access mode''.
+ * NOTE: This limits the max parameter size to 16kB - 1!
+ */
+
+/*
+ * The following is for compatibility across the various Linux
+ * platforms.  The i386 ioctl numbering scheme doesn't really enforce
+ * a type field.  De facto, however, the top 8 bits of the lower 16
+ * bits are indeed used as a type field, so we might just as well make
+ * this explicit here.  Please be sure to use the decoding macros
+ * below from now on.
+ */
+#define _IOC_NRBITS    8
+#define _IOC_TYPEBITS  8
+#define _IOC_SIZEBITS  14
+#define _IOC_DIRBITS   2
+
+#define _IOC_NRMASK    ((1 << _IOC_NRBITS)-1)
+#define _IOC_TYPEMASK  ((1 << _IOC_TYPEBITS)-1)
+#define _IOC_SIZEMASK  ((1 << _IOC_SIZEBITS)-1)
+#define _IOC_DIRMASK   ((1 << _IOC_DIRBITS)-1)
+
+#define _IOC_NRSHIFT   0
+#define _IOC_TYPESHIFT (_IOC_NRSHIFT+_IOC_NRBITS)
+#define _IOC_SIZESHIFT (_IOC_TYPESHIFT+_IOC_TYPEBITS)
+#define _IOC_DIRSHIFT  (_IOC_SIZESHIFT+_IOC_SIZEBITS)
+
+/*
+ * Direction bits.
+ */
+#define _IOC_NONE      0U
+#define _IOC_WRITE     1U
+#define _IOC_READ      2U
+
+#define _IOC(dir,type,nr,size) \
+       (((dir)  << _IOC_DIRSHIFT) | \
+        ((type) << _IOC_TYPESHIFT) | \
+        ((nr)   << _IOC_NRSHIFT) | \
+        ((size) << _IOC_SIZESHIFT))
+
+/* used to create numbers */
+#define _IO(type,nr)           _IOC(_IOC_NONE,(type),(nr),0)
+#define _IOR(type,nr,size)     _IOC(_IOC_READ,(type),(nr),sizeof(size))
+#define _IOW(type,nr,size)     _IOC(_IOC_WRITE,(type),(nr),sizeof(size))
+#define _IOWR(type,nr,size)    _IOC(_IOC_READ|_IOC_WRITE,(type),(nr),sizeof(size))
+
+/* used to decode ioctl numbers.. */
+#define _IOC_DIR(nr)           (((nr) >> _IOC_DIRSHIFT) & _IOC_DIRMASK)
+#define _IOC_TYPE(nr)          (((nr) >> _IOC_TYPESHIFT) & _IOC_TYPEMASK)
+#define _IOC_NR(nr)            (((nr) >> _IOC_NRSHIFT) & _IOC_NRMASK)
+#define _IOC_SIZE(nr)          (((nr) >> _IOC_SIZESHIFT) & _IOC_SIZEMASK)
+
+/* ...and for the drivers/sound files... */
+
+#define IOC_IN         (_IOC_WRITE << _IOC_DIRSHIFT)
+#define IOC_OUT                (_IOC_READ << _IOC_DIRSHIFT)
+#define IOC_INOUT      ((_IOC_WRITE|_IOC_READ) << _IOC_DIRSHIFT)
+#define IOCSIZE_MASK   (_IOC_SIZEMASK << _IOC_SIZESHIFT)
+#define IOCSIZE_SHIFT  (_IOC_SIZESHIFT)
+
+#endif /* _ASMX8664_IOCTL_H */
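A short example of how a driver would use these encoding and decoding macros; the 'E' magic number, command number and struct example_cfg are invented for the illustration:

struct example_cfg { int rate; int flags; };

#define EXAMPLE_GETCFG  _IOR('E', 0x01, struct example_cfg)

/*
 * For this number the decoding macros give:
 *   _IOC_DIR(EXAMPLE_GETCFG)  == _IOC_READ
 *   _IOC_TYPE(EXAMPLE_GETCFG) == 'E'
 *   _IOC_NR(EXAMPLE_GETCFG)   == 0x01
 *   _IOC_SIZE(EXAMPLE_GETCFG) == sizeof(struct example_cfg)
 */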
diff --git a/include/asm-x86_64/ioctls.h b/include/asm-x86_64/ioctls.h
new file mode 100644 (file)
index 0000000..b52fb73
--- /dev/null
@@ -0,0 +1,82 @@
+#ifndef __ARCH_X8664_IOCTLS_H__
+#define __ARCH_X8664_IOCTLS_H__
+
+#include <asm/ioctl.h>
+
+/* 0x54 is just a magic number to make these relatively unique ('T') */
+
+#define TCGETS         0x5401
+#define TCSETS         0x5402
+#define TCSETSW                0x5403
+#define TCSETSF                0x5404
+#define TCGETA         0x5405
+#define TCSETA         0x5406
+#define TCSETAW                0x5407
+#define TCSETAF                0x5408
+#define TCSBRK         0x5409
+#define TCXONC         0x540A
+#define TCFLSH         0x540B
+#define TIOCEXCL       0x540C
+#define TIOCNXCL       0x540D
+#define TIOCSCTTY      0x540E
+#define TIOCGPGRP      0x540F
+#define TIOCSPGRP      0x5410
+#define TIOCOUTQ       0x5411
+#define TIOCSTI                0x5412
+#define TIOCGWINSZ     0x5413
+#define TIOCSWINSZ     0x5414
+#define TIOCMGET       0x5415
+#define TIOCMBIS       0x5416
+#define TIOCMBIC       0x5417
+#define TIOCMSET       0x5418
+#define TIOCGSOFTCAR   0x5419
+#define TIOCSSOFTCAR   0x541A
+#define FIONREAD       0x541B
+#define TIOCINQ                FIONREAD
+#define TIOCLINUX      0x541C
+#define TIOCCONS       0x541D
+#define TIOCGSERIAL    0x541E
+#define TIOCSSERIAL    0x541F
+#define TIOCPKT                0x5420
+#define FIONBIO                0x5421
+#define TIOCNOTTY      0x5422
+#define TIOCSETD       0x5423
+#define TIOCGETD       0x5424
+#define TCSBRKP                0x5425  /* Needed for POSIX tcsendbreak() */
+#define TIOCTTYGSTRUCT 0x5426  /* For debugging only */
+#define TIOCSBRK       0x5427  /* BSD compatibility */
+#define TIOCCBRK       0x5428  /* BSD compatibility */
+#define TIOCGSID       0x5429  /* Return the session ID of FD */
+#define TIOCGPTN       _IOR('T',0x30, unsigned int) /* Get Pty Number (of pty-mux device) */
+#define TIOCSPTLCK     _IOW('T',0x31, int)  /* Lock/unlock Pty */
+
+#define FIONCLEX       0x5450  /* these numbers need to be adjusted. */
+#define FIOCLEX                0x5451
+#define FIOASYNC       0x5452
+#define TIOCSERCONFIG  0x5453
+#define TIOCSERGWILD   0x5454
+#define TIOCSERSWILD   0x5455
+#define TIOCGLCKTRMIOS 0x5456
+#define TIOCSLCKTRMIOS 0x5457
+#define TIOCSERGSTRUCT 0x5458 /* For debugging only */
+#define TIOCSERGETLSR   0x5459 /* Get line status register */
+#define TIOCSERGETMULTI 0x545A /* Get multiport config  */
+#define TIOCSERSETMULTI 0x545B /* Set multiport config */
+
+#define TIOCMIWAIT     0x545C  /* wait for a change on serial input line(s) */
+#define TIOCGICOUNT    0x545D  /* read serial port inline interrupt counts */
+#define TIOCGHAYESESP   0x545E  /* Get Hayes ESP configuration */
+#define TIOCSHAYESESP   0x545F  /* Set Hayes ESP configuration */
+
+/* Used for packet mode */
+#define TIOCPKT_DATA            0
+#define TIOCPKT_FLUSHREAD       1
+#define TIOCPKT_FLUSHWRITE      2
+#define TIOCPKT_STOP            4
+#define TIOCPKT_START           8
+#define TIOCPKT_NOSTOP         16
+#define TIOCPKT_DOSTOP         32
+
+#define TIOCSER_TEMT    0x01   /* Transmitter physically empty */
+
+#endif
diff --git a/include/asm-x86_64/ipc.h b/include/asm-x86_64/ipc.h
new file mode 100644 (file)
index 0000000..187951e
--- /dev/null
@@ -0,0 +1,32 @@
+#ifndef __i386_IPC_H__
+#define __i386_IPC_H__
+
+/* 
+ * These are used to wrap system calls on x86.
+ *
+ * See arch/i386/kernel/sys_i386.c for ugly details..
+ */
+
+struct ipc_kludge {
+       struct msgbuf *msgp;
+       long msgtyp;
+};
+
+#define SEMOP           1
+#define SEMGET          2
+#define SEMCTL          3
+#define MSGSND         11
+#define MSGRCV         12
+#define MSGGET         13
+#define MSGCTL         14
+#define SHMAT          21
+#define SHMDT          22
+#define SHMGET         23
+#define SHMCTL         24
+
+/* Used by the DIPC package, try and avoid reusing it */
+#define DIPC            25
+
+#define IPCCALL(version,op)    ((version)<<16 | (op))
+
+#endif
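A minimal sketch (editor's illustration) of how a sys_ipc-style dispatcher is assumed to take the IPCCALL() multiplex value apart again; the split simply mirrors the macro and is not defined by this header.

    static void example_ipccall(void)
    {
            unsigned int call    = IPCCALL(1, SHMAT);   /* version 1, operation SHMAT */
            unsigned int version = call >> 16;          /* -> 1  */
            unsigned int op      = call & 0xffff;       /* -> 21 (SHMAT) */
            (void)version; (void)op;
    }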
diff --git a/include/asm-x86_64/ipcbuf.h b/include/asm-x86_64/ipcbuf.h
new file mode 100644 (file)
index 0000000..0dcad4f
--- /dev/null
@@ -0,0 +1,29 @@
+#ifndef __i386_IPCBUF_H__
+#define __i386_IPCBUF_H__
+
+/*
+ * The ipc64_perm structure for i386 architecture.
+ * Note extra padding because this structure is passed back and forth
+ * between kernel and user space.
+ *
+ * Pad space is left for:
+ * - 32-bit mode_t and seq
+ * - 2 miscellaneous 32-bit values
+ */
+
+struct ipc64_perm
+{
+       __kernel_key_t          key;
+       __kernel_uid32_t        uid;
+       __kernel_gid32_t        gid;
+       __kernel_uid32_t        cuid;
+       __kernel_gid32_t        cgid;
+       __kernel_mode_t         mode;
+       unsigned short          __pad1;
+       unsigned short          seq;
+       unsigned short          __pad2;
+       unsigned long           __unused1;
+       unsigned long           __unused2;
+};
+
+#endif /* __i386_IPCBUF_H__ */
diff --git a/include/asm-x86_64/irq.h b/include/asm-x86_64/irq.h
new file mode 100644 (file)
index 0000000..9b8f64a
--- /dev/null
@@ -0,0 +1,35 @@
+#ifndef _ASM_IRQ_H
+#define _ASM_IRQ_H
+
+/*
+ *     linux/include/asm/irq.h
+ *
+ *     (C) 1992, 1993 Linus Torvalds, (C) 1997 Ingo Molnar
+ *
+ *     IRQ/IPI changes taken from work by Thomas Radke
+ *     <tomsoft@informatik.tu-chemnitz.de>
+ */
+
+#define TIMER_IRQ 0
+
+/*
+ * 16 8259A IRQ's, 208 potential APIC interrupt sources.
+ * Right now the APIC is mostly only used for SMP.
+ * 256 vectors is an architectural limit. (we can have
+ * more than 256 devices theoretically, but they will
+ * have to use shared interrupts)
+ * Since vectors 0x00-0x1f are used/reserved for the CPU,
+ * the usable vector space is 0x20-0xff (224 vectors)
+ */
+#define NR_IRQS 224
+
+static __inline__ int irq_cannonicalize(int irq)
+{
+       return ((irq == 2) ? 9 : irq);
+}
+
+extern void disable_irq(unsigned int);
+extern void disable_irq_nosync(unsigned int);
+extern void enable_irq(unsigned int);
+
+#endif /* _ASM_IRQ_H */
diff --git a/include/asm-x86_64/kdebug.h b/include/asm-x86_64/kdebug.h
new file mode 100644 (file)
index 0000000..554aa79
--- /dev/null
@@ -0,0 +1,23 @@
+#ifndef _X86_64_KDEBUG_H
+#define _X86_64_KDEBUG_H 1
+
+#include <linux/notifier.h>
+
+struct pt_regs;
+
+struct die_args { 
+       struct pt_regs *regs;
+       const char *str;
+       long err; 
+}; 
+
+extern struct notifier_block *die_chain;
+
+enum { 
+       DIE_DIE = 1,
+       DIE_INT3,
+       DIE_DEBUG,
+       DIE_PANIC,
+}; 
+       
+#endif
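A hedged sketch of hooking the die_chain declared above through the generic notifier API from <linux/notifier.h>; that the chain is invoked with a DIE_* value and a struct die_args pointer is an assumption about the traps code, which is not shown here.

    #include <linux/kernel.h>
    #include <linux/notifier.h>
    #include <asm/kdebug.h>

    static int example_die_handler(struct notifier_block *self,
                                   unsigned long val, void *data)
    {
            struct die_args *args = data;   /* assumed calling convention */

            if (val == DIE_INT3)
                    printk("kdebug: %s (err %ld)\n", args->str, args->err);
            return NOTIFY_DONE;
    }

    static struct notifier_block example_die_nb = { example_die_handler, NULL, 0 };

    static void example_register(void)
    {
            notifier_chain_register(&die_chain, &example_die_nb);
    }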
diff --git a/include/asm-x86_64/keyboard.h b/include/asm-x86_64/keyboard.h
new file mode 100644 (file)
index 0000000..97992c5
--- /dev/null
@@ -0,0 +1,71 @@
+/*
+ *  linux/include/asm-x86_64/keyboard.h
+ *
+ *  Created 3 Nov 1996 by Geert Uytterhoeven
+ */
+
+/*
+ *  This file contains the x8664 architecture specific keyboard definitions
+ */
+
+#ifndef _X8664_KEYBOARD_H
+#define _X8664_KEYBOARD_H
+
+#ifdef __KERNEL__
+
+#include <linux/kernel.h>
+#include <linux/ioport.h>
+#include <linux/kd.h>
+#include <linux/pm.h>
+#include <asm/io.h>
+
+#define KEYBOARD_IRQ                   1
+#define DISABLE_KBD_DURING_INTERRUPTS  0
+
+extern int pckbd_setkeycode(unsigned int scancode, unsigned int keycode);
+extern int pckbd_getkeycode(unsigned int scancode);
+extern int pckbd_translate(unsigned char scancode, unsigned char *keycode,
+                          char raw_mode);
+extern char pckbd_unexpected_up(unsigned char keycode);
+extern void pckbd_leds(unsigned char leds);
+extern void pckbd_init_hw(void);
+extern int pckbd_pm_resume(struct pm_dev *dev, pm_request_t rqst, void *data);
+extern unsigned char pckbd_sysrq_xlate[128];
+
+#define kbd_setkeycode         pckbd_setkeycode
+#define kbd_getkeycode         pckbd_getkeycode
+#define kbd_translate          pckbd_translate
+#define kbd_unexpected_up      pckbd_unexpected_up
+#define kbd_leds               pckbd_leds
+#define kbd_init_hw            pckbd_init_hw
+#define kbd_sysrq_xlate                pckbd_sysrq_xlate
+
+#define SYSRQ_KEY 0x54
+
+/* resource allocation */
+#define kbd_request_region()
+#define kbd_request_irq(handler) request_irq(KEYBOARD_IRQ, handler, 0, \
+                                             "keyboard", NULL)
+
+/* How to access the keyboard macros on this platform.  */
+#define kbd_read_input() inb(KBD_DATA_REG)
+#define kbd_read_status() inb(KBD_STATUS_REG)
+#define kbd_write_output(val) outb(val, KBD_DATA_REG)
+#define kbd_write_command(val) outb(val, KBD_CNTL_REG)
+
+/* Some stoneage hardware needs delays after some operations.  */
+#define kbd_pause() do { } while(0)
+
+/*
+ * Machine specific bits for the PS/2 driver
+ */
+
+#define AUX_IRQ 12
+
+#define aux_request_irq(hand, dev_id)                                  \
+       request_irq(AUX_IRQ, hand, SA_SHIRQ, "PS/2 Mouse", dev_id)
+
+#define aux_free_irq(dev_id) free_irq(AUX_IRQ, dev_id)
+
+#endif /* __KERNEL__ */
+#endif /* _X8664_KEYBOARD_H */
diff --git a/include/asm-x86_64/kmap_types.h b/include/asm-x86_64/kmap_types.h
new file mode 100644 (file)
index 0000000..af85f63
--- /dev/null
@@ -0,0 +1,14 @@
+#ifndef _ASM_KMAP_TYPES_H
+#define _ASM_KMAP_TYPES_H
+
+enum km_type {
+       KM_BOUNCE_READ,
+       KM_SKB_DATA,
+       KM_SKB_DATA_SOFTIRQ,
+       KM_USER0,
+       KM_USER1,
+       KM_BIO_IRQ,
+       KM_TYPE_NR
+};
+
+#endif
diff --git a/include/asm-x86_64/ldt.h b/include/asm-x86_64/ldt.h
new file mode 100644 (file)
index 0000000..5ac5ec5
--- /dev/null
@@ -0,0 +1,37 @@
+/*
+ * ldt.h
+ *
+ * Definitions of structures used with the modify_ldt system call.
+ */
+#ifndef _LINUX_LDT_H
+#define _LINUX_LDT_H
+
+/* Is this to allow userland to manipulate LDTs? It looks like it. We should
+   consider disallowing LDT manipulations altogether: in long mode
+   there's no possibility of v86 mode, so something will have to
+   break, anyway.                                      --pavel */
+
+/* Maximum number of LDT entries supported. */
+#define LDT_ENTRIES    8192
+/* The size of each LDT entry. */
+#define LDT_ENTRY_SIZE 8
+
+#ifndef __ASSEMBLY__
+struct modify_ldt_ldt_s {
+       unsigned int  entry_number;
+       unsigned long base_addr;
+       unsigned int  limit;
+       unsigned int  seg_32bit:1;
+       unsigned int  contents:2;
+       unsigned int  read_exec_only:1;
+       unsigned int  limit_in_pages:1;
+       unsigned int  seg_not_present:1;
+       unsigned int  useable:1;
+};
+
+#define MODIFY_LDT_CONTENTS_DATA       0
+#define MODIFY_LDT_CONTENTS_STACK      1
+#define MODIFY_LDT_CONTENTS_CODE       2
+
+#endif /* !__ASSEMBLY__ */
+#endif
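A hedged sketch of filling the descriptor above for a small 32-bit data segment; the modify_ldt() call is assumed to keep the i386-style interface (function code 1 writes an entry), which this header does not itself define.

    static void example_fill_ldt_entry(struct modify_ldt_ldt_s *ldt,
                                       unsigned long base, unsigned int pages)
    {
            ldt->entry_number    = 0;
            ldt->base_addr       = base;
            ldt->limit           = pages;                    /* counted in 4K pages */
            ldt->seg_32bit       = 1;
            ldt->contents        = MODIFY_LDT_CONTENTS_DATA;
            ldt->read_exec_only  = 0;
            ldt->limit_in_pages  = 1;
            ldt->seg_not_present = 0;
            ldt->useable         = 1;
            /* user space would then issue modify_ldt(1, ldt, sizeof(*ldt)) */
    }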
diff --git a/include/asm-x86_64/linux_logo.h b/include/asm-x86_64/linux_logo.h
new file mode 100644 (file)
index 0000000..fd4a9be
--- /dev/null
@@ -0,0 +1,29 @@
+/* $Id: linux_logo.h,v 1.4 2001/07/05 23:44:45 ak Exp $
+ * include/asm-x86_64/linux_logo.h: This is a linux logo
+ *                                to be displayed on boot.
+ *
+ * Copyright (C) 1996 Larry Ewing (lewing@isc.tamu.edu)
+ * Copyright (C) 1996 Jakub Jelinek (jj@sunsite.mff.cuni.cz)
+ *
+ * You can put anything here, but:
+ * LINUX_LOGO_COLORS has to be less than 224
+ * image size has to be 80x80
+ * values have to start from 0x20
+ * (i.e. RGB(linux_logo_red[0],
+ *          linux_logo_green[0],
+ *          linux_logo_blue[0]) is color 0x20)
+ * BW image has to be 80x80 as well, with MS bit
+ * on the left
+ * Serial_console ascii image can be any size,
+ * but should contain %s to display the version
+ */
+#include <linux/init.h>
+#include <linux/version.h>
+
+/* We should create logo of penguin with a big hammer (-: --pavel */
+
+#define linux_logo_banner "Linux/x86-64 version " UTS_RELEASE
+
+#include <linux/linux_logo.h>
+
diff --git a/include/asm-x86_64/locks.h b/include/asm-x86_64/locks.h
new file mode 100644 (file)
index 0000000..1cc171f
--- /dev/null
@@ -0,0 +1,135 @@
+/*
+ *     SMP locks primitives for building ix86 locks
+ *     (not yet used).
+ *
+ *             Alan Cox, alan@redhat.com, 1995
+ */
+/*
+ *     This would be much easier but far less clear and easy
+ *     to borrow for other processors if it was just assembler.
+ */
+
+extern __inline__ int prim_spin_lock(struct spinlock *sp)
+{
+       int processor=smp_processor_id();
+       
+       /*
+        *      Grab the lock bit
+        */
+        
+       while(lock_set_bit(0,&sp->lock))
+       {
+               /*
+                *      Failed, but that's cos we own it!
+                */
+                
+               if(sp->cpu==processor)
+               {
+                       sp->users++;
+                       return 0;
+               }
+               /*
+                *      Spin in the cache S state if possible
+                */
+               while(sp->lock)
+               {
+                       /*
+                        *      Wait for any invalidates to go off
+                        */
+                        
+                       if(smp_invalidate_needed&(1<<processor))
+                               while(lock_clear_bit(processor,&smp_invalidate_needed))
+                                       local_flush_tlb();
+                       sp->spins++;
+               }
+               /*
+                *      Someone wrote the line, we go 'I' and get
+                *      the cache entry. Now try to regrab
+                */
+       }
+       sp->users++;sp->cpu=processor;
+       return 1;
+}
+
+/*
+ *     Release a spin lock
+ */
+extern __inline__ int prim_spin_unlock(struct spinlock *sp)
+{
+       /* This is safe. The decrement is still guarded by the lock. A multilock would
+          not be safe this way */
+       if(!--sp->users)
+       {
+               sp->cpu= NO_PROC_ID;lock_clear_bit(0,&sp->lock);
+               return 1;
+       }
+       return 0;
+}
+
+
+/*
+ *     Non blocking lock grab
+ */
+extern __inline__ int prim_spin_lock_nb(struct spinlock *sp)
+{
+       if(lock_set_bit(0,&sp->lock))
+               return 0;               /* Locked already */
+       sp->users++;
+       return 1;                       /* We got the lock */
+}
+
+
+/*
+ *     These wrap the locking primitives up for usage
+ */
+extern __inline__ void spinlock(struct spinlock *sp)
+{
+       if(sp->priority<current->lock_order)
+               panic("lock order violation: %s (%d)\n", sp->name, current->lock_order);
+       if(prim_spin_lock(sp))
+       {
+               /*
+                *      We got a new lock. Update the priority chain
+                */
+               sp->oldpri=current->lock_order;
+               current->lock_order=sp->priority;
+       }
+}
+
+extern __inline__ void spinunlock(struct spinlock *sp)
+{
+       int pri;
+       if(current->lock_order!=sp->priority)
+               panic("lock release order violation %s (%d)\n", sp->name, current->lock_order);
+       pri=sp->oldpri;
+       if(prim_spin_unlock(sp))
+       {
+               /*
+                *      Update the debugging lock priority chain. We dumped
+                *      our last right to the lock.
+                */
+               current->lock_order=pri;
+       }       
+}
+
+extern __inline__ void spintestlock(struct spinlock *sp)
+{
+       /*
+        *      We do no sanity checks, it's legal to optimistically
+        *      get a lower lock.
+        */
+       prim_spin_lock_nb(sp);
+}
+
+extern __inline__ void spintestunlock(struct spinlock *sp)
+{
+       /*
+        *      A testlock doesn't update the lock chain so we
+        *      must not update it on free
+        */
+       prim_spin_unlock(sp);
+}
diff --git a/include/asm-x86_64/mc146818rtc.h b/include/asm-x86_64/mc146818rtc.h
new file mode 100644 (file)
index 0000000..d6e3009
--- /dev/null
@@ -0,0 +1,29 @@
+/*
+ * Machine dependent access functions for RTC registers.
+ */
+#ifndef _ASM_MC146818RTC_H
+#define _ASM_MC146818RTC_H
+
+#include <asm/io.h>
+
+#ifndef RTC_PORT
+#define RTC_PORT(x)    (0x70 + (x))
+#define RTC_ALWAYS_BCD 1       /* RTC operates in binary mode */
+#endif
+
+/*
+ * The machines supported so far all access the RTC index register via
+ * an ISA port access, but the way to access the data register differs ...
+ */
+#define CMOS_READ(addr) ({ \
+outb_p((addr),RTC_PORT(0)); \
+inb_p(RTC_PORT(1)); \
+})
+#define CMOS_WRITE(val, addr) ({ \
+outb_p((addr),RTC_PORT(0)); \
+outb_p((val),RTC_PORT(1)); \
+})
+
+#define RTC_IRQ 8
+
+#endif /* _ASM_MC146818RTC_H */
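A minimal sketch of reading the time-of-day seconds through the accessors above; the register numbers (0x00 seconds, 0x0a status register A, bit 0x80 update-in-progress) come from the MC146818 datasheet, not from this header.

    #include <asm/mc146818rtc.h>

    static unsigned char example_read_rtc_seconds(void)
    {
            /* wait until the chip is not in the middle of an update cycle */
            while (CMOS_READ(0x0a) & 0x80)
                    ;
            return CMOS_READ(0x00);     /* BCD unless the RTC runs in binary mode */
    }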
diff --git a/include/asm-x86_64/mman.h b/include/asm-x86_64/mman.h
new file mode 100644 (file)
index 0000000..b370324
--- /dev/null
@@ -0,0 +1,39 @@
+#ifndef __X8664_MMAN_H__
+#define __X8664_MMAN_H__
+
+#define PROT_READ      0x1             /* page can be read */
+#define PROT_WRITE     0x2             /* page can be written */
+#define PROT_EXEC      0x4             /* page can be executed */
+#define PROT_NONE      0x0             /* page can not be accessed */
+
+#define MAP_SHARED     0x01            /* Share changes */
+#define MAP_PRIVATE    0x02            /* Changes are private */
+#define MAP_TYPE       0x0f            /* Mask for type of mapping */
+#define MAP_FIXED      0x10            /* Interpret addr exactly */
+#define MAP_ANONYMOUS  0x20            /* don't use a file */
+#define MAP_32BIT      0x40            /* only give out 32bit addresses */
+
+#define MAP_GROWSDOWN  0x0100          /* stack-like segment */
+#define MAP_DENYWRITE  0x0800          /* ETXTBSY */
+#define MAP_EXECUTABLE 0x1000          /* mark it as an executable */
+#define MAP_LOCKED     0x2000          /* pages are locked */
+#define MAP_NORESERVE  0x4000          /* don't check for reservations */
+
+#define MS_ASYNC       1               /* sync memory asynchronously */
+#define MS_INVALIDATE  2               /* invalidate the caches */
+#define MS_SYNC                4               /* synchronous memory sync */
+
+#define MCL_CURRENT    1               /* lock all current mappings */
+#define MCL_FUTURE     2               /* lock all future mappings */
+
+#define MADV_NORMAL    0x0             /* default page-in behavior */
+#define MADV_RANDOM    0x1             /* page-in minimum required */
+#define MADV_SEQUENTIAL        0x2             /* read-ahead aggressively */
+#define MADV_WILLNEED  0x3             /* pre-fault pages */
+#define MADV_DONTNEED  0x4             /* discard these pages */
+
+/* compatibility flags */
+#define MAP_ANON       MAP_ANONYMOUS
+#define MAP_FILE       0
+
+#endif
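A hypothetical user-space illustration of the x86-64-only MAP_32BIT flag defined above; whether the C library already exposes the flag is an assumption, so the raw value 0x40 is used directly.

    #include <stddef.h>
    #include <sys/mman.h>

    void *example_low_mapping(size_t len)
    {
            /* anonymous, writable mapping constrained to the low 32 bits of
             * the address space (0x40 == MAP_32BIT in the header above) */
            void *p = mmap(NULL, len, PROT_READ | PROT_WRITE,
                           MAP_PRIVATE | MAP_ANONYMOUS | 0x40, -1, 0);
            return (p == MAP_FAILED) ? NULL : p;
    }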
diff --git a/include/asm-x86_64/mmu.h b/include/asm-x86_64/mmu.h
new file mode 100644 (file)
index 0000000..b942d99
--- /dev/null
@@ -0,0 +1,13 @@
+#ifndef __x86_64_MMU_H
+#define __x86_64_MMU_H
+
+/*
+ * The x86_64 doesn't have a mmu context, but
+ * we put the segment information here.
+ */
+typedef struct { 
+       void *segments;
+       unsigned long cpuvalid;
+} mm_context_t;
+
+#endif
diff --git a/include/asm-x86_64/mmu_context.h b/include/asm-x86_64/mmu_context.h
new file mode 100644 (file)
index 0000000..94bb87a
--- /dev/null
@@ -0,0 +1,95 @@
+#ifndef __X86_64_MMU_CONTEXT_H
+#define __X86_64_MMU_CONTEXT_H
+
+#include <linux/config.h>
+#include <asm/desc.h>
+#include <asm/atomic.h>
+#include <asm/pgalloc.h>
+
+/*
+ * Every architecture must define this function. It's the fastest
+ * way of searching a 168-bit bitmap where the first 128 bits are
+ * unlikely to be set. It's guaranteed that at least one of the 168
+ * bits is set.
+ */
+#if MAX_RT_PRIO != 128 || MAX_PRIO != 168
+# error update this function.
+#endif
+
+static inline int __sched_find_first_bit(unsigned long *b)
+{
+       if (b[0])
+               return __ffs(b[0]);
+       if (b[1])
+               return __ffs(b[1]) + 64;
+       if (b[2])
+               return __ffs(b[2]) + 128;
+}
+
+static inline int sched_find_first_bit(unsigned long *b)
+{ 
+       int n = __sched_find_first_bit(b);
+       BUG_ON((unsigned)n > 167);
+       return n; 
+} 
+
+/*
+ * possibly do the LDT unload here?
+ */
+#define destroy_context(mm)            do { } while(0)
+#define init_new_context(tsk,mm)       0
+
+#ifdef CONFIG_SMP
+
+static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk, unsigned cpu)
+{
+       if(cpu_tlbstate[cpu].state == TLBSTATE_OK)
+               cpu_tlbstate[cpu].state = TLBSTATE_LAZY;        
+}
+#else
+static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk, unsigned cpu)
+{
+}
+#endif
+
+static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, struct task_struct *tsk, unsigned cpu)
+{
+       if (likely(prev != next)) {
+               /* stop flush ipis for the previous mm */
+               clear_bit(cpu, &prev->cpu_vm_mask);
+               /*
+                * Re-load LDT if necessary
+                */
+               if (unlikely(prev->context.segments != next->context.segments))
+                       load_LDT(next);
+#ifdef CONFIG_SMP
+               cpu_tlbstate[cpu].state = TLBSTATE_OK;
+               cpu_tlbstate[cpu].active_mm = next;
+#endif
+               set_bit(cpu, &next->cpu_vm_mask);
+               set_bit(cpu, &next->context.cpuvalid);
+               /* Re-load page tables */
+               asm volatile("movq %0,level4_pgt": :"r" (__pa(next->pgd) | 7));
+               __flush_tlb();
+       }
+#ifdef CONFIG_SMP
+       else {
+               cpu_tlbstate[cpu].state = TLBSTATE_OK;
+               if(cpu_tlbstate[cpu].active_mm != next)
+                       BUG();
+               if(!test_and_set_bit(cpu, &next->cpu_vm_mask)) {
+                       /* We were in lazy tlb mode and leave_mm disabled 
+                        * tlb flush IPI delivery. We must flush our tlb.
+                        */
+                       local_flush_tlb();
+               }
+               if (!test_and_set_bit(cpu, &next->context.cpuvalid))
+                       load_LDT(next);
+       }
+#endif
+}
+
+#define activate_mm(prev, next) \
+       switch_mm((prev),(next),NULL,smp_processor_id())
+
+#endif
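A worked example (editor's illustration) for the priority-bitmap helper near the top of this file: with only bit 130 set, the search falls through to the third 64-bit word and returns 128 + __ffs(b[2]) = 130.

    static void example_bitmap_lookup(void)
    {
            unsigned long prio[3] = { 0, 0, 1UL << 2 }; /* only bit 130 is set */
            int first = sched_find_first_bit(prio);     /* -> 130 */
            (void)first;
    }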
diff --git a/include/asm-x86_64/mmx.h b/include/asm-x86_64/mmx.h
new file mode 100644 (file)
index 0000000..46b71da
--- /dev/null
@@ -0,0 +1,14 @@
+#ifndef _ASM_MMX_H
+#define _ASM_MMX_H
+
+/*
+ *     MMX 3Dnow! helper operations
+ */
+
+#include <linux/types.h>
+extern void *_mmx_memcpy(void *to, const void *from, size_t size);
+extern void mmx_clear_page(void *page);
+extern void mmx_copy_page(void *to, void *from);
+
+#endif
diff --git a/include/asm-x86_64/module.h b/include/asm-x86_64/module.h
new file mode 100644 (file)
index 0000000..e025dfe
--- /dev/null
@@ -0,0 +1,12 @@
+#ifndef _ASM_X8664_MODULE_H
+#define _ASM_X8664_MODULE_H
+/*
+ * This file contains the x8664 architecture specific module code.
+ */
+
+#define module_map(x)          vmalloc(x)
+#define module_unmap(x)                vfree(x)
+#define module_arch_init(x)    (0)
+#define arch_init_modules(x)   do { } while (0)
+
+#endif 
diff --git a/include/asm-x86_64/mpspec.h b/include/asm-x86_64/mpspec.h
new file mode 100644 (file)
index 0000000..a8d7bb7
--- /dev/null
@@ -0,0 +1,188 @@
+#ifndef __ASM_MPSPEC_H
+#define __ASM_MPSPEC_H
+
+/*
+ * Structure definitions for SMP machines following the
+ * Intel Multiprocessing Specification 1.1 and 1.4.
+ */
+
+/*
+ * This tag identifies where the SMP configuration
+ * information is. 
+ */
+#define SMP_MAGIC_IDENT        (('_'<<24)|('P'<<16)|('M'<<8)|'_')
+
+/*
+ * a maximum of 16 APICs with the current APIC ID architecture.
+ */
+#define MAX_APICS 16
+
+struct intel_mp_floating
+{
+       char mpf_signature[4];          /* "_MP_"                       */
+       unsigned int mpf_physptr;       /* Configuration table address  */
+       unsigned char mpf_length;       /* Our length (paragraphs)      */
+       unsigned char mpf_specification;/* Specification version        */
+       unsigned char mpf_checksum;     /* Checksum (makes sum 0)       */
+       unsigned char mpf_feature1;     /* Standard or configuration ?  */
+       unsigned char mpf_feature2;     /* Bit7 set for IMCR|PIC        */
+       unsigned char mpf_feature3;     /* Unused (0)                   */
+       unsigned char mpf_feature4;     /* Unused (0)                   */
+       unsigned char mpf_feature5;     /* Unused (0)                   */
+};
+
+struct mp_config_table
+{
+       char mpc_signature[4];
+#define MPC_SIGNATURE "PCMP"
+       unsigned short mpc_length;      /* Size of table */
+       char  mpc_spec;                 /* 0x01 */
+       char  mpc_checksum;
+       char  mpc_oem[8];
+       char  mpc_productid[12];
+       unsigned int mpc_oemptr;        /* 0 if not present */
+       unsigned short mpc_oemsize;     /* 0 if not present */
+       unsigned short mpc_oemcount;
+       unsigned int mpc_lapic; /* APIC address */
+       unsigned int reserved;
+};
+
+/* Followed by entries */
+
+#define        MP_PROCESSOR    0
+#define        MP_BUS          1
+#define        MP_IOAPIC       2
+#define        MP_INTSRC       3
+#define        MP_LINTSRC      4
+
+struct mpc_config_processor
+{
+       unsigned char mpc_type;
+       unsigned char mpc_apicid;       /* Local APIC number */
+       unsigned char mpc_apicver;      /* Its version */
+       unsigned char mpc_cpuflag;
+#define CPU_ENABLED            1       /* Processor is available */
+#define CPU_BOOTPROCESSOR      2       /* Processor is the BP */
+       unsigned int mpc_cpufeature;            
+#define CPU_STEPPING_MASK 0x0F
+#define CPU_MODEL_MASK 0xF0
+#define CPU_FAMILY_MASK        0xF00
+       unsigned int mpc_featureflag;   /* CPUID feature value */
+       unsigned int mpc_reserved[2];
+};
+
+struct mpc_config_bus
+{
+       unsigned char mpc_type;
+       unsigned char mpc_busid;
+       unsigned char mpc_bustype[6] __attribute((packed));
+};
+
+/* List of Bus Type string values, Intel MP Spec. */
+#define BUSTYPE_EISA   "EISA"
+#define BUSTYPE_ISA    "ISA"
+#define BUSTYPE_INTERN "INTERN"        /* Internal BUS */
+#define BUSTYPE_MCA    "MCA"
+#define BUSTYPE_VL     "VL"            /* Local bus */
+#define BUSTYPE_PCI    "PCI"
+#define BUSTYPE_PCMCIA "PCMCIA"
+#define BUSTYPE_CBUS   "CBUS"
+#define BUSTYPE_CBUSII "CBUSII"
+#define BUSTYPE_FUTURE "FUTURE"
+#define BUSTYPE_MBI    "MBI"
+#define BUSTYPE_MBII   "MBII"
+#define BUSTYPE_MPI    "MPI"
+#define BUSTYPE_MPSA   "MPSA"
+#define BUSTYPE_NUBUS  "NUBUS"
+#define BUSTYPE_TC     "TC"
+#define BUSTYPE_VME    "VME"
+#define BUSTYPE_XPRESS "XPRESS"
+
+struct mpc_config_ioapic
+{
+       unsigned char mpc_type;
+       unsigned char mpc_apicid;
+       unsigned char mpc_apicver;
+       unsigned char mpc_flags;
+#define MPC_APIC_USABLE                0x01
+       unsigned int mpc_apicaddr;
+};
+
+struct mpc_config_intsrc
+{
+       unsigned char mpc_type;
+       unsigned char mpc_irqtype;
+       unsigned short mpc_irqflag;
+       unsigned char mpc_srcbus;
+       unsigned char mpc_srcbusirq;
+       unsigned char mpc_dstapic;
+       unsigned char mpc_dstirq;
+};
+
+enum mp_irq_source_types {
+       mp_INT = 0,
+       mp_NMI = 1,
+       mp_SMI = 2,
+       mp_ExtINT = 3
+};
+
+#define MP_IRQDIR_DEFAULT      0
+#define MP_IRQDIR_HIGH         1
+#define MP_IRQDIR_LOW          3
+
+
+struct mpc_config_lintsrc
+{
+       unsigned char mpc_type;
+       unsigned char mpc_irqtype;
+       unsigned short mpc_irqflag;
+       unsigned char mpc_srcbusid;
+       unsigned char mpc_srcbusirq;
+       unsigned char mpc_destapic;     
+#define MP_APIC_ALL    0xFF
+       unsigned char mpc_destapiclint;
+};
+
+/*
+ *     Default configurations
+ *
+ *     1       2 CPU ISA 82489DX
+ *     2       2 CPU EISA 82489DX neither IRQ 0 timer nor IRQ 13 DMA chaining
+ *     3       2 CPU EISA 82489DX
+ *     4       2 CPU MCA 82489DX
+ *     5       2 CPU ISA+PCI
+ *     6       2 CPU EISA+PCI
+ *     7       2 CPU MCA+PCI
+ */
+
+#define MAX_IRQ_SOURCES 256
+#define MAX_MP_BUSSES 32
+enum mp_bustype {
+       MP_BUS_ISA = 1,
+       MP_BUS_EISA,
+       MP_BUS_PCI,
+       MP_BUS_MCA
+};
+extern int mp_bus_id_to_type [MAX_MP_BUSSES];
+extern int mp_bus_id_to_pci_bus [MAX_MP_BUSSES];
+
+extern unsigned int boot_cpu_physical_apicid;
+extern unsigned long phys_cpu_present_map;
+extern int smp_found_config;
+extern void find_smp_config (void);
+extern void get_smp_config (void);
+extern int nr_ioapics;
+extern int apic_version [MAX_APICS];
+extern int mp_bus_id_to_type [MAX_MP_BUSSES];
+extern int mp_irq_entries;
+extern struct mpc_config_intsrc mp_irqs [MAX_IRQ_SOURCES];
+extern int mpc_default_type;
+extern int mp_bus_id_to_pci_bus [MAX_MP_BUSSES];
+extern int mp_current_pci_id;
+extern unsigned long mp_lapic_addr;
+extern int pic_mode;
+extern int using_apic_timer;
+
+#endif
+
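A sketch of the MP floating pointer scan implied by the structure above. It looks for the "_MP_" signature on 16-byte boundaries and checks that the structure's bytes (mpf_length is counted in 16-byte paragraphs) sum to zero; which physical ranges to scan comes from the MP specification, not from this header.

    static struct intel_mp_floating *example_scan_for_mpf(unsigned char *base,
                                                          unsigned long len)
    {
            unsigned long off;

            for (off = 0; off + 16 <= len; off += 16) {
                    struct intel_mp_floating *mpf =
                            (struct intel_mp_floating *)(base + off);
                    unsigned char sum = 0;
                    int i;

                    if (*(unsigned int *)mpf->mpf_signature != SMP_MAGIC_IDENT)
                            continue;
                    for (i = 0; i < mpf->mpf_length * 16; i++)
                            sum += ((unsigned char *)mpf)[i];
                    if (sum == 0)       /* mpf_checksum makes the sum come out 0 */
                            return mpf;
            }
            return NULL;
    }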
diff --git a/include/asm-x86_64/msgbuf.h b/include/asm-x86_64/msgbuf.h
new file mode 100644 (file)
index 0000000..cd6f95d
--- /dev/null
@@ -0,0 +1,27 @@
+#ifndef _X8664_MSGBUF_H
+#define _X8664_MSGBUF_H
+
+/* 
+ * The msqid64_ds structure for x86-64 architecture.
+ * Note extra padding because this structure is passed back and forth
+ * between kernel and user space.
+ *
+ * Pad space is left for:
+ * - 2 miscellaneous 64-bit values
+ */
+
+struct msqid64_ds {
+       struct ipc64_perm msg_perm;
+       __kernel_time_t msg_stime;      /* last msgsnd time */
+       __kernel_time_t msg_rtime;      /* last msgrcv time */
+       __kernel_time_t msg_ctime;      /* last change time */
+       unsigned long  msg_cbytes;      /* current number of bytes on queue */
+       unsigned long  msg_qnum;        /* number of messages in queue */
+       unsigned long  msg_qbytes;      /* max number of bytes on queue */
+       __kernel_pid_t msg_lspid;       /* pid of last msgsnd */
+       __kernel_pid_t msg_lrpid;       /* last receive pid */
+       unsigned long  __unused4;
+       unsigned long  __unused5;
+};
+
+#endif
diff --git a/include/asm-x86_64/msr.h b/include/asm-x86_64/msr.h
new file mode 100644 (file)
index 0000000..3ce6af9
--- /dev/null
@@ -0,0 +1,140 @@
+#ifndef X86_64_MSR_H
+#define X86_64_MSR_H 1
+/*
+ * Access to model-specific registers (MSRs).
+ * Note: the rd* operations modify the parameters directly (without using
+ * pointer indirection); this allows gcc to optimize better.
+ */
+
+#define rdmsr(msr,val1,val2) \
+       __asm__ __volatile__("rdmsr" \
+                           : "=a" (val1), "=d" (val2) \
+                           : "c" (msr))
+
+
+#define rdmsrl(msr,val) do { unsigned long a__,b__; \
+       __asm__ __volatile__("rdmsr" \
+                           : "=a" (a__), "=d" (b__) \
+                           : "c" (msr)); \
+       val = a__ | (b__<<32); \
+} while(0)
+
+#define wrmsr(msr,val1,val2) \
+     __asm__ __volatile__("wrmsr" \
+                         : /* no outputs */ \
+                         : "c" (msr), "a" (val1), "d" (val2))
+
+#define wrmsrl(msr,val) wrmsr(msr,(__u32)((__u64)(val)),((__u64)(val))>>32) 
+
+/* wrmsrl with exception handling */
+#define checking_wrmsrl(msr,val) ({ int ret__;                                         \
+       asm volatile("2: wrmsr ; xorl %0,%0\n"                                          \
+                    "1:\n\t"                                                           \
+                    ".section .fixup,\"ax\"\n\t"                                       \
+                    "3:  movl %4,%0 ; jmp 1b\n\t"                                      \
+                    ".previous\n\t"                                                    \
+                    ".section __ex_table,\"a\"\n"                                      \
+                    "   .align 8\n\t"                                                  \
+                    "   .quad  2b,3b\n\t"                                              \
+                    ".previous"                                                        \
+                    : "=a" (ret__)                                                     \
+                    : "c" (msr), "0" ((__u32)val), "d" ((val)>>32), "i" (-EFAULT));    \
+       ret__; })
+
+#define rdtsc(low,high) \
+     __asm__ __volatile__("rdtsc" : "=a" (low), "=d" (high))
+
+#define rdtscl(low) \
+     __asm__ __volatile__ ("rdtsc" : "=a" (low) : : "edx")
+
+#define rdtscll(val) \
+     __asm__ __volatile__ ("rdtsc" : "=A" (val))
+
+#define write_tsc(val1,val2) wrmsr(0x10, val1, val2)
+
+#define rdpmc(counter,low,high) \
+     __asm__ __volatile__("rdpmc" \
+                         : "=a" (low), "=d" (high) \
+                         : "c" (counter))
+
+
+/* AMD/K8 specific MSRs */ 
+#define MSR_EFER 0xc0000080            /* extended feature register */
+#define MSR_STAR 0xc0000081            /* legacy mode SYSCALL target */
+#define MSR_LSTAR 0xc0000082           /* long mode SYSCALL target */
+#define MSR_CSTAR 0xc0000083           /* compatibility mode SYSCALL target */
+#define MSR_SYSCALL_MASK 0xc0000084    /* EFLAGS mask for syscall */
+#define MSR_FS_BASE 0xc0000100         /* 64bit FS base */
+#define MSR_GS_BASE 0xc0000101         /* 64bit GS base */
+#define MSR_KERNEL_GS_BASE  0xc0000102 /* SwapGS GS shadow (or USER_GS from kernel view) */ 
+
+
+/* Intel MSRs. Some also available on other CPUs */
+#define MSR_IA32_PLATFORM_ID   0x17
+
+#define MSR_IA32_PERFCTR0      0xc1
+#define MSR_IA32_PERFCTR1      0xc2
+
+#define MSR_IA32_BBL_CR_CTL        0x119
+
+#define MSR_IA32_MCG_CAP       0x179
+#define MSR_IA32_MCG_STATUS        0x17a
+#define MSR_IA32_MCG_CTL       0x17b
+
+#define MSR_IA32_EVNTSEL0      0x186
+#define MSR_IA32_EVNTSEL1      0x187
+
+#define MSR_IA32_DEBUGCTLMSR       0x1d9
+#define MSR_IA32_LASTBRANCHFROMIP  0x1db
+#define MSR_IA32_LASTBRANCHTOIP        0x1dc
+#define MSR_IA32_LASTINTFROMIP     0x1dd
+#define MSR_IA32_LASTINTTOIP       0x1de
+
+#define MSR_IA32_MC0_CTL       0x400
+#define MSR_IA32_MC0_STATUS        0x401
+#define MSR_IA32_MC0_ADDR      0x402
+#define MSR_IA32_MC0_MISC      0x403
+
+/* K7 MSRs */
+#define MSR_K7_EVNTSEL0            0xC0010000
+#define MSR_K7_PERFCTR0            0xC0010004
+
+/* K6 MSRs */
+#define MSR_K6_EFER                    0xC0000080
+#define MSR_K6_STAR                    0xC0000081
+#define MSR_K6_WHCR                    0xC0000082
+#define MSR_K6_UWCCR                   0xC0000085
+#define MSR_K6_PSOR                    0xC0000087
+#define MSR_K6_PFIR                    0xC0000088
+
+/* Centaur-Hauls/IDT defined MSRs. */
+#define MSR_IDT_FCR1                   0x107
+#define MSR_IDT_FCR2                   0x108
+#define MSR_IDT_FCR3                   0x109
+#define MSR_IDT_FCR4                   0x10a
+
+#define MSR_IDT_MCR0                   0x110
+#define MSR_IDT_MCR1                   0x111
+#define MSR_IDT_MCR2                   0x112
+#define MSR_IDT_MCR3                   0x113
+#define MSR_IDT_MCR4                   0x114
+#define MSR_IDT_MCR5                   0x115
+#define MSR_IDT_MCR6                   0x116
+#define MSR_IDT_MCR7                   0x117
+#define MSR_IDT_MCR_CTRL               0x120
+
+/* VIA Cyrix defined MSRs*/
+#define MSR_VIA_FCR                    0x1107
+
+/* Intel defined MSRs. */
+#define MSR_IA32_P5_MC_ADDR            0
+#define MSR_IA32_P5_MC_TYPE            1
+#define MSR_IA32_PLATFORM_ID           0x17
+#define MSR_IA32_EBL_CR_POWERON                0x2a
+
+#define MSR_IA32_APICBASE               0x1b
+#define MSR_IA32_APICBASE_BSP           (1<<8)
+#define MSR_IA32_APICBASE_ENABLE        (1<<11)
+#define MSR_IA32_APICBASE_BASE          (0xfffff<<12)
+
+#endif
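A small sketch of the accessors above: read EFER and make sure the SYSCALL/SYSRET enable bit is set. That SCE is bit 0 of EFER is an architectural detail assumed here, not something this header defines.

    #include <asm/msr.h>

    static void example_enable_syscall(void)
    {
            unsigned long efer;

            rdmsrl(MSR_EFER, efer);
            if (!(efer & 1))                    /* EFER.SCE not set yet (assumed bit) */
                    wrmsrl(MSR_EFER, efer | 1);
    }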
diff --git a/include/asm-x86_64/mtrr.h b/include/asm-x86_64/mtrr.h
new file mode 100644 (file)
index 0000000..ff3ea87
--- /dev/null
@@ -0,0 +1,127 @@
+/*  Generic MTRR (Memory Type Range Register) ioctls.
+
+    Copyright (C) 1997-1999  Richard Gooch
+
+    This library is free software; you can redistribute it and/or
+    modify it under the terms of the GNU Library General Public
+    License as published by the Free Software Foundation; either
+    version 2 of the License, or (at your option) any later version.
+
+    This library is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+    Library General Public License for more details.
+
+    You should have received a copy of the GNU Library General Public
+    License along with this library; if not, write to the Free
+    Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+    Richard Gooch may be reached by email at  rgooch@atnf.csiro.au
+    The postal address is:
+      Richard Gooch, c/o ATNF, P. O. Box 76, Epping, N.S.W., 2121, Australia.
+*/
+#ifndef _LINUX_MTRR_H
+#define _LINUX_MTRR_H
+
+#include <linux/config.h>
+#include <linux/ioctl.h>
+
+#define        MTRR_IOCTL_BASE 'M'
+
+struct mtrr_sentry
+{
+    unsigned long base;    /*  Base address     */
+    unsigned long size;    /*  Size of region   */
+    unsigned int type;     /*  Type of region   */
+};
+
+struct mtrr_gentry
+{
+    unsigned int regnum;   /*  Register number  */
+    unsigned long base;    /*  Base address     */
+    unsigned long size;    /*  Size of region   */
+    unsigned int type;     /*  Type of region   */
+};
+
+/*  These are the various ioctls  */
+#define MTRRIOC_ADD_ENTRY        _IOW(MTRR_IOCTL_BASE,  0, struct mtrr_sentry)
+#define MTRRIOC_SET_ENTRY        _IOW(MTRR_IOCTL_BASE,  1, struct mtrr_sentry)
+#define MTRRIOC_DEL_ENTRY        _IOW(MTRR_IOCTL_BASE,  2, struct mtrr_sentry)
+#define MTRRIOC_GET_ENTRY        _IOWR(MTRR_IOCTL_BASE, 3, struct mtrr_gentry)
+#define MTRRIOC_KILL_ENTRY       _IOW(MTRR_IOCTL_BASE,  4, struct mtrr_sentry)
+#define MTRRIOC_ADD_PAGE_ENTRY   _IOW(MTRR_IOCTL_BASE,  5, struct mtrr_sentry)
+#define MTRRIOC_SET_PAGE_ENTRY   _IOW(MTRR_IOCTL_BASE,  6, struct mtrr_sentry)
+#define MTRRIOC_DEL_PAGE_ENTRY   _IOW(MTRR_IOCTL_BASE,  7, struct mtrr_sentry)
+#define MTRRIOC_GET_PAGE_ENTRY   _IOWR(MTRR_IOCTL_BASE, 8, struct mtrr_gentry)
+#define MTRRIOC_KILL_PAGE_ENTRY  _IOW(MTRR_IOCTL_BASE,  9, struct mtrr_sentry)
+
+/*  These are the region types  */
+#define MTRR_TYPE_UNCACHABLE 0
+#define MTRR_TYPE_WRCOMB     1
+/*#define MTRR_TYPE_         2*/
+/*#define MTRR_TYPE_         3*/
+#define MTRR_TYPE_WRTHROUGH  4
+#define MTRR_TYPE_WRPROT     5
+#define MTRR_TYPE_WRBACK     6
+#define MTRR_NUM_TYPES       7
+
+#ifdef MTRR_NEED_STRINGS
+static char *mtrr_strings[MTRR_NUM_TYPES] =
+{
+    "uncachable",               /* 0 */
+    "write-combining",          /* 1 */
+    "?",                        /* 2 */
+    "?",                        /* 3 */
+    "write-through",            /* 4 */
+    "write-protect",            /* 5 */
+    "write-back",               /* 6 */
+};
+#endif
+
+#ifdef __KERNEL__
+
+/*  The following functions are for use by other drivers  */
+# ifdef CONFIG_MTRR
+extern int mtrr_add (unsigned long base, unsigned long size,
+                    unsigned int type, char increment);
+extern int mtrr_add_page (unsigned long base, unsigned long size,
+                    unsigned int type, char increment);
+extern int mtrr_del (int reg, unsigned long base, unsigned long size);
+extern int mtrr_del_page (int reg, unsigned long base, unsigned long size);
+extern void mtrr_centaur_report_mcr(int mcr, u32 lo, u32 hi);
+#  else
+static __inline__ int mtrr_add (unsigned long base, unsigned long size,
+                               unsigned int type, char increment)
+{
+    return -ENODEV;
+}
+static __inline__ int mtrr_add_page (unsigned long base, unsigned long size,
+                               unsigned int type, char increment)
+{
+    return -ENODEV;
+}
+static __inline__ int mtrr_del (int reg, unsigned long base,
+                               unsigned long size)
+{
+    return -ENODEV;
+}
+static __inline__ int mtrr_del_page (int reg, unsigned long base,
+                               unsigned long size)
+{
+    return -ENODEV;
+}
+
+static __inline__ void mtrr_centaur_report_mcr(int mcr, u32 lo, u32 hi) {;}
+
+#  endif
+
+/*  The following functions are for initialisation: don't use them!  */
+extern int mtrr_init (void);
+#  if defined(CONFIG_SMP) && defined(CONFIG_MTRR)
+extern void mtrr_init_boot_cpu (void);
+extern void mtrr_init_secondary_cpu (void);
+#  endif
+
+#endif
+
+#endif  /*  _LINUX_MTRR_H  */
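A sketch of how a driver might use the in-kernel interface declared above to mark a hypothetical framebuffer as write-combining; base and size are placeholders.

    #include <linux/kernel.h>
    #include <asm/mtrr.h>

    static int example_map_framebuffer_wc(void)
    {
            /* base/size below are placeholder values, not a real device */
            int reg = mtrr_add(0xe0000000UL, 0x1000000UL, MTRR_TYPE_WRCOMB, 1);

            if (reg < 0)
                    printk("mtrr_add failed: %d\n", reg);
            return reg;     /* pass back to mtrr_del() on teardown */
    }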
diff --git a/include/asm-x86_64/namei.h b/include/asm-x86_64/namei.h
new file mode 100644 (file)
index 0000000..c6f90b5
--- /dev/null
@@ -0,0 +1,17 @@
+/* $Id: namei.h,v 1.2 2001/07/04 09:08:13 ak Exp $
+ * linux/include/asm-x86_64/namei.h
+ *
+ * Included from linux/fs/namei.c
+ */
+
+#ifndef __X8664_NAMEI_H
+#define __X8664_NAMEI_H
+
+/* This dummy routine may be changed to something useful
+ * for /usr/gnemul/ emulation stuff.
+ * Look at asm-sparc/namei.h for details.
+ */
+
+#define __emul_prefix() NULL
+
+#endif
diff --git a/include/asm-x86_64/page.h b/include/asm-x86_64/page.h
new file mode 100644 (file)
index 0000000..48a878d
--- /dev/null
@@ -0,0 +1,118 @@
+#ifndef _X86_64_PAGE_H
+#define _X86_64_PAGE_H
+
+/* PAGE_SHIFT determines the page size */
+#define PAGE_SHIFT     12
+#ifdef __ASSEMBLY__
+#define PAGE_SIZE      (0x1 << PAGE_SHIFT)
+#else
+#define PAGE_SIZE      (1UL << PAGE_SHIFT)
+#endif
+#define PAGE_MASK      (~(PAGE_SIZE-1))
+#define THREAD_SIZE (2*PAGE_SIZE)
+#define CURRENT_MASK (~(THREAD_SIZE-1))
+
+#ifdef __KERNEL__
+#ifndef __ASSEMBLY__
+
+#include <linux/config.h>
+
+#ifdef CONFIG_X86_USE_3DNOW
+
+#include <asm/mmx.h>
+
+#define clear_page(page)       mmx_clear_page((void *)(page))
+#define copy_page(to,from)     mmx_copy_page(to,from)
+
+#else
+
+/*
+ *     On older X86 processors it's not a win to use MMX here, it seems.
+ *     Maybe the K6-III ?
+ */
+
+#define clear_page(page)       memset((void *)(page), 0, PAGE_SIZE)
+
+#define copy_page(to,from)     memcpy((void *)(to), (void *)(from), PAGE_SIZE)
+
+#endif
+
+#define clear_user_page(page, vaddr)   clear_page(page)
+#define copy_user_page(to, from, vaddr)        copy_page(to, from)
+
+/*
+ * These are used to make use of C type-checking..
+ */
+typedef struct { unsigned long pte; } pte_t;
+typedef struct { unsigned long pmd; } pmd_t;
+typedef struct { unsigned long pgd; } pgd_t;
+typedef struct { unsigned long level4; } level4_t;
+#define PTE_MASK       PAGE_MASK
+
+typedef struct { unsigned long pgprot; } pgprot_t;
+
+#define pte_val(x)     ((x).pte)
+#define pmd_val(x)     ((x).pmd)
+#define pgd_val(x)     ((x).pgd)
+#define level4_val(x)  ((x).level4)
+#define pgprot_val(x)  ((x).pgprot)
+
+#define __pte(x) ((pte_t) { (x) } )
+#define __pmd(x) ((pmd_t) { (x) } )
+#define __pgd(x) ((pgd_t) { (x) } )
+#define __level4(x) ((level4_t) { (x) } )
+#define __pgprot(x)    ((pgprot_t) { (x) } )
+
+#endif /* !__ASSEMBLY__ */
+
+/* to align the pointer to the (next) page boundary */
+#define PAGE_ALIGN(addr)       (((addr)+PAGE_SIZE-1)&PAGE_MASK)
+
+
+#define __START_KERNEL         0xffffffff80100000
+#define __START_KERNEL_map     0xffffffff80000000
+#define __PAGE_OFFSET           0xffff800000000000
+
+#ifndef __ASSEMBLY__
+
+/*
+ * Tell the user there is some problem.
+ */ 
+
+struct bug_frame { 
+       unsigned short ud2;          
+       char *filename;    /* should use 32bit offset instead, but the assembler doesn't like it */ 
+       unsigned short line; 
+} __attribute__((packed)); 
+
+#define BUG() asm volatile("ud2 ; .quad %c1 ; .short %c0" :: "i"(__LINE__), "i" (__FILE__))
+#define PAGE_BUG(page) BUG(); 
+
+/* Pure 2^n version of get_order */
+extern __inline__ int get_order(unsigned long size)
+{
+       int order;
+
+       size = (size-1) >> (PAGE_SHIFT-1);
+       order = -1;
+       do {
+               size >>= 1;
+               order++;
+       } while (size);
+       return order;
+}
+
+static unsigned long start_kernel_map __attribute__((unused)) = __START_KERNEL_map; /* FIXME: workaround gcc bug */
+
+#endif /* __ASSEMBLY__ */
+
+#define PAGE_OFFSET            ((unsigned long)__PAGE_OFFSET)
+#define __pa(x)                        (((unsigned long)(x)>=start_kernel_map)?(unsigned long)(x) - (unsigned long)start_kernel_map:(unsigned long)(x) - PAGE_OFFSET)
+#define __va(x)                        ((void *)((unsigned long)(x)+PAGE_OFFSET))
+#define virt_to_page(kaddr)    (mem_map + (__pa(kaddr) >> PAGE_SHIFT))
+#define VALID_PAGE(page)       ((page - mem_map) < max_mapnr)
+
+
+#endif /* __KERNEL__ */
+
+#endif /* _X86_64_PAGE_H */
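Worked examples (editor's illustration) for the get_order() helper above, assuming PAGE_SIZE == 4096:

    static void example_orders(void)
    {
            int a = get_order(1);                 /* 0: still one page            */
            int b = get_order(PAGE_SIZE);         /* 0                            */
            int c = get_order(PAGE_SIZE + 1);     /* 1: rounds up to two pages    */
            int d = get_order(8 * PAGE_SIZE);     /* 3: an order-3, 8-page block  */
            (void)a; (void)b; (void)c; (void)d;
    }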
diff --git a/include/asm-x86_64/param.h b/include/asm-x86_64/param.h
new file mode 100644 (file)
index 0000000..601733b
--- /dev/null
@@ -0,0 +1,24 @@
+#ifndef _ASMx86_64_PARAM_H
+#define _ASMx86_64_PARAM_H
+
+#ifndef HZ
+#define HZ 100
+#endif
+
+#define EXEC_PAGESIZE  4096
+
+#ifndef NGROUPS
+#define NGROUPS                32
+#endif
+
+#ifndef NOGROUP
+#define NOGROUP                (-1)
+#endif
+
+#define MAXHOSTNAMELEN 64      /* max length of hostname */
+
+#ifdef __KERNEL__
+# define CLOCKS_PER_SEC        100     /* frequency at which times() counts */
+#endif
+
+#endif
diff --git a/include/asm-x86_64/parport.h b/include/asm-x86_64/parport.h
new file mode 100644 (file)
index 0000000..7135ef9
--- /dev/null
@@ -0,0 +1,18 @@
+/*
+ * parport.h: ia32-specific parport initialisation
+ *
+ * Copyright (C) 1999, 2000  Tim Waugh <tim@cyberelk.demon.co.uk>
+ *
+ * This file should only be included by drivers/parport/parport_pc.c.
+ */
+
+#ifndef _ASM_X8664_PARPORT_H
+#define _ASM_X8664_PARPORT_H 1
+
+static int __devinit parport_pc_find_isa_ports (int autoirq, int autodma);
+static int __devinit parport_pc_find_nonpci_ports (int autoirq, int autodma)
+{
+       return parport_pc_find_isa_ports (autoirq, autodma);
+}
+
+#endif 
diff --git a/include/asm-x86_64/pci.h b/include/asm-x86_64/pci.h
new file mode 100644 (file)
index 0000000..167ab5d
--- /dev/null
@@ -0,0 +1,273 @@
+#ifndef __x8664_PCI_H
+#define __x8664_PCI_H
+
+#include <linux/config.h>
+#include <asm/io.h>
+
+#ifdef __KERNEL__
+
+/* Can be used to override the logic in pci_scan_bus for skipping
+   already-configured bus numbers - to be used for buggy BIOSes
+   or architectures with incomplete PCI setup by the loader */
+
+#ifdef CONFIG_PCI
+extern unsigned int pcibios_assign_all_busses(void);
+#else
+#define pcibios_assign_all_busses()    0
+#endif
+
+extern unsigned long pci_mem_start;
+#define PCIBIOS_MIN_IO         0x1000
+#define PCIBIOS_MIN_MEM                (pci_mem_start)
+
+void pcibios_set_master(struct pci_dev *dev);
+void pcibios_penalize_isa_irq(int irq);
+struct irq_routing_table *pcibios_get_irq_routing_table(void);
+int pcibios_set_irq_routing(struct pci_dev *dev, int pin, int irq);
+
+/* Dynamic DMA mapping stuff.
+ * x8664 has everything mapped statically.
+ */
+
+#include <linux/types.h>
+#include <linux/slab.h>
+#include <asm/scatterlist.h>
+#include <linux/string.h>
+#include <asm/io.h>
+
+struct pci_dev;
+
+/* The PCI address space does equal the physical memory
+ * address space.  The networking and block device layers use
+ * this boolean for bounce buffer decisions.
+ */
+#define PCI_DMA_BUS_IS_PHYS    (1)
+
+/* Allocate and map kernel buffer using consistent mode DMA for a device.
+ * hwdev should be valid struct pci_dev pointer for PCI devices,
+ * NULL for PCI-like buses (ISA, EISA).
+ * Returns non-NULL cpu-view pointer to the buffer if successful and
+ * sets *dma_addrp to the pci side dma address as well, else *dma_addrp
+ * is undefined.
+ */
+extern void *pci_alloc_consistent(struct pci_dev *hwdev, size_t size,
+                                 dma_addr_t *dma_handle);
+
+/* Free and unmap a consistent DMA buffer.
+ * cpu_addr is what was returned from pci_alloc_consistent,
+ * size must be the same as what was passed into pci_alloc_consistent,
+ * and likewise dma_addr must be the same as what *dma_addrp was set to.
+ *
+ * References to the memory and mappings associated with cpu_addr/dma_addr
+ * past this call are illegal.
+ */
+extern void pci_free_consistent(struct pci_dev *hwdev, size_t size,
+                               void *vaddr, dma_addr_t dma_handle);
+
+/* Map a single buffer of the indicated size for DMA in streaming mode.
+ * The 32-bit bus address to use is returned.
+ *
+ * Once the device is given the dma address, the device owns this memory
+ * until either pci_unmap_single or pci_dma_sync_single is performed.
+ */
+static inline dma_addr_t pci_map_single(struct pci_dev *hwdev, void *ptr,
+                                       size_t size, int direction)
+{
+       if (direction == PCI_DMA_NONE)
+               BUG();
+       flush_write_buffers();
+       return virt_to_phys(ptr);
+}
+
+/* Unmap a single streaming mode DMA translation.  The dma_addr and size
+ * must match what was provided for in a previous pci_map_single call.  All
+ * other usages are undefined.
+ *
+ * After this call, reads by the cpu to the buffer are guaranteed to see
+ * whatever the device wrote there.
+ */
+static inline void pci_unmap_single(struct pci_dev *hwdev, dma_addr_t dma_addr,
+                                   size_t size, int direction)
+{
+       if (direction == PCI_DMA_NONE)
+               BUG();
+       /* Nothing to do */
+}
+
+/*
+ * pci_{map,unmap}_page maps a kernel page to a dma_addr_t.  Identical
+ * to pci_map_single, but takes a struct page instead of a virtual address.
+ */
+static inline dma_addr_t pci_map_page(struct pci_dev *hwdev, struct page *page,
+                                     unsigned long offset, size_t size, int direction)
+{
+       if (direction == PCI_DMA_NONE)
+               BUG();
+
+       return (page - mem_map) * PAGE_SIZE + offset;
+}
+
+static inline void pci_unmap_page(struct pci_dev *hwdev, dma_addr_t dma_address,
+                                 size_t size, int direction)
+{
+       if (direction == PCI_DMA_NONE)
+               BUG();
+       /* Nothing to do */
+}
+
+/* pci_unmap_{page,single} is a nop so... */
+#define DECLARE_PCI_UNMAP_ADDR(ADDR_NAME)
+#define DECLARE_PCI_UNMAP_LEN(LEN_NAME)
+#define pci_unmap_addr(PTR, ADDR_NAME)         (0)
+#define pci_unmap_addr_set(PTR, ADDR_NAME, VAL)        do { } while (0)
+#define pci_unmap_len(PTR, LEN_NAME)           (0)
+#define pci_unmap_len_set(PTR, LEN_NAME, VAL)  do { } while (0)
+
+/* Map a set of buffers described by scatterlist in streaming
+ * mode for DMA.  This is the scatter-gather version of the
+ * above pci_map_single interface.  Here the scatter gather list
+ * elements are each tagged with the appropriate dma address
+ * and length.  They are obtained via sg_dma_{address,length}(SG).
+ *
+ * NOTE: An implementation may be able to use a smaller number of
+ *       DMA address/length pairs than there are SG table elements.
+ *       (for example via virtual mapping capabilities)
+ *       The routine returns the number of addr/length pairs actually
+ *       used, at most nents.
+ *
+ * Device ownership issues as mentioned above for pci_map_single are
+ * the same here.
+ */
+static inline int pci_map_sg(struct pci_dev *hwdev, struct scatterlist *sg,
+                            int nents, int direction)
+{
+       int i;
+
+       if (direction == PCI_DMA_NONE)
+               BUG();
+
+       for (i = 0; i < nents; i++ ) {
+               if (!sg[i].page)
+                       BUG();
+
+               sg[i].dma_address = page_to_phys(sg[i].page) + sg[i].offset;
+       }
+
+       flush_write_buffers();
+       return nents;
+}
+
+/* Unmap a set of streaming mode DMA translations.
+ * Again, cpu read rules concerning calls here are the same as for
+ * pci_unmap_single() above.
+ */
+static inline void pci_unmap_sg(struct pci_dev *hwdev, struct scatterlist *sg,
+                               int nents, int direction)
+{
+       if (direction == PCI_DMA_NONE)
+               BUG();
+       /* Nothing to do */
+}
+
+/* Make physical memory consistent for a single
+ * streaming mode DMA translation after a transfer.
+ *
+ * If you perform a pci_map_single() but wish to interrogate the
+ * buffer using the cpu, yet do not wish to teardown the PCI dma
+ * mapping, you must call this function before doing so.  At the
+ * next point you give the PCI dma address back to the card, the
+ * device again owns the buffer.
+ */
+static inline void pci_dma_sync_single(struct pci_dev *hwdev,
+                                      dma_addr_t dma_handle,
+                                      size_t size, int direction)
+{
+       if (direction == PCI_DMA_NONE)
+               BUG();
+       flush_write_buffers();
+}
+
+/* Make physical memory consistent for a set of streaming
+ * mode DMA translations after a transfer.
+ *
+ * The same as pci_dma_sync_single but for a scatter-gather list,
+ * same rules and usage.
+ */
+static inline void pci_dma_sync_sg(struct pci_dev *hwdev,
+                                  struct scatterlist *sg,
+                                  int nelems, int direction)
+{
+       if (direction == PCI_DMA_NONE)
+               BUG();
+       flush_write_buffers();
+}
+
+/* Return whether the given PCI device DMA address mask can
+ * be supported properly.  For example, if your device can
+ * only drive the low 24-bits during PCI bus mastering, then
+ * you would pass 0x00ffffff as the mask to this function.
+ */
+static inline int pci_dma_supported(struct pci_dev *hwdev, u64 mask)
+{
+        /*
+         * we fall back to GFP_DMA when the mask isn't all 1s,
+         * so we can't guarantee allocations that must be
+         * within a tighter range than GFP_DMA..
+         */
+        if(mask < 0x00ffffff)
+                return 0;
+
+       return 1;
+}
+
+/* This is always fine. */
+#define pci_dac_dma_supported(pci_dev, mask)   (1)
+
+static __inline__ dma64_addr_t
+pci_dac_page_to_dma(struct pci_dev *pdev, struct page *page, unsigned long offset, int direction)
+{
+       return ((dma64_addr_t) page_to_phys(page) +
+               (dma64_addr_t) offset);
+}
+
+static __inline__ struct page *
+pci_dac_dma_to_page(struct pci_dev *pdev, dma64_addr_t dma_addr)
+{
+       unsigned long poff = (dma_addr >> PAGE_SHIFT);
+
+       return mem_map + poff;
+}
+
+static __inline__ unsigned long
+pci_dac_dma_to_offset(struct pci_dev *pdev, dma64_addr_t dma_addr)
+{
+       return (dma_addr & ~PAGE_MASK);
+}
+
+static __inline__ void
+pci_dac_dma_sync_single(struct pci_dev *pdev, dma64_addr_t dma_addr, size_t len, int direction)
+{
+       flush_write_buffers();
+}
+
+/* These macros should be used after a pci_map_sg call has been done
+ * to get bus addresses of each of the SG entries and their lengths.
+ * You should only work with the number of sg entries pci_map_sg
+ * returns.
+ */
+#define sg_dma_address(sg)     ((sg)->dma_address)
+#define sg_dma_len(sg)         ((sg)->length)
+
+/* Return the index of the PCI controller for device. */
+static inline int pci_controller_num(struct pci_dev *dev)
+{
+       return 0;
+}
+
+#define HAVE_PCI_MMAP
+extern int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma,
+                              enum pci_mmap_state mmap_state, int write_combine);
+
+#endif /* __KERNEL__ */
+
+#endif /* __x8664_PCI_H */
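A hypothetical driver fragment using the DMA interface above: allocate a small consistent region for descriptors and stream one buffer to the device; the device-specific steps are elided.

    #include <linux/pci.h>
    #include <linux/errno.h>

    static int example_dma_setup(struct pci_dev *pdev, void *buf, size_t len)
    {
            dma_addr_t ring_dma, buf_dma;
            void *ring = pci_alloc_consistent(pdev, 4096, &ring_dma);

            if (!ring)
                    return -ENOMEM;

            buf_dma = pci_map_single(pdev, buf, len, PCI_DMA_TODEVICE);
            /* ... hand ring_dma and buf_dma to the hardware, wait for it ... */
            pci_unmap_single(pdev, buf_dma, len, PCI_DMA_TODEVICE);
            pci_free_consistent(pdev, 4096, ring, ring_dma);
            return 0;
    }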
diff --git a/include/asm-x86_64/pda.h b/include/asm-x86_64/pda.h
new file mode 100644 (file)
index 0000000..396dc01
--- /dev/null
@@ -0,0 +1,79 @@
+#ifndef X86_64_PDA_H
+#define X86_64_PDA_H
+
+#include <linux/stddef.h>
+#ifndef ASM_OFFSET_H
+#include <asm/offset.h>
+#endif
+#include <linux/cache.h>
+
+struct task_struct; 
+
+/* Per processor datastructure. %gs points to it while the kernel runs */ 
+/* To use a new field with the *_pda macros it needs to be added to tools/offset.c */
+struct x8664_pda {
+       struct x8664_pda *me; 
+       unsigned long kernelstack;  /* TOS for current process */ 
+       unsigned long oldrsp;       /* user rsp for system call */
+       unsigned long irqrsp;       /* Old rsp for interrupts. */ 
+       struct task_struct *pcurrent;   /* Current process */
+        int irqcount;              /* Irq nesting counter. Starts with -1 */   
+       int cpunumber;              /* Logical CPU number */
+       /* XXX: could be a single list */
+       unsigned long *pgd_quick;
+       unsigned long *pmd_quick;
+       unsigned long *pte_quick;
+       unsigned long pgtable_cache_sz;
+       char *irqstackptr;        
+       unsigned int __softirq_pending;
+       unsigned int __local_irq_count;
+       unsigned int __local_bh_count;
+       unsigned int __nmi_count;       /* arch dependent */
+       struct task_struct * __ksoftirqd_task; /* waitqueue is too large */
+       char irqstack[16 * 1024];   /* Stack used by interrupts */     
+} ____cacheline_aligned;
+
+#define PDA_STACKOFFSET (5*8)
+
+extern struct x8664_pda cpu_pda[];
+
+/* 
+ * There is no fast way to get the base address of the PDA, all the accesses
+ * have to mention %fs/%gs.  So it needs to be done this Torvaldian way.
+ */ 
+#define sizeof_field(type,field)  (sizeof(((type *)0)->field))
+#define typeof_field(type,field)  typeof(((type *)0)->field)
+#ifndef __STR
+#define __STR(x) #x
+#endif
+#define __STR2(x) __STR(x) 
+
+extern void __bad_pda_field(void);
+
+#define pda_to_op(op,field,val) do { \
+       switch (sizeof_field(struct x8664_pda, field)) {                \
+       case 2: asm volatile(op "w %0,%%gs:" __STR2(pda_ ## field) ::"r" (val):"memory"); break;        \
+       case 4: asm volatile(op "l %0,%%gs:" __STR2(pda_ ## field) ::"r" (val):"memory"); break;        \
+       case 8: asm volatile(op "q %0,%%gs:" __STR2(pda_ ## field) ::"r" (val):"memory"); break;        \
+       default: __bad_pda_field();                                     \
+       } \
+       } while (0)
+
+
+#define pda_from_op(op,field) ({ \
+       typedef typeof_field(struct x8664_pda, field) T__; T__ ret__; \
+       switch (sizeof_field(struct x8664_pda, field)) {                \
+       case 2: asm volatile (op "w %%gs:" __STR2(pda_ ## field) ",%0":"=r" (ret__)::"memory"); break;  \
+       case 4: asm volatile (op "l %%gs:" __STR2(pda_ ## field) ",%0":"=r" (ret__)::"memory"); break;  \
+       case 8: asm volatile (op "q %%gs:" __STR2(pda_ ## field) ",%0":"=r" (ret__)::"memory"); break;  \
+       default: __bad_pda_field();                                     \
+       } \
+       ret__; })
+
+
+#define read_pda(field) pda_from_op("mov",field)
+#define write_pda(field,val) pda_to_op("mov",field,val)
+#define add_pda(field,val) pda_to_op("add",field,val)
+#define sub_pda(field,val) pda_to_op("sub",field,val)
+
+#endif
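
For reference, a minimal sketch of how the accessors above are meant to be
used (illustrative only; the pda_* offsets come from the generated
asm/offset.h via tools/offset.c, and the expansions in the comments are
approximate):

	/* sketch, assumes <asm/pda.h> and a generated asm/offset.h */
	static inline int example_cpu_number(void)
	{
		/* roughly: movl %gs:pda_cpunumber,%eax */
		return read_pda(cpunumber);
	}

	static inline void example_syscall_entry(unsigned long user_rsp)
	{
		/* roughly: movq <reg>,%gs:pda_oldrsp */
		write_pda(oldrsp, user_rsp);
	}
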
diff --git a/include/asm-x86_64/pgalloc.h b/include/asm-x86_64/pgalloc.h
new file mode 100644 (file)
index 0000000..1d5fb0c
--- /dev/null
@@ -0,0 +1,258 @@
+#ifndef _X86_64_PGALLOC_H
+#define _X86_64_PGALLOC_H
+
+#include <linux/config.h>
+#include <asm/processor.h>
+#include <asm/fixmap.h>
+#include <asm/pda.h>
+#include <linux/threads.h>
+#include <linux/mm.h>
+
+#define inc_pgcache_size() add_pda(pgtable_cache_sz,1UL)
+#define dec_pgcache_size() sub_pda(pgtable_cache_sz,1UL)
+
+#define pmd_populate(mm, pmd, pte) \
+               set_pmd(pmd, __pmd(_PAGE_TABLE | __pa(pte)))
+#define pgd_populate(mm, pgd, pmd) \
+               set_pgd(pgd, __pgd(_PAGE_TABLE | __pa(pmd)))
+
+extern __inline__ pmd_t *get_pmd_slow(void)
+{
+       pmd_t *ret = (pmd_t *)__get_free_page(GFP_KERNEL);
+
+       if (ret)
+               memset(ret, 0, PAGE_SIZE);
+       return ret;
+}
+
+extern __inline__ pmd_t *get_pmd_fast(void)
+{
+       unsigned long *ret;
+
+       preempt_disable(); 
+       ret = read_pda(pmd_quick);
+       if (ret) {
+               write_pda(pmd_quick, (unsigned long *)(*ret));
+               ret[0] = 0;
+               dec_pgcache_size();
+       }
+       preempt_enable(); 
+       if (!ret)
+               ret = (unsigned long *)get_pmd_slow();
+       return (pmd_t *)ret;
+}
+
+extern __inline__ void pmd_free(pmd_t *pmd)
+{
+       preempt_disable(); 
+       *(unsigned long *)pmd = (unsigned long) read_pda(pmd_quick);
+       write_pda(pmd_quick,(unsigned long *) pmd);
+       inc_pgcache_size();
+       preempt_enable(); 
+}
+
+extern __inline__ void pmd_free_slow(pmd_t *pmd)
+{
+       if ((unsigned long)pmd & (PAGE_SIZE-1)) 
+               BUG(); 
+       free_page((unsigned long)pmd);
+}
+
+static inline pmd_t *pmd_alloc_one_fast (struct mm_struct *mm, unsigned long addr)
+{
+       unsigned long *ret;
+
+       preempt_disable(); 
+       ret = (unsigned long *)read_pda(pmd_quick);
+
+       if (__builtin_expect(ret != NULL, 1)) {
+               write_pda(pmd_quick, (unsigned long *)(*ret));
+               ret[0] = 0;
+               dec_pgcache_size();
+       }
+       preempt_enable(); 
+       return (pmd_t *)ret;
+}
+
+static inline pmd_t *pmd_alloc_one (struct mm_struct *mm, unsigned long addr)
+{
+       pmd_t *pmd = (pmd_t *) __get_free_page(GFP_KERNEL);
+
+       if (__builtin_expect(pmd != NULL, 1))
+               clear_page(pmd);
+       return pmd;
+}
+
+
+static inline pgd_t *pgd_alloc_one_fast (void)
+{
+       unsigned long *ret;
+
+       preempt_disable(); 
+       ret = read_pda(pgd_quick);
+       if (likely(ret != NULL)) {
+               write_pda(pgd_quick,(unsigned long *)(*ret));
+               ret[0] = 0;
+               dec_pgcache_size();
+       }
+       preempt_enable(); 
+       return (pgd_t *) ret;
+}
+
+static inline pgd_t *pgd_alloc (struct mm_struct *mm)
+{
+       /* the VM system never calls pgd_alloc_one_fast(), so we do it here. */
+       pgd_t *pgd = pgd_alloc_one_fast();
+
+       if (pgd == NULL) {
+               pgd = (pgd_t *)__get_free_page(GFP_KERNEL);
+               if (__builtin_expect(pgd != NULL, 1))
+                       clear_page(pgd);
+       }
+       return pgd;
+}
+
+static inline void pgd_free (pgd_t *pgd)
+{
+       preempt_disable();
+       *(unsigned long *)pgd = (unsigned long) read_pda(pgd_quick);
+       write_pda(pgd_quick,(unsigned long *) pgd);
+       inc_pgcache_size();
+       preempt_enable();
+}
+
+
+static inline void pgd_free_slow (pgd_t *pgd)
+{
+       if ((unsigned long)pgd & (PAGE_SIZE-1)) 
+               BUG(); 
+       free_page((unsigned long)pgd);
+}
+
+
+static inline pte_t *pte_alloc_one(struct mm_struct *mm, unsigned long address)
+{
+       pte_t *pte;
+
+       pte = (pte_t *) __get_free_page(GFP_KERNEL);
+       if (pte)
+               clear_page(pte);
+       return pte;
+}
+
+extern __inline__ pte_t *pte_alloc_one_fast(struct mm_struct *mm, unsigned long address)
+{
+       unsigned long *ret;
+
+       preempt_disable(); 
+       if(__builtin_expect((ret = read_pda(pte_quick)) != NULL, !0)) {  
+               write_pda(pte_quick, (unsigned long *)(*ret));
+               ret[0] = ret[1];
+               dec_pgcache_size();
+       }
+       preempt_enable(); 
+       return (pte_t *)ret;
+}
+
+/* Should really implement gc for free page table pages. This could be done with 
+   a reference count in struct page. */
+
+extern __inline__ void pte_free(pte_t *pte)
+{      
+       preempt_disable();
+       *(unsigned long *)pte = (unsigned long) read_pda(pte_quick);
+       write_pda(pte_quick, (unsigned long *) pte); 
+       inc_pgcache_size();
+       preempt_enable();
+}
+
+extern __inline__ void pte_free_slow(pte_t *pte)
+{
+       if ((unsigned long)pte & (PAGE_SIZE-1))
+               BUG();
+       free_page((unsigned long)pte); 
+}
+
+
+extern int do_check_pgt_cache(int, int);
+
+/*
+ * TLB flushing:
+ *
+ *  - flush_tlb() flushes the current mm struct TLBs
+ *  - flush_tlb_all() flushes all processes TLBs
+ *  - flush_tlb_mm(mm) flushes the specified mm context TLB's
+ *  - flush_tlb_page(vma, vmaddr) flushes one page
+ *  - flush_tlb_range(vma, start, end) flushes a range of pages
+ *  - flush_tlb_pgtables(mm, start, end) flushes a range of page tables
+ *
+ * ..but x86-64, like i386, has somewhat limited TLB flushing capabilities:
+ * besides full flushes, only single-page invalidation (invlpg) is available.
+ */
+
+#ifndef CONFIG_SMP
+
+#define flush_tlb() __flush_tlb()
+#define flush_tlb_all() __flush_tlb_all()
+#define local_flush_tlb() __flush_tlb()
+
+static inline void flush_tlb_mm(struct mm_struct *mm)
+{
+       if (mm == current->active_mm)
+               __flush_tlb();
+}
+
+static inline void flush_tlb_page(struct vm_area_struct *vma,
+       unsigned long addr)
+{
+       if (vma->vm_mm == current->active_mm)
+               __flush_tlb_one(addr);
+}
+
+static inline void flush_tlb_range(struct vm_area_struct *vma,
+       unsigned long start, unsigned long end)
+{
+       if (vma->vm_mm == current->active_mm)
+               __flush_tlb();
+}
+
+#else
+
+#include <asm/smp.h>
+
+#define local_flush_tlb() \
+       __flush_tlb()
+
+extern void flush_tlb_all(void);
+extern void flush_tlb_current_task(void);
+extern void flush_tlb_mm(struct mm_struct *);
+extern void flush_tlb_page(struct vm_area_struct *, unsigned long);
+
+#define flush_tlb()    flush_tlb_current_task()
+
+static inline void flush_tlb_range(struct vm_area_struct * vma, unsigned long start, unsigned long end)
+{
+       flush_tlb_mm(vma->vm_mm);
+}
+
+#define TLBSTATE_OK    1
+#define TLBSTATE_LAZY  2
+
+struct tlb_state
+{
+       struct mm_struct *active_mm;
+       int state;
+       char __cacheline_padding[24];
+};
+extern struct tlb_state cpu_tlbstate[NR_CPUS];
+
+
+#endif
+
+extern inline void flush_tlb_pgtables(struct mm_struct *mm,
+                                     unsigned long start, unsigned long end)
+{
+       /* x86-64 does not keep any page table caches in the TLB */
+}
+
+#endif /* _X86_64_PGALLOC_H */
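
The quicklists above thread freed page-table pages into a per-CPU singly
linked list through their first word, with the list head and counter kept
in the PDA and the update bracketed by preempt_disable()/preempt_enable().
A user-space sketch of the same pattern (names are illustrative, not part
of the patch):

	#include <stdlib.h>

	#define PAGE_WORDS 512                     /* 4096 bytes / 8 */

	static unsigned long *quick_head;          /* stands in for pda->pmd_quick */
	static unsigned long quick_count;          /* stands in for pgtable_cache_sz */

	static void quick_free(unsigned long *page)
	{
		page[0] = (unsigned long)quick_head;   /* link to the old head */
		quick_head = page;
		quick_count++;
	}

	static unsigned long *quick_alloc(void)
	{
		unsigned long *page = quick_head;
		if (page) {
			quick_head = (unsigned long *)page[0]; /* pop the head */
			page[0] = 0;                           /* re-zero the link word */
			quick_count--;
		} else {
			page = calloc(PAGE_WORDS, sizeof(*page)); /* "slow" path */
		}
		return page;
	}
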
diff --git a/include/asm-x86_64/pgtable.h b/include/asm-x86_64/pgtable.h
new file mode 100644 (file)
index 0000000..c2061e0
--- /dev/null
@@ -0,0 +1,399 @@
+#ifndef _X86_64_PGTABLE_H
+#define _X86_64_PGTABLE_H
+
+#include <linux/config.h>
+
+/*
+ * This file contains the functions and defines necessary to modify and use
+ * the x86-64 page table tree.
+ * 
+ * x86-64 has a 4 level table setup. Generic linux MM only supports
+ * three levels. The fourth level is currently a single static page that
+ * is shared by everybody and just contains a pointer to the current
+ * three level page setup at the beginning and some kernel mappings at 
+ * the end. For more details see Documentation/x86_64/mm.txt
+ */
+#ifndef __ASSEMBLY__
+#include <asm/processor.h>
+#include <asm/fixmap.h>
+#include <asm/bitops.h>
+#include <linux/threads.h>
+
+extern level4_t level4_pgt[512];
+extern pgd_t level3_kernel_pgt[512];
+extern pgd_t level3_physmem_pgt[512];
+extern pgd_t level3_ident_pgt[512], swapper_pg_dir[512];
+extern pmd_t level2_kernel_pgt[512];
+extern void paging_init(void);
+
+/* Caches aren't brain-dead on the intel. */
+#define flush_cache_all()                      do { } while (0)
+#define flush_cache_mm(mm)                     do { } while (0)
+#define flush_cache_range(vma, start, end)     do { } while (0)
+#define flush_cache_page(vma, vmaddr)          do { } while (0)
+#define flush_page_to_ram(page)                        do { } while (0)
+#define flush_dcache_page(page)                        do { } while (0)
+#define flush_icache_range(start, end)         do { } while (0)
+#define flush_icache_page(vma,pg)              do { } while (0)
+
+#define __flush_tlb()                                                  \
+       do {                                                            \
+               unsigned long tmpreg;                                   \
+                                                                       \
+               __asm__ __volatile__(                                   \
+                       "movq %%cr3, %0;  # flush TLB \n"               \
+                       "movq %0, %%cr3;              \n"               \
+                       : "=r" (tmpreg)                                 \
+                       :: "memory");                                   \
+       } while (0)
+
+/*
+ * Global pages have to be flushed a bit differently. Not a real
+ * performance problem because this does not happen often.
+ */
+#define __flush_tlb_global()                                           \
+       do {                                                            \
+               unsigned long tmpreg;                                   \
+                                                                       \
+               __asm__ __volatile__(                                   \
+                       "movq %1, %%cr4;  # turn off PGE     \n"        \
+                       "movq %%cr3, %0;  # flush TLB        \n"        \
+                       "movq %0, %%cr3;                     \n"        \
+                       "movq %2, %%cr4;  # turn PGE back on \n"        \
+                       : "=&r" (tmpreg)                                \
+                       : "r" (mmu_cr4_features & ~X86_CR4_PGE),        \
+                         "r" (mmu_cr4_features)                        \
+                       : "memory");                                    \
+       } while (0)
+
+extern unsigned long pgkern_mask;
+
+/*
+ * Do not check the PGE bit unnecessarily; x86-64 always has global pages.
+ * FIXME: This should be cleaned up
+ */
+
+# define __flush_tlb_all() __flush_tlb_global()
+
+#define __flush_tlb_one(addr) __asm__ __volatile__("invlpg %0": :"m" (*(char *) addr))
+
+/*
+ * ZERO_PAGE is a global shared page that is always zero: used
+ * for zero-mapped memory areas etc..
+ */
+extern unsigned long empty_zero_page[PAGE_SIZE/sizeof(unsigned long)];
+#define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page))
+
+#endif /* !__ASSEMBLY__ */
+
+#define LEVEL4_SHIFT   39
+#define PTRS_PER_LEVEL4        512
+
+/*
+ * PGDIR_SHIFT determines what a top-level page table entry can map
+ */
+#define PGDIR_SHIFT    30
+#define PTRS_PER_PGD   512
+
+/*
+ * PMD_SHIFT determines the size of the area a middle-level
+ * page table can map
+ */
+#define PMD_SHIFT      21
+#define PTRS_PER_PMD   512
+
+/*
+ * entries per page directory level
+ */
+#define PTRS_PER_PTE   512
+
+#define pte_ERROR(e) \
+       printk("%s:%d: bad pte %p(%016lx).\n", __FILE__, __LINE__, &(e), pte_val(e))
+#define pmd_ERROR(e) \
+       printk("%s:%d: bad pmd %p(%016lx).\n", __FILE__, __LINE__, &(e), pmd_val(e))
+#define pgd_ERROR(e) \
+       printk("%s:%d: bad pgd %p(%016lx).\n", __FILE__, __LINE__, &(e), pgd_val(e))
+
+#define level4_none(x) (!level4_val(x))
+#define pgd_none(x)    (!pgd_val(x))
+
+#define pgd_bad(x) ((pgd_val(x) & (~PAGE_MASK & ~_PAGE_USER)) != _KERNPG_TABLE )
+
+extern inline int pgd_present(pgd_t pgd)       { return !pgd_none(pgd); }
+
+static inline void set_pte(pte_t *dst, pte_t val)
+{
+       *((unsigned long *)dst) = pte_val(val); 
+} 
+
+static inline void set_pmd(pmd_t *dst, pmd_t val)
+{
+       *((unsigned long *)dst) = pmd_val(val); 
+} 
+
+static inline void set_pgd(pgd_t *dst, pgd_t val)
+{
+       *((unsigned long *)dst) = pgd_val(val); 
+} 
+
+extern inline void __pgd_clear (pgd_t * pgd)
+{
+       set_pgd(pgd, __pgd(0));
+}
+
+extern inline void pgd_clear (pgd_t * pgd)
+{
+       __pgd_clear(pgd);
+       __flush_tlb();
+}
+
+#define pgd_page(pgd) \
+((unsigned long) __va(pgd_val(pgd) & PAGE_MASK))
+
+/* Find an entry in the second-level page table.. */
+#define pmd_offset(dir, address) ((pmd_t *) pgd_page(*(dir)) + \
+                       __pmd_offset(address))
+
+#define ptep_get_and_clear(xp) __pte(xchg(&(xp)->pte, 0))
+#define pte_same(a, b)         ((a).pte == (b).pte)
+#define __mk_pte(page_nr,pgprot) __pte(((page_nr) << PAGE_SHIFT) | pgprot_val(pgprot))
+
+#define PMD_SIZE       (1UL << PMD_SHIFT)
+#define PMD_MASK       (~(PMD_SIZE-1))
+#define PGDIR_SIZE     (1UL << PGDIR_SHIFT)
+#define PGDIR_MASK     (~(PGDIR_SIZE-1))
+
+#define USER_PTRS_PER_PGD      (TASK_SIZE/PGDIR_SIZE)
+#define FIRST_USER_PGD_NR      0
+
+#define USER_PGD_PTRS (PAGE_OFFSET >> PGDIR_SHIFT)
+#define KERNEL_PGD_PTRS (PTRS_PER_PGD-USER_PGD_PTRS)
+
+#define TWOLEVEL_PGDIR_SHIFT   20
+#define BOOT_USER_L4_PTRS 1
+#define BOOT_KERNEL_L4_PTRS 511        /* But we will do it in the 4th level */
+
+
+
+#ifndef __ASSEMBLY__
+/* Just any arbitrary offset to the start of the vmalloc VM area: the
+ * current 8MB value just means that there will be an 8MB "hole" after the
+ * physical memory until the kernel virtual memory starts.  That means that
+ * any out-of-bounds memory accesses will hopefully be caught.
+ * The vmalloc() routines leave a hole of 4kB between each vmalloced
+ * area for the same reason. ;)
+ */
+#define VMALLOC_OFFSET (8*1024*1024)
+#define VMALLOC_START  (((unsigned long) high_memory + 2*VMALLOC_OFFSET-1) & \
+                                               ~(VMALLOC_OFFSET-1))
+#define VMALLOC_VMADDR(x) ((unsigned long)(x))
+#define VMALLOC_END    (__START_KERNEL_map-PAGE_SIZE)
+
+#define _PAGE_BIT_PRESENT      0
+#define _PAGE_BIT_RW           1
+#define _PAGE_BIT_USER         2
+#define _PAGE_BIT_PWT          3
+#define _PAGE_BIT_PCD          4
+#define _PAGE_BIT_ACCESSED     5
+#define _PAGE_BIT_DIRTY                6
+#define _PAGE_BIT_PSE          7       /* 4 MB (or 2MB) page, Pentium+, if present.. */
+#define _PAGE_BIT_GLOBAL       8       /* Global TLB entry PPro+ */
+
+#define _PAGE_PRESENT  0x001
+#define _PAGE_RW       0x002
+#define _PAGE_USER     0x004
+#define _PAGE_PWT      0x008
+#define _PAGE_PCD      0x010
+#define _PAGE_ACCESSED 0x020
+#define _PAGE_DIRTY    0x040
+#define _PAGE_PSE      0x080   /* 2MB page */
+#define _PAGE_GLOBAL   0x100   /* Global TLB entry PPro+ */
+
+#define _PAGE_PROTNONE 0x080   /* If not present */
+
+#define _PAGE_TABLE    (_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | _PAGE_ACCESSED | _PAGE_DIRTY)
+#define _KERNPG_TABLE  (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY)
+
+#define _PAGE_CHG_MASK (PTE_MASK | _PAGE_ACCESSED | _PAGE_DIRTY)
+
+#define PAGE_NONE      __pgprot(_PAGE_PROTNONE | _PAGE_ACCESSED)
+#define PAGE_SHARED    __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | _PAGE_ACCESSED)
+#define PAGE_COPY      __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED)
+#define PAGE_READONLY  __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED)
+
+#define __PAGE_KERNEL \
+       (_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED)
+#define __PAGE_KERNEL_NOCACHE \
+       (_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_PCD | _PAGE_ACCESSED)
+#define __PAGE_KERNEL_RO \
+       (_PAGE_PRESENT | _PAGE_DIRTY | _PAGE_ACCESSED)
+#define __PAGE_KERNEL_VSYSCALL \
+       (_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED)
+
+#define MAKE_GLOBAL(x) __pgprot((x) | _PAGE_GLOBAL)
+
+#define PAGE_KERNEL MAKE_GLOBAL(__PAGE_KERNEL)
+#define PAGE_KERNEL_RO MAKE_GLOBAL(__PAGE_KERNEL_RO)
+#define PAGE_KERNEL_NOCACHE MAKE_GLOBAL(__PAGE_KERNEL_NOCACHE)
+#define PAGE_KERNEL_VSYSCALL MAKE_GLOBAL(__PAGE_KERNEL_VSYSCALL)
+
+/*
+ * The i386 can't do page protection for execute, so execute is treated
+ * the same as read. Also, write permissions imply read permissions.
+ * This is the closest we can get..
+ */
+#define __P000 PAGE_NONE
+#define __P001 PAGE_READONLY
+#define __P010 PAGE_COPY
+#define __P011 PAGE_COPY
+#define __P100 PAGE_READONLY
+#define __P101 PAGE_READONLY
+#define __P110 PAGE_COPY
+#define __P111 PAGE_COPY
+
+#define __S000 PAGE_NONE
+#define __S001 PAGE_READONLY
+#define __S010 PAGE_SHARED
+#define __S011 PAGE_SHARED
+#define __S100 PAGE_READONLY
+#define __S101 PAGE_READONLY
+#define __S110 PAGE_SHARED
+#define __S111 PAGE_SHARED
+
+/*
+ * Define this if things work differently on an i386 and an i486:
+ * it will (on an i486) warn about kernel memory accesses that are
+ * done without a 'verify_area(VERIFY_WRITE,..)'
+ */
+#undef TEST_VERIFY_AREA
+
+/* page table for 0-4MB for everybody */
+extern unsigned long pg0[1024];
+
+/*
+ * Handling allocation failures during page table setup.
+ */
+extern void __handle_bad_pmd(pmd_t * pmd);
+extern void __handle_bad_pmd_kernel(pmd_t * pmd);
+
+#define pte_none(x)    (!pte_val(x))
+#define pte_present(x) (pte_val(x) & (_PAGE_PRESENT | _PAGE_PROTNONE))
+#define pte_clear(xp)  do { set_pte(xp, __pte(0)); } while (0)
+
+#define pmd_none(x)    (!pmd_val(x))
+#define pmd_present(x) (pmd_val(x) & _PAGE_PRESENT)
+#define pmd_clear(xp)  do { set_pmd(xp, __pmd(0)); } while (0)
+#define        pmd_bad(x)      ((pmd_val(x) & (~PAGE_MASK & ~_PAGE_USER)) != _KERNPG_TABLE )
+
+/*
+ * Permanent address of a page. Obviously must never be
+ * called on a highmem page.
+ */
+#define page_address(page) ((page)->virtual)
+#define pages_to_mb(x) ((x) >> (20-PAGE_SHIFT))        /* FIXME: is this right? */
+#define pte_page(x) (mem_map+((unsigned long)((pte_val(x) >> PAGE_SHIFT))))
+
+/*
+ * The following only work if pte_present() is true.
+ * Undefined behaviour if not..
+ */
+extern inline int pte_read(pte_t pte)          { return pte_val(pte) & _PAGE_USER; }
+extern inline int pte_exec(pte_t pte)          { return pte_val(pte) & _PAGE_USER; }
+extern inline int pte_dirty(pte_t pte)         { return pte_val(pte) & _PAGE_DIRTY; }
+extern inline int pte_young(pte_t pte)         { return pte_val(pte) & _PAGE_ACCESSED; }
+extern inline int pte_write(pte_t pte)         { return pte_val(pte) & _PAGE_RW; }
+
+extern inline pte_t pte_rdprotect(pte_t pte)   { set_pte(&pte, __pte(pte_val(pte) & ~_PAGE_USER)); return pte; }
+extern inline pte_t pte_exprotect(pte_t pte)   { set_pte(&pte, __pte(pte_val(pte) & ~_PAGE_USER)); return pte; }
+extern inline pte_t pte_mkclean(pte_t pte)     { set_pte(&pte, __pte(pte_val(pte) & ~_PAGE_DIRTY)); return pte; }
+extern inline pte_t pte_mkold(pte_t pte)       { set_pte(&pte, __pte(pte_val(pte) & ~_PAGE_ACCESSED)); return pte; }
+extern inline pte_t pte_wrprotect(pte_t pte)   { set_pte(&pte, __pte(pte_val(pte) & ~_PAGE_RW)); return pte; }
+extern inline pte_t pte_mkread(pte_t pte)      { set_pte(&pte, __pte(pte_val(pte) | _PAGE_USER)); return pte; }
+extern inline pte_t pte_mkexec(pte_t pte)      { set_pte(&pte, __pte(pte_val(pte) | _PAGE_USER)); return pte; }
+extern inline pte_t pte_mkdirty(pte_t pte)     { set_pte(&pte, __pte(pte_val(pte) | _PAGE_DIRTY)); return pte; }
+extern inline pte_t pte_mkyoung(pte_t pte)     { set_pte(&pte, __pte(pte_val(pte) | _PAGE_ACCESSED)); return pte; }
+extern inline pte_t pte_mkwrite(pte_t pte)     { set_pte(&pte, __pte(pte_val(pte) | _PAGE_RW)); return pte; }
+static inline  int ptep_test_and_clear_dirty(pte_t *ptep)      { return test_and_clear_bit(_PAGE_BIT_DIRTY, ptep); }
+static inline  int ptep_test_and_clear_young(pte_t *ptep)      { return test_and_clear_bit(_PAGE_BIT_ACCESSED, ptep); }
+static inline void ptep_set_wrprotect(pte_t *ptep)             { clear_bit(_PAGE_BIT_RW, ptep); }
+static inline void ptep_mkdirty(pte_t *ptep)                   { set_bit(_PAGE_BIT_DIRTY, ptep); }
+
+/*
+ * Conversion functions: convert a page and protection to a page entry,
+ * and a page entry and page directory to the page they refer to.
+ */
+
+#define mk_pte(page,pgprot) \
+({                                                                     \
+       pte_t __pte;                                                    \
+                                                                       \
+       set_pte(&__pte, __pte(((page)-mem_map) *                        \
+               (unsigned long long)PAGE_SIZE + pgprot_val(pgprot)));   \
+       __pte;                                                          \
+})
+
+/* This takes a physical page address that is used by the remapping functions */
+#define mk_pte_phys(physpage, pgprot) \
+({ pte_t __pte; set_pte(&__pte, __pte(physpage + pgprot_val(pgprot))); __pte; })
+
+extern inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
+{ set_pte(&pte, __pte((pte_val(pte) & _PAGE_CHG_MASK) | pgprot_val(newprot))); return pte; }
+
+#define page_pte(page) page_pte_prot(page, __pgprot(0))
+
+#define pmd_page(pmd) \
+((unsigned long) __va(pmd_val(pmd) & PAGE_MASK))
+
+/* to find an entry in a page-table-directory. */
+#define pgd_index(address) ((address >> PGDIR_SHIFT) & (PTRS_PER_PGD-1))
+
+#define __pgd_offset(address) pgd_index(address)
+
+#define pgd_offset(mm, address) ((mm)->pgd+pgd_index(address))
+
+/* to find an entry in a kernel page-table-directory */
+#define pgd_offset_k(address) pgd_offset(&init_mm, address)
+
+#define __pmd_offset(address) \
+               (((address) >> PMD_SHIFT) & (PTRS_PER_PMD-1))
+
+/* Find an entry in the third-level page table.. */
+#define __pte_offset(address) \
+               ((address >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))
+#define pte_offset(dir, address) ((pte_t *) pmd_page(*(dir)) + \
+                       __pte_offset(address))
+
+/* never use these in the common code */
+#define level4_page(level4) ((unsigned long) __va(level4_val(level4) & PAGE_MASK))
+#define level4_index(address) ((address >> LEVEL4_SHIFT) & (PTRS_PER_LEVEL4-1))
+#define level4_offset_k(address) (level4_pgt + level4_index(address))
+#define level3_offset_k(dir, address) ((pgd_t *) level4_page(*(dir)) + pgd_index(address))
+
+/*
+ * x86-64 doesn't have any external MMU info: the kernel page
+ * tables contain all the necessary information.
+ */
+#define update_mmu_cache(vma,address,pte) do { } while (0)
+
+/* Encode and de-code a swap entry */
+#define SWP_TYPE(x)                    (((x).val >> 1) & 0x3f)
+#define SWP_OFFSET(x)                  ((x).val >> 8)
+#define SWP_ENTRY(type, offset)                ((swp_entry_t) { ((type) << 1) | ((offset) << 8) })
+#define pte_to_swp_entry(pte)          ((swp_entry_t) { pte_val(pte) })
+#define swp_entry_to_pte(x)            ((pte_t) { (x).val })
+
+#endif /* !__ASSEMBLY__ */
+
+/* Needs to be defined here and not in linux/mm.h, as it is arch dependent */
+#define PageSkip(page)         (0)
+#define kern_addr_valid(addr)  (1)
+
+#define io_remap_page_range remap_page_range
+
+#define HAVE_ARCH_UNMAPPED_AREA
+
+#define pgtable_cache_init()   do { } while (0)
+
+
+#endif /* _X86_64_PGTABLE_H */
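
The shift constants above (39/30/21 plus the 12-bit page offset) split a
virtual address into four 9-bit indices, one per table level.  A small
stand-alone sketch of the arithmetic behind level4_index(), pgd_index(),
__pmd_offset() and __pte_offset() (illustrative only):

	#include <stdio.h>

	int main(void)
	{
		/* compose an address with known indices: 2/3/4/5, offset 6 */
		unsigned long addr = (2UL << 39) | (3UL << 30) |
		                     (4UL << 21) | (5UL << 12) | 6;

		printf("level4 %lu\n", (addr >> 39) & 511);  /* -> 2 */
		printf("pgd    %lu\n", (addr >> 30) & 511);  /* -> 3 */
		printf("pmd    %lu\n", (addr >> 21) & 511);  /* -> 4 */
		printf("pte    %lu\n", (addr >> 12) & 511);  /* -> 5 */
		printf("offset %lu\n", addr & 4095);         /* -> 6 */
		return 0;
	}
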
diff --git a/include/asm-x86_64/poll.h b/include/asm-x86_64/poll.h
new file mode 100644 (file)
index 0000000..d4a703d
--- /dev/null
@@ -0,0 +1,25 @@
+#ifndef __x86_64_POLL_H
+#define __x86_64_POLL_H
+
+/* These are specified by iBCS2 */
+#define POLLIN         0x0001
+#define POLLPRI                0x0002
+#define POLLOUT                0x0004
+#define POLLERR                0x0008
+#define POLLHUP                0x0010
+#define POLLNVAL       0x0020
+
+/* The rest seem to be more-or-less nonstandard. Check them! */
+#define POLLRDNORM     0x0040
+#define POLLRDBAND     0x0080
+#define POLLWRNORM     0x0100
+#define POLLWRBAND     0x0200
+#define POLLMSG                0x0400
+
+struct pollfd {
+       int fd;
+       short events;
+       short revents;
+};
+
+#endif
diff --git a/include/asm-x86_64/posix_types.h b/include/asm-x86_64/posix_types.h
new file mode 100644 (file)
index 0000000..8c60840
--- /dev/null
@@ -0,0 +1,116 @@
+#ifndef _ASM_X86_64_POSIX_TYPES_H
+#define _ASM_X86_64_POSIX_TYPES_H
+
+/*
+ * This file is generally used by user-level software, so you need to
+ * be a little careful about namespace pollution etc.  Also, we cannot
+ * assume GCC is being used.
+ */
+
+typedef unsigned long  __kernel_dev_t;
+typedef unsigned long  __kernel_ino_t;
+typedef unsigned int   __kernel_mode_t;
+typedef unsigned long  __kernel_nlink_t;
+typedef long           __kernel_off_t;
+typedef int            __kernel_pid_t;
+typedef int            __kernel_ipc_pid_t;
+typedef unsigned int   __kernel_uid_t;
+typedef unsigned int   __kernel_gid_t;
+typedef unsigned long  __kernel_size_t;
+typedef long           __kernel_ssize_t;
+typedef long           __kernel_ptrdiff_t;
+typedef long           __kernel_time_t;
+typedef long           __kernel_suseconds_t;
+typedef long           __kernel_clock_t;
+typedef int            __kernel_daddr_t;
+typedef char *         __kernel_caddr_t;
+typedef unsigned short __kernel_uid16_t;
+typedef unsigned short __kernel_gid16_t;
+
+#ifdef __GNUC__
+typedef long long      __kernel_loff_t;
+#endif
+
+typedef struct {
+       int     val[2];
+} __kernel_fsid_t;
+
+typedef __kernel_uid_t __kernel_old_uid_t;
+typedef __kernel_gid_t __kernel_old_gid_t;
+typedef __kernel_uid_t __kernel_uid32_t;
+typedef __kernel_gid_t __kernel_gid32_t;
+
+#ifdef __KERNEL__
+
+#undef __FD_SET
+static __inline__ void __FD_SET(unsigned long fd, __kernel_fd_set *fdsetp)
+{
+       unsigned long _tmp = fd / __NFDBITS;
+       unsigned long _rem = fd % __NFDBITS;
+       fdsetp->fds_bits[_tmp] |= (1UL<<_rem);
+}
+
+#undef __FD_CLR
+static __inline__ void __FD_CLR(unsigned long fd, __kernel_fd_set *fdsetp)
+{
+       unsigned long _tmp = fd / __NFDBITS;
+       unsigned long _rem = fd % __NFDBITS;
+       fdsetp->fds_bits[_tmp] &= ~(1UL<<_rem);
+}
+
+#undef __FD_ISSET
+static __inline__ int __FD_ISSET(unsigned long fd, __const__ __kernel_fd_set *p)
+{
+       unsigned long _tmp = fd / __NFDBITS;
+       unsigned long _rem = fd % __NFDBITS;
+       return (p->fds_bits[_tmp] & (1UL<<_rem)) != 0;
+}
+
+/*
+ * This will unroll the loop for the normal constant cases (with 64-bit
+ * longs a 1024-bit fd_set is 16 longs; the other sizes are kept from i386)
+ */
+#undef __FD_ZERO
+static __inline__ void __FD_ZERO(__kernel_fd_set *p)
+{
+       unsigned long *tmp = p->fds_bits;
+       int i;
+
+       if (__builtin_constant_p(__FDSET_LONGS)) {
+               switch (__FDSET_LONGS) {
+                       case 32:
+                         tmp[ 0] = 0; tmp[ 1] = 0; tmp[ 2] = 0; tmp[ 3] = 0;
+                         tmp[ 4] = 0; tmp[ 5] = 0; tmp[ 6] = 0; tmp[ 7] = 0;
+                         tmp[ 8] = 0; tmp[ 9] = 0; tmp[10] = 0; tmp[11] = 0;
+                         tmp[12] = 0; tmp[13] = 0; tmp[14] = 0; tmp[15] = 0;
+                         tmp[16] = 0; tmp[17] = 0; tmp[18] = 0; tmp[19] = 0;
+                         tmp[20] = 0; tmp[21] = 0; tmp[22] = 0; tmp[23] = 0;
+                         tmp[24] = 0; tmp[25] = 0; tmp[26] = 0; tmp[27] = 0;
+                         tmp[28] = 0; tmp[29] = 0; tmp[30] = 0; tmp[31] = 0;
+                         return;
+                       case 16:
+                         tmp[ 0] = 0; tmp[ 1] = 0; tmp[ 2] = 0; tmp[ 3] = 0;
+                         tmp[ 4] = 0; tmp[ 5] = 0; tmp[ 6] = 0; tmp[ 7] = 0;
+                         tmp[ 8] = 0; tmp[ 9] = 0; tmp[10] = 0; tmp[11] = 0;
+                         tmp[12] = 0; tmp[13] = 0; tmp[14] = 0; tmp[15] = 0;
+                         return;
+                       case 8:
+                         tmp[ 0] = 0; tmp[ 1] = 0; tmp[ 2] = 0; tmp[ 3] = 0;
+                         tmp[ 4] = 0; tmp[ 5] = 0; tmp[ 6] = 0; tmp[ 7] = 0;
+                         return;
+                       case 4:
+                         tmp[ 0] = 0; tmp[ 1] = 0; tmp[ 2] = 0; tmp[ 3] = 0;
+                         return;
+               }
+       }
+       i = __FDSET_LONGS;
+       while (i) {
+               i--;
+               *tmp = 0;
+               tmp++;
+       }
+}
+
+#endif /* defined(__KERNEL__) */
+
+#endif
diff --git a/include/asm-x86_64/prctl.h b/include/asm-x86_64/prctl.h
new file mode 100644 (file)
index 0000000..52952ad
--- /dev/null
@@ -0,0 +1,10 @@
+#ifndef X86_64_PRCTL_H
+#define X86_64_PRCTL_H 1
+
+#define ARCH_SET_GS 0x1001
+#define ARCH_SET_FS 0x1002
+#define ARCH_GET_FS 0x1003
+#define ARCH_GET_GS 0x1004
+
+
+#endif
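
These codes are consumed by the arch_prctl() entry point added elsewhere in
this port.  A hedged user-space sketch of setting the %fs base with them
(the syscall number is assumed to be exported as SYS_arch_prctl by the C
library; the wrapper name is illustrative):

	#include <unistd.h>
	#include <sys/syscall.h>

	#define ARCH_SET_FS 0x1002              /* mirrors the value above */

	static int example_set_fs_base(unsigned long base)
	{
		return syscall(SYS_arch_prctl, ARCH_SET_FS, base);
	}
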
diff --git a/include/asm-x86_64/processor.h b/include/asm-x86_64/processor.h
new file mode 100644 (file)
index 0000000..87c5f11
--- /dev/null
@@ -0,0 +1,463 @@
+/*
+ * include/asm-x86_64/processor.h
+ *
+ * Copyright (C) 1994 Linus Torvalds
+ */
+
+#ifndef __ASM_X86_64_PROCESSOR_H
+#define __ASM_X86_64_PROCESSOR_H
+
+#include <asm/segment.h>
+#include <asm/page.h>
+#include <asm/types.h>
+#include <asm/sigcontext.h>
+#include <asm/cpufeature.h>
+#include <linux/config.h>
+#include <linux/threads.h>
+#include <asm/msr.h>
+#include <asm/current.h>
+#include <asm/system.h>
+
+#define TF_MASK                0x00000100
+#define IF_MASK                0x00000200
+#define IOPL_MASK      0x00003000
+#define NT_MASK                0x00004000
+#define VM_MASK                0x00020000
+#define AC_MASK                0x00040000
+#define VIF_MASK       0x00080000      /* virtual interrupt flag */
+#define VIP_MASK       0x00100000      /* virtual interrupt pending */
+#define ID_MASK                0x00200000
+
+#define current_text_addr() ({ void *pc; asm volatile("leaq 1f(%%rip),%0\n1:":"=r"(pc)); pc; })
+
+/*
+ *  CPU type and hardware bug flags. Kept separately for each CPU.
+ *  Members of this structure are referenced in head.S, so think twice
+ *  before touching them. [mj]
+ */
+
+struct cpuinfo_x86 {
+       __u8    x86;            /* CPU family */
+       __u8    x86_vendor;     /* CPU vendor */
+       __u8    x86_model;
+       __u8    x86_mask;
+  /* We know that wp_works_ok = 1, hlt_works_ok = 1, hard_math = 1,
+     etc... */
+       char    wp_works_ok;    /* It doesn't on 386's */
+       char    hlt_works_ok;   /* Problems on some 486Dx4's and old 386's */
+       char    hard_math;
+       char    rfu;
+       int     cpuid_level;    /* Maximum supported CPUID level, -1=no CPUID */
+       __u32   x86_capability[NCAPINTS];
+       char    x86_vendor_id[16];
+       char    x86_model_id[64];
+       int     x86_cache_size;  /* in KB - valid for CPUS which support this
+                                   call  */
+       int     fdiv_bug;
+       int     f00f_bug;
+       int     coma_bug;
+       unsigned long loops_per_jiffy;
+} ____cacheline_aligned;
+
+#define X86_VENDOR_INTEL 0
+#define X86_VENDOR_CYRIX 1
+#define X86_VENDOR_AMD 2
+#define X86_VENDOR_UMC 3
+#define X86_VENDOR_NEXGEN 4
+#define X86_VENDOR_CENTAUR 5
+#define X86_VENDOR_RISE 6
+#define X86_VENDOR_TRANSMETA 7
+#define X86_VENDOR_UNKNOWN 0xff
+
+extern struct cpuinfo_x86 boot_cpu_data;
+extern struct tss_struct init_tss[NR_CPUS];
+
+#ifdef CONFIG_SMP
+extern struct cpuinfo_x86 cpu_data[];
+#define current_cpu_data cpu_data[smp_processor_id()]
+#else
+#define cpu_data &boot_cpu_data
+#define current_cpu_data boot_cpu_data
+#endif
+
+#define cpu_has_pge 1
+#define cpu_has_pse 1
+#define cpu_has_pae 1
+#define cpu_has_tsc 1
+#define cpu_has_de 1
+#define cpu_has_vme 1
+#define cpu_has_fxsr 1
+#define cpu_has_xmm 1
+#define cpu_has_apic 1
+
+extern char ignore_irq13;
+
+extern void identify_cpu(struct cpuinfo_x86 *);
+extern void print_cpu_info(struct cpuinfo_x86 *);
+extern void dodgy_tsc(void);
+
+/*
+ * EFLAGS bits
+ */
+#define X86_EFLAGS_CF  0x00000001 /* Carry Flag */
+#define X86_EFLAGS_PF  0x00000004 /* Parity Flag */
+#define X86_EFLAGS_AF  0x00000010 /* Auxiliary carry Flag */
+#define X86_EFLAGS_ZF  0x00000040 /* Zero Flag */
+#define X86_EFLAGS_SF  0x00000080 /* Sign Flag */
+#define X86_EFLAGS_TF  0x00000100 /* Trap Flag */
+#define X86_EFLAGS_IF  0x00000200 /* Interrupt Flag */
+#define X86_EFLAGS_DF  0x00000400 /* Direction Flag */
+#define X86_EFLAGS_OF  0x00000800 /* Overflow Flag */
+#define X86_EFLAGS_IOPL        0x00003000 /* IOPL mask */
+#define X86_EFLAGS_NT  0x00004000 /* Nested Task */
+#define X86_EFLAGS_RF  0x00010000 /* Resume Flag */
+#define X86_EFLAGS_VM  0x00020000 /* Virtual Mode */
+#define X86_EFLAGS_AC  0x00040000 /* Alignment Check */
+#define X86_EFLAGS_VIF 0x00080000 /* Virtual Interrupt Flag */
+#define X86_EFLAGS_VIP 0x00100000 /* Virtual Interrupt Pending */
+#define X86_EFLAGS_ID  0x00200000 /* CPUID detection flag */
+
+/*
+ *     Generic CPUID function
+ *     FIXME: This really belongs to msr.h
+ */
+extern inline void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx)
+{
+       __asm__("cpuid"
+               : "=a" (*eax),
+                 "=b" (*ebx),
+                 "=c" (*ecx),
+                 "=d" (*edx)
+               : "0" (op));
+}
+
+/*
+ * CPUID functions returning a single datum
+ */
+extern inline unsigned int cpuid_eax(unsigned int op)
+{
+       unsigned int eax;
+
+       __asm__("cpuid"
+               : "=a" (eax)
+               : "0" (op)
+               : "bx", "cx", "dx");
+       return eax;
+}
+extern inline unsigned int cpuid_ebx(unsigned int op)
+{
+       unsigned int eax, ebx;
+
+       __asm__("cpuid"
+               : "=a" (eax), "=b" (ebx)
+               : "0" (op)
+               : "cx", "dx" );
+       return ebx;
+}
+extern inline unsigned int cpuid_ecx(unsigned int op)
+{
+       unsigned int eax, ecx;
+
+       __asm__("cpuid"
+               : "=a" (eax), "=c" (ecx)
+               : "0" (op)
+               : "bx", "dx" );
+       return ecx;
+}
+extern inline unsigned int cpuid_edx(unsigned int op)
+{
+       unsigned int eax, edx;
+
+       __asm__("cpuid"
+               : "=a" (eax), "=d" (edx)
+               : "0" (op)
+               : "bx", "cx");
+       return edx;
+}
+
+/*
+ * Intel CPU features in CR4
+ */
+#define X86_CR4_VME            0x0001  /* enable vm86 extensions */
+#define X86_CR4_PVI            0x0002  /* virtual interrupts flag enable */
+#define X86_CR4_TSD            0x0004  /* disable time stamp at ipl 3 */
+#define X86_CR4_DE             0x0008  /* enable debugging extensions */
+#define X86_CR4_PSE            0x0010  /* enable page size extensions */
+#define X86_CR4_PAE            0x0020  /* enable physical address extensions */
+#define X86_CR4_MCE            0x0040  /* Machine check enable */
+#define X86_CR4_PGE            0x0080  /* enable global pages */
+#define X86_CR4_PCE            0x0100  /* enable performance counters at ipl 3 */
+#define X86_CR4_OSFXSR         0x0200  /* enable fast FPU save and restore */
+#define X86_CR4_OSXMMEXCPT     0x0400  /* enable unmasked SSE exceptions */
+
+/*
+ * Save the cr4 feature set we're using (ie
+ * Pentium 4MB enable and PPro Global page
+ * enable), so that any CPU's that boot up
+ * after us can get the correct flags.
+ */
+extern unsigned long mmu_cr4_features;
+
+static inline void set_in_cr4 (unsigned long mask)
+{
+       mmu_cr4_features |= mask;
+       __asm__("movq %%cr4,%%rax\n\t"
+               "orq %0,%%rax\n\t"
+               "movq %%rax,%%cr4\n"
+               : : "irg" (mask)
+               :"ax");
+}
+
+static inline void clear_in_cr4 (unsigned long mask)
+{
+       mmu_cr4_features &= ~mask;
+       __asm__("movq %%cr4,%%rax\n\t"
+               "andq %0,%%rax\n\t"
+               "movq %%rax,%%cr4\n"
+               : : "irg" (~mask)
+               :"ax");
+}
+
+#define CX86_CCR0 0xc0
+#define CX86_CCR1 0xc1
+#define CX86_CCR2 0xc2
+#define CX86_CCR3 0xc3
+#define CX86_CCR4 0xe8
+#define CX86_CCR5 0xe9
+#define CX86_CCR6 0xea
+#define CX86_CCR7 0xeb
+#define CX86_DIR0 0xfe
+#define CX86_DIR1 0xff
+#define CX86_ARR_BASE 0xc4
+#define CX86_RCR_BASE 0xdc
+
+/*
+ *      Cyrix CPU indexed register access macros
+ */
+
+#define getCx86(reg) ({ outb((reg), 0x22); inb(0x23); })
+
+#define setCx86(reg, data) do { \
+       outb((reg), 0x22); \
+       outb((data), 0x23); \
+} while (0)
+
+
+/*
+ * Bus types
+ */
+#define EISA_bus 0
+#define MCA_bus 0
+#define MCA_bus__is_a_macro
+
+
+/*
+ * User space process size: 512GB - 1GB (default).
+ */
+#define TASK_SIZE      (0x0000007fc0000000)
+
+/* This decides where the kernel will search for a free chunk of vm
+ * space during mmap's.
+ */
+#define TASK_UNMAPPED_32 0x40000000
+#define TASK_UNMAPPED_64 (TASK_SIZE/3) 
+#define TASK_UNMAPPED_BASE     \
+       ((current->thread.flags & THREAD_IA32) ? TASK_UNMAPPED_32 : TASK_UNMAPPED_64)  
+
+/*
+ * Size of io_bitmap in longwords: 32 is ports 0-0x3ff.
+ */
+#define IO_BITMAP_SIZE 32
+#define IO_BITMAP_OFFSET offsetof(struct tss_struct,io_bitmap)
+#define INVALID_IO_BITMAP_OFFSET 0x8000
+
+/* We'll have to decide which format to use for floating stores, and
+   kill all others... */
+struct i387_fsave_struct {
+       u32     cwd;
+       u32     swd;
+       u32     twd;
+       u32     fip;
+       u32     fcs;
+       u32     foo;
+       u32     fos;
+       u32     st_space[20];   /* 8*10 bytes for each FP-reg = 80 bytes */
+       u32     status;         /* software status information */
+};
+
+struct i387_fxsave_struct {
+       u16     cwd;
+       u16     swd;
+       u16     twd;
+       u16     fop;
+       u32     fip;
+       u32     fcs;
+       u32     foo;
+       u32     fos;
+       u32     mxcsr;
+       u32     reserved;
+       u32     st_space[32];   /* 8*16 bytes for each FP-reg = 128 bytes */
+       u32     xmm_space[32];  /* 8*16 bytes for each XMM-reg = 128 bytes */
+       u32     padding[56];
+} __attribute__ ((aligned (16)));
+
+struct i387_soft_struct {
+       u32     cwd;
+       u32     swd;
+       u32     twd;
+       u32     fip;
+       u32     fcs;
+       u32     foo;
+       u32     fos;
+       u32     st_space[20];   /* 8*10 bytes for each FP-reg = 80 bytes */
+       unsigned char   ftop, changed, lookahead, no_update, rm, alimit;
+       struct info     *info;
+       unsigned long   entry_eip;
+};
+
+union i387_union {
+       struct i387_fsave_struct        fsave;
+       struct i387_fxsave_struct       fxsave;
+       struct i387_soft_struct soft;
+};
+
+typedef struct {
+       unsigned long seg;
+} mm_segment_t;
+
+struct tss_struct {
+       u32 reserved1;
+       u64 rsp0;       
+       u64 rsp1;
+       u64 rsp2;
+       u64 reserved2;
+       u64 ist[7];
+       u32 reserved3;
+       u32 reserved4;
+       u16 reserved5;
+       u16 io_map_base;
+       u32 io_bitmap[IO_BITMAP_SIZE];
+} __attribute__((packed));
+
+struct thread_struct {
+       unsigned long   rsp0;
+       unsigned long   rip;
+       unsigned long   rsp;
+       unsigned long   userrsp;        /* Copy from PDA */ 
+       unsigned long   fs;
+       unsigned long   gs;
+       unsigned short  es, ds, fsindex, gsindex;       
+/* Hardware debugging registers */
+       unsigned long   debugreg[8];  /* %%db0-7 debug registers */
+/* fault info */
+       unsigned long   cr2, trap_no, error_code;
+/* floating point info */
+       union i387_union        i387;
+/* IO permissions.  The bitmap could be moved into the GDT; that would make
+   switching faster for a limited number of ioperm-using tasks. -AK */
+       int             ioperm;
+       u32     io_bitmap[IO_BITMAP_SIZE+1];
+};
+
+#define INIT_THREAD  {                         \
+}
+
+#define INIT_MMAP \
+{ &init_mm, 0, 0, NULL, PAGE_SHARED, VM_READ | VM_WRITE | VM_EXEC, 1, NULL, NULL }
+
+
+#ifndef CONFIG_SMP
+extern char stackfault_stack[]; 
+#define STACKDESC rsp2: (unsigned long)stackfault_stack,
+#define STACKFAULT_STACK 2
+#else
+#define STACKFAULT_STACK 0
+#define STACKDESC
+#endif
+
+/* Doublefault currently shares the same stack on all CPUs. Hopefully
+   only one gets into this unfortunate condition at a time. Cannot do
+   the same for the stack fault stack because that can be easily
+   triggered by user space. */
+#define INIT_TSS  {                                            \
+       rsp1: (unsigned long)doublefault_stack,                 \
+       STACKDESC \
+}
+
+extern char doublefault_stack[];
+
+#define start_thread(regs,new_rip,new_rsp) do { \
+       __asm__("movl %0,%%fs; movl %0,%%es; movl %0,%%ds": :"r" (0));           \
+       wrmsrl(MSR_KERNEL_GS_BASE, 0);                                           \
+       (regs)->rip = (new_rip);                                                 \
+       (regs)->rsp = (new_rsp);                                                 \
+       write_pda(oldrsp, (new_rsp));                                            \
+       (regs)->cs = __USER_CS;                                                  \
+       (regs)->ss = __USER_DS;                                                  \
+       (regs)->eflags = 0x200;                                                  \
+       set_fs(USER_DS);                                                         \
+} while(0) 
+
+struct task_struct;
+struct mm_struct;
+
+/* Free all resources held by a thread. */
+extern void release_thread(struct task_struct *);
+/*
+ * create a kernel thread without removing it from tasklists
+ */
+extern long kernel_thread(int (*fn)(void *), void * arg, unsigned long flags);
+
+/* Copy and release all segment info associated with a VM */
+extern void copy_segments(struct task_struct *p, struct mm_struct * mm);
+extern void release_segments(struct mm_struct * mm);
+
+/*
+ * Return saved PC of a blocked thread.
+ */
+extern inline unsigned long thread_saved_pc(struct task_struct *t)
+{
+       return -1;  /* FIXME */
+}
+
+unsigned long get_wchan(struct task_struct *p);
+
+
+/* FIXME: this is incorrect when the task is sleeping in a syscall entered
+   through SYSCALL. */ 
+#define __kstk_regs(tsk)  \
+       ((struct pt_regs *)\
+       (((char *)(tsk)->thread_info) + THREAD_SIZE - sizeof(struct pt_regs)))
+#define KSTK_EIP(tsk) (__kstk_regs(tsk)->rip)
+#define KSTK_ESP(tsk) (__kstk_regs(tsk)->rsp)
+
+/* REP NOP (PAUSE) is a good thing to insert into busy-wait loops. */
+extern inline void rep_nop(void)
+{
+       __asm__ __volatile__("rep;nop");
+}
+
+#define cpu_has_fpu 1
+
+/* 3DNow! prefetch instructions. Could also use the SSE flavours; not sure
+   if it makes a difference. gcc 3.1 has __builtin_prefetch too, but I am
+   not sure it makes sense to use it. */ 
+
+#define ARCH_HAS_PREFETCH
+#define ARCH_HAS_PREFETCHW
+#define ARCH_HAS_SPINLOCK_PREFETCH
+
+extern inline void prefetch(const void *x)
+{
+    __asm__ __volatile__ ("prefetch (%0)" : : "r"(x));
+}
+
+extern inline void prefetchw(const void *x)
+{
+    __asm__ __volatile__ ("prefetchw (%0)" : : "r"(x));
+}
+#define spin_lock_prefetch(x)  prefetchw(x)
+#define cpu_relax()   rep_nop()
+
+
+#endif /* __ASM_X86_64_PROCESSOR_H */
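
The cpuid()/cpuid_*() helpers above are thin wrappers around the CPUID
instruction.  As a usage sketch, leaf 0 returns the vendor string in
EBX, EDX, ECX (illustrative only, not part of the patch):

	#include <string.h>

	/* assumes the cpuid() helper from <asm/processor.h> */
	static void example_vendor_string(char buf[13])
	{
		int eax, ebx, ecx, edx;

		cpuid(0, &eax, &ebx, &ecx, &edx);
		memcpy(buf + 0, &ebx, 4);       /* e.g. "Auth" */
		memcpy(buf + 4, &edx, 4);       /*      "enti" */
		memcpy(buf + 8, &ecx, 4);       /*      "cAMD" */
		buf[12] = '\0';
	}
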
diff --git a/include/asm-x86_64/ptrace.h b/include/asm-x86_64/ptrace.h
new file mode 100644 (file)
index 0000000..1ab2b62
--- /dev/null
@@ -0,0 +1,114 @@
+#ifndef _X86_64_PTRACE_H
+#define _X86_64_PTRACE_H
+
+#ifdef __ASSEMBLY__
+#define R15 0
+#define R14 8
+#define R13 16
+#define R12 24
+#define RBP 32
+#define RBX 40
+/* arguments: interrupts/non tracing syscalls only save up to here */
+#define R11 48
+#define R10 56 
+#define R9 64
+#define R8 72
+#define RAX 80
+#define RCX 88
+#define RDX 96
+#define RSI 104
+#define RDI 112
+#define ORIG_RAX 120       /* = ERROR */ 
+/* end of arguments */         
+/* cpu exception frame or undefined in case of fast syscall. */
+#define RIP 128
+#define CS 136
+#define EFLAGS 144
+#define RSP 152
+#define SS 160
+#define ARGOFFSET R11
+#endif /* __ASSEMBLY__ */
+
+/* top of stack page */ 
+#define FRAME_SIZE 168
+
+#define PTRACE_SETOPTIONS         21
+
+/* options set using PTRACE_SETOPTIONS */
+#define PTRACE_O_TRACESYSGOOD     0x00000001
+
+/* Dummy values for ptrace */ 
+#define FS 1000 
+#define GS 1008
+
+#ifndef __ASSEMBLY__ 
+
+struct pt_regs {
+       unsigned long r15;
+       unsigned long r14;
+       unsigned long r13;
+       unsigned long r12;
+       unsigned long rbp;
+       unsigned long rbx;
+/* arguments: non interrupts/non tracing syscalls only save up to here */
+       unsigned long r11;
+       unsigned long r10;      
+       unsigned long r9;
+       unsigned long r8;
+       unsigned long rax;
+       unsigned long rcx;
+       unsigned long rdx;
+       unsigned long rsi;
+       unsigned long rdi;
+       unsigned long orig_rax;
+/* end of arguments */         
+/* cpu exception frame or undefined */
+       unsigned long rip;
+       unsigned long cs;
+       unsigned long eflags; 
+       unsigned long rsp; 
+       unsigned long ss;
+/* top of stack page */ 
+};
+
+#endif
+
+/* Arbitrarily choose the same ptrace numbers as used by the Sparc code. */
+#define PTRACE_GETREGS            12
+#define PTRACE_SETREGS            13
+#define PTRACE_GETFPREGS          14
+#define PTRACE_SETFPREGS          15
+#define PTRACE_GETFPXREGS         18
+#define PTRACE_SETFPXREGS         19
+
+#if defined(__KERNEL__) && !defined(__ASSEMBLY__) 
+#define user_mode(regs) ((regs)->rsp <= PAGE_OFFSET)
+#define instruction_pointer(regs) ((regs)->rip)
+extern void show_regs(struct pt_regs *);
+
+enum {
+        EF_CF   = 0x00000001,
+        EF_PF   = 0x00000004,
+        EF_AF   = 0x00000010,
+        EF_ZF   = 0x00000040,
+        EF_SF   = 0x00000080,
+        EF_TF   = 0x00000100,
+        EF_IE   = 0x00000200,
+        EF_DF   = 0x00000400,
+        EF_OF   = 0x00000800,
+        EF_IOPL = 0x00003000,
+        EF_IOPL_RING0 = 0x00000000,
+        EF_IOPL_RING1 = 0x00001000,
+        EF_IOPL_RING2 = 0x00002000,
+        EF_NT   = 0x00004000,   /* nested task */
+        EF_RF   = 0x00010000,   /* resume */
+        EF_VM   = 0x00020000,   /* virtual mode */
+        EF_AC   = 0x00040000,   /* alignment */
+        EF_VIF  = 0x00080000,   /* virtual interrupt */
+        EF_VIP  = 0x00100000,   /* virtual interrupt pending */
+        EF_ID   = 0x00200000,   /* id */
+};
+
+#endif
+
+#endif
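
The assembler offsets at the top of this file mirror struct pt_regs (21
eight-byte fields, hence FRAME_SIZE = 21 * 8 = 168).  Because the R15..SS
names are only visible under __ASSEMBLY__, a C-side consistency check has
to repeat the numbers; a sketch of such a build-time assertion
(illustrative only):

	#include <stddef.h>

	/* a negative array size fails the build if an offset drifts */
	#define PTREGS_CHECK(off, field) \
		typedef char check_##field[(off) == offsetof(struct pt_regs, field) ? 1 : -1]

	PTREGS_CHECK(32,  rbp);         /* RBP      */
	PTREGS_CHECK(40,  rbx);         /* RBX      */
	PTREGS_CHECK(120, orig_rax);    /* ORIG_RAX */
	PTREGS_CHECK(128, rip);         /* RIP      */
	PTREGS_CHECK(152, rsp);         /* RSP      */
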
diff --git a/include/asm-x86_64/resource.h b/include/asm-x86_64/resource.h
new file mode 100644 (file)
index 0000000..b430c85
--- /dev/null
@@ -0,0 +1,47 @@
+#ifndef _X8664_RESOURCE_H
+#define _X8664_RESOURCE_H
+
+/*
+ * Resource limits
+ */
+
+#define RLIMIT_CPU     0               /* CPU time in ms */
+#define RLIMIT_FSIZE   1               /* Maximum filesize */
+#define RLIMIT_DATA    2               /* max data size */
+#define RLIMIT_STACK   3               /* max stack size */
+#define RLIMIT_CORE    4               /* max core file size */
+#define RLIMIT_RSS     5               /* max resident set size */
+#define RLIMIT_NPROC   6               /* max number of processes */
+#define RLIMIT_NOFILE  7               /* max number of open files */
+#define RLIMIT_MEMLOCK 8               /* max locked-in-memory address space */
+#define RLIMIT_AS      9               /* address space limit */
+#define RLIMIT_LOCKS   10              /* maximum file locks held */
+
+#define RLIM_NLIMITS   11
+
+/*
+ * SuS says limits have to be unsigned.
+ * Which makes a ton more sense anyway.
+ */
+#define RLIM_INFINITY  (~0UL)
+
+#ifdef __KERNEL__
+
+#define INIT_RLIMITS                                   \
+{                                                      \
+       { RLIM_INFINITY, RLIM_INFINITY },               \
+       { RLIM_INFINITY, RLIM_INFINITY },               \
+       { RLIM_INFINITY, RLIM_INFINITY },               \
+       {      _STK_LIM, RLIM_INFINITY },               \
+       {             0, RLIM_INFINITY },               \
+       { RLIM_INFINITY, RLIM_INFINITY },               \
+       {             0,             0 },               \
+       {      INR_OPEN,     INR_OPEN  },               \
+       { RLIM_INFINITY, RLIM_INFINITY },               \
+       { RLIM_INFINITY, RLIM_INFINITY },               \
+        { RLIM_INFINITY, RLIM_INFINITY },              \
+}
+
+#endif /* __KERNEL__ */
+
+#endif
diff --git a/include/asm-x86_64/rwlock.h b/include/asm-x86_64/rwlock.h
new file mode 100644 (file)
index 0000000..3fe63df
--- /dev/null
@@ -0,0 +1,84 @@
+/* include/asm-x86_64/rwlock.h
+ *
+ *     Helpers used by both rw spinlocks and rw semaphores.
+ *
+ *     Based in part on code from semaphore.h and
+ *     spinlock.h Copyright 1996 Linus Torvalds.
+ *
+ *     Copyright 1999 Red Hat, Inc.
+ *     Copyright 2001 SuSE labs 
+ *
+ *     Written by Benjamin LaHaise.
+ *
+ *     This program is free software; you can redistribute it and/or
+ *     modify it under the terms of the GNU General Public License
+ *     as published by the Free Software Foundation; either version
+ *     2 of the License, or (at your option) any later version.
+ */
+#ifndef _ASM_X86_64_RWLOCK_H
+#define _ASM_X86_64_RWLOCK_H
+
+#define RW_LOCK_BIAS            0x01000000
+#define RW_LOCK_BIAS_STR       "0x01000000"
+
+#define __build_read_lock_ptr(rw, helper)   \
+       asm volatile(LOCK "subl $1,(%0)\n\t" \
+                    "js 2f\n" \
+                    "1:\n" \
+                    ".section .text.lock,\"ax\"\n" \
+                    "2:\tcall " helper "\n\t" \
+                    "jmp 1b\n" \
+                    ".previous" \
+                    ::"d" (rw) : "memory")
+
+#define __build_read_lock_const(rw, helper)   \
+       asm volatile(LOCK "subl $1,%0\n\t" \
+                    "js 2f\n" \
+                    "1:\n" \
+                    ".section .text.lock,\"ax\"\n" \
+                    "2:\tpushq %%rax\n\t" \
+                    "leal %0,%%eax\n\t" \
+                    "call " helper "\n\t" \
+                    "popq %%rax\n\t" \
+                    "jmp 1b\n" \
+                    ".previous" \
+                    :"=m" (*((volatile int *)rw))::"memory")
+
+#define __build_read_lock(rw, helper)  do { \
+                                               if (__builtin_constant_p(rw)) \
+                                                       __build_read_lock_const(rw, helper); \
+                                               else \
+                                                       __build_read_lock_ptr(rw, helper); \
+                                       } while (0)
+
+#define __build_write_lock_ptr(rw, helper) \
+       asm volatile(LOCK "subl $" RW_LOCK_BIAS_STR ",(%0)\n\t" \
+                    "jnz 2f\n" \
+                    "1:\n" \
+                    ".section .text.lock,\"ax\"\n" \
+                    "2:\tcall " helper "\n\t" \
+                    "jmp 1b\n" \
+                    ".previous" \
+                    ::"d" (rw) : "memory")
+
+#define __build_write_lock_const(rw, helper) \
+       asm volatile(LOCK "subl $" RW_LOCK_BIAS_STR ",(%0)\n\t" \
+                    "jnz 2f\n" \
+                    "1:\n" \
+                    ".section .text.lock,\"ax\"\n" \
+                    "2:\tpushq %%rax\n\t" \
+                    "leaq %0,%%rax\n\t" \
+                    "call " helper "\n\t" \
+                    "popq %%rax\n\t" \
+                    "jmp 1b\n" \
+                    ".previous" \
+                    :"=m" (*((volatile long *)rw))::"memory")
+
+#define __build_write_lock(rw, helper) do { \
+                                               if (__builtin_constant_p(rw)) \
+                                                       __build_write_lock_const(rw, helper); \
+                                               else \
+                                                       __build_write_lock_ptr(rw, helper); \
+                                       } while (0)
+
+#endif
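
RW_LOCK_BIAS packs the reader/writer state into a single word: the lock
starts at the bias, readers subtract 1, writers subtract the whole bias.
A minimal sketch of the uncontended fast paths the asm above implements
(illustrative; the real slow paths call the helper thunks instead of
backing off):

	#define RW_LOCK_BIAS 0x01000000

	static int example_read_trylock(int *count)
	{
		*count -= 1;                    /* LOCK "subl $1,(%0)" */
		if (*count < 0) {               /* "js 2f": a writer holds the lock */
			*count += 1;
			return 0;
		}
		return 1;
	}

	static int example_write_trylock(int *count)
	{
		*count -= RW_LOCK_BIAS;         /* LOCK "subl $RW_LOCK_BIAS,(%0)" */
		if (*count != 0) {              /* "jnz 2f": readers or a writer */
			*count += RW_LOCK_BIAS;
			return 0;
		}
		return 1;
	}
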
diff --git a/include/asm-x86_64/rwsem.h b/include/asm-x86_64/rwsem.h
new file mode 100644 (file)
index 0000000..e77847d
--- /dev/null
@@ -0,0 +1,217 @@
+/* rwsem.h: R/W semaphores implemented using XADD/CMPXCHG for x86_64+
+ *
+ * Written by David Howells (dhowells@redhat.com).
+ * Ported by Andi Kleen <ak@suse.de> to x86-64.
+ *
+ * Derived from asm-i386/semaphore.h and asm-i386/rwsem.h
+ *
+ *
+ * The MSW of the count is the negated number of active writers and waiting
+ * lockers, and the LSW is the total number of active locks
+ *
+ * The lock count is initialized to 0 (no active and no waiting lockers).
+ *
+ * When a writer subtracts WRITE_BIAS, it'll get 0xffff0001 for the case of an
+ * uncontended lock. This can be determined because XADD returns the old value.
+ * Readers increment by 1 and see a positive value when uncontended, negative
+ * if there are writers (and maybe readers) waiting (in which case it goes to
+ * sleep).
+ *
+ * The value of WAITING_BIAS supports up to 32766 waiting processes. This can
+ * be extended to 65534 by manually checking the whole MSW rather than relying
+ * on the S flag.
+ *
+ * The value of ACTIVE_BIAS supports up to 65535 active processes.
+ *
+ * This should be totally fair - if anything is waiting, a process that wants a
+ * lock will go to the back of the queue. When the currently active lock is
+ * released, if there's a writer at the front of the queue, then that and only
+ * that will be woken up; if there's a bunch of consecutive readers at the
+ * front, then they'll all be woken up, but no other readers will be.
+ */
+
+#ifndef _X8664_RWSEM_H
+#define _X8664_RWSEM_H
+
+#ifndef _LINUX_RWSEM_H
+#error please don't include asm/rwsem.h directly, use linux/rwsem.h instead
+#endif
+
+#ifdef __KERNEL__
+
+#include <linux/list.h>
+#include <linux/spinlock.h>
+
+struct rwsem_waiter;
+
+extern struct rw_semaphore *rwsem_down_read_failed(struct rw_semaphore *sem);
+extern struct rw_semaphore *rwsem_down_write_failed(struct rw_semaphore *sem);
+extern struct rw_semaphore *rwsem_wake(struct rw_semaphore *);
+
+/*
+ * the semaphore definition
+ */
+struct rw_semaphore {
+       signed long             count;
+#define RWSEM_UNLOCKED_VALUE           0x00000000
+#define RWSEM_ACTIVE_BIAS              0x00000001
+#define RWSEM_ACTIVE_MASK              0x0000ffff
+#define RWSEM_WAITING_BIAS             (-0x00010000)
+#define RWSEM_ACTIVE_READ_BIAS         RWSEM_ACTIVE_BIAS
+#define RWSEM_ACTIVE_WRITE_BIAS                (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS)
+       spinlock_t              wait_lock;
+       struct list_head        wait_list;
+#if RWSEM_DEBUG
+       int                     debug;
+#endif
+};
+
+/*
+ * initialisation
+ */
+#if RWSEM_DEBUG
+#define __RWSEM_DEBUG_INIT      , 0
+#else
+#define __RWSEM_DEBUG_INIT     /* */
+#endif
+
+#define __RWSEM_INITIALIZER(name) \
+{ RWSEM_UNLOCKED_VALUE, SPIN_LOCK_UNLOCKED, LIST_HEAD_INIT((name).wait_list) \
+       __RWSEM_DEBUG_INIT }
+
+#define DECLARE_RWSEM(name) \
+       struct rw_semaphore name = __RWSEM_INITIALIZER(name)
+
+static inline void init_rwsem(struct rw_semaphore *sem)
+{
+       sem->count = RWSEM_UNLOCKED_VALUE;
+       spin_lock_init(&sem->wait_lock);
+       INIT_LIST_HEAD(&sem->wait_list);
+#if RWSEM_DEBUG
+       sem->debug = 0;
+#endif
+}
+
+/*
+ * lock for reading
+ */
+static inline void __down_read(struct rw_semaphore *sem)
+{
+       __asm__ __volatile__(
+               "# beginning down_read\n\t"
+LOCK_PREFIX    "  incl      (%%rax)\n\t" /* adds 0x00000001 */
+               "  js        2f\n\t" /* jump if we weren't granted the lock */
+               "1:\n\t"
+               ".section .text.lock,\"ax\"\n"
+               "2:\n\t"
+               "  call      rwsem_down_read_failed_thunk\n\t"
+               "  jmp       1b\n"
+               ".previous"
+               "# ending down_read\n\t"
+               : "+m"(sem->count)
+               : "a"(sem)
+               : "memory", "cc");
+}
+
+/*
+ * lock for writing
+ */
+static inline void __down_write(struct rw_semaphore *sem)
+{
+       int tmp;
+
+       tmp = RWSEM_ACTIVE_WRITE_BIAS;
+       __asm__ __volatile__(
+               "# beginning down_write\n\t"
+LOCK_PREFIX    "  xadd      %0,(%%rax)\n\t" /* subtract 0x0000ffff, returns the old value */
+               "  testl     %0,%0\n\t" /* was the count 0 before? */
+               "  jnz       2f\n\t" /* jump if we weren't granted the lock */
+               "1:\n\t"
+               ".section .text.lock,\"ax\"\n"
+               "2:\n\t"
+               "  call      rwsem_down_write_failed_thunk\n\t"
+               "  jmp       1b\n"
+               ".previous\n"
+               "# ending down_write"
+               : "=r" (tmp) 
+               : "0"(tmp), "a"(sem)
+               : "memory", "cc");
+}
+
+/*
+ * unlock after reading
+ */
+static inline void __up_read(struct rw_semaphore *sem)
+{
+       __s32 tmp = -RWSEM_ACTIVE_READ_BIAS;
+       __asm__ __volatile__(
+               "# beginning __up_read\n\t"
+LOCK_PREFIX    "  xadd      %%edx,(%%rax)\n\t" /* subtracts 1, returns the old value */
+               "  js        2f\n\t" /* jump if the lock is being waited upon */
+               "1:\n\t"
+               ".section .text.lock,\"ax\"\n"
+               "2:\n\t"
+               "  decw      %%dx\n\t" /* do nothing if still outstanding active readers */
+               "  jnz       1b\n\t"
+               "  call      rwsem_wake_thunk\n\t"
+               "  jmp       1b\n"
+               ".previous\n"
+               "# ending __up_read\n"
+               : "+m"(sem->count), "+d"(tmp)
+               : "a"(sem)
+               : "memory", "cc");
+}
+
+/*
+ * unlock after writing
+ */
+static inline void __up_write(struct rw_semaphore *sem)
+{
+       __asm__ __volatile__(
+               "# beginning __up_write\n\t"
+               "  movl      %2,%%edx\n\t"
+LOCK_PREFIX    "  xaddl     %%edx,(%%rax)\n\t" /* tries to transition 0xffff0001 -> 0x00000000 */
+               "  jnz       2f\n\t" /* jump if the lock is being waited upon */
+               "1:\n\t"
+               ".section .text.lock,\"ax\"\n"
+               "2:\n\t"
+               "  decw      %%dx\n\t" /* did the active count reduce to 0? */
+               "  jnz       1b\n\t" /* jump back if not */
+               "  call      rwsem_wake_thunk\n\t"
+               "  jmp       1b\n"
+               ".previous\n"
+               "# ending __up_write\n"
+               : "+m"(sem->count)
+               : "a"(sem), "i"(-RWSEM_ACTIVE_WRITE_BIAS)
+               : "memory", "cc", "edx");
+}
+
+/*
+ * implement atomic add functionality
+ */
+static inline void rwsem_atomic_add(int delta, struct rw_semaphore *sem)
+{
+       __asm__ __volatile__(
+LOCK_PREFIX    "addl %1,%0"
+               :"=m"(sem->count)
+               :"ir"(delta), "m"(sem->count));
+}
+
+/*
+ * implement exchange and add functionality
+ */
+static inline int rwsem_atomic_update(int delta, struct rw_semaphore *sem)
+{
+       int tmp = delta;
+
+       __asm__ __volatile__(
+LOCK_PREFIX    "xadd %0,(%2)"
+               : "=r"(tmp), "=m"(sem->count)
+               : "r"(sem), "m"(sem->count), "0" (tmp)
+               : "memory");
+
+       return tmp+delta;
+}
+
+#endif /* __KERNEL__ */
+#endif /* _X8664_RWSEM_H */
diff --git a/include/asm-x86_64/scatterlist.h b/include/asm-x86_64/scatterlist.h
new file mode 100644 (file)
index 0000000..b8d1289
--- /dev/null
@@ -0,0 +1,13 @@
+#ifndef _X8664_SCATTERLIST_H
+#define _X8664_SCATTERLIST_H
+
+struct scatterlist {
+    struct page                *page;
+    unsigned int       offset;
+    dma_addr_t         dma_address;
+    unsigned int       length;
+};
+
+#define ISA_DMA_THRESHOLD (0x00ffffff)
+
+#endif 
diff --git a/include/asm-x86_64/segment.h b/include/asm-x86_64/segment.h
new file mode 100644 (file)
index 0000000..e1dec83
--- /dev/null
@@ -0,0 +1,19 @@
+#ifndef _ASM_SEGMENT_H
+#define _ASM_SEGMENT_H
+
+#define __KERNEL_CS    0x10
+#define __KERNEL_DS    0x18
+
+/* 
+ * We cannot use the same code segment descriptor for user and kernel,
+ * not even in the long flat model, because of different DPL /kkeil
+ * The segment offset needs to contain an RPL. Grr. -AK
+ * GDT layout to get 64bit syscall right (sysret hardcodes gdt offsets)
+ */
+
+#define __USER32_CS   0x23   /* 4*8+3 */ 
+#define __USER_DS     0x2b   /* 5*8+3 */ 
+#define __USER_CS     0x33   /* 6*8+3 */ 
+#define __USER32_DS    __USER_DS 
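+
+/*
+ * For illustration: a selector is (GDT index << 3) | RPL, so e.g.
+ * __USER_CS = (6 << 3) | 3 = 0x33 selects GDT entry 6 with requestor
+ * privilege level 3, while the kernel selectors above use RPL 0.
+ */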
+
+#endif
diff --git a/include/asm-x86_64/semaphore.h b/include/asm-x86_64/semaphore.h
new file mode 100644 (file)
index 0000000..dd5f668
--- /dev/null
@@ -0,0 +1,216 @@
+#ifndef _X86_64_SEMAPHORE_H
+#define _X86_64_SEMAPHORE_H
+
+#include <linux/linkage.h>
+
+#ifdef __KERNEL__
+
+/*
+ * SMP- and interrupt-safe semaphores..
+ *
+ * (C) Copyright 1996 Linus Torvalds
+ *
+ * Modified 1996-12-23 by Dave Grothe <dave@gcom.com> to fix bugs in
+ *                     the original code and to make semaphore waits
+ *                     interruptible so that processes waiting on
+ *                     semaphores can be killed.
+ * Modified 1999-02-14 by Andrea Arcangeli, split the sched.c helper
+ *                    functions in asm/semaphore-helper.h while fixing a
+ *                    potential and subtle race discovered by Ulrich Schmid
+ *                    in down_interruptible(). Since I started to play here I
+ *                    also implemented the `trylock' semaphore operation.
+ *          1999-07-02 Artur Skawina <skawina@geocities.com>
+ *                     Optimized "0(ecx)" -> "(ecx)" (the assembler does not
+ *                     do this). Changed calling sequences from push/jmp to
+ *                     traditional call/ret.
+ * Modified 2001-01-01 Andreas Franck <afranck@gmx.de>
+ *                    Some hacks to ensure compatibility with recent
+ *                    GCC snapshots, to avoid stack corruption when compiling
+ *                    with -fomit-frame-pointer. It's not clear whether this
+ *                    will be fixed in GCC, as our previous implementation was
+ *                    a bit dubious.
+ *
+ * If you would like to see an analysis of this implementation, please
+ * ftp to gcom.com and download the file
+ * /pub/linux/src/semaphore/semaphore-2.0.24.tar.gz.
+ *
+ */
+
+#include <asm/system.h>
+#include <asm/atomic.h>
+#include <asm/rwlock.h>
+#include <linux/wait.h>
+#include <linux/rwsem.h>
+
+struct semaphore {
+       atomic_t count;
+       int sleepers;
+       wait_queue_head_t wait;
+#if WAITQUEUE_DEBUG
+       long __magic;
+#endif
+};
+
+#if WAITQUEUE_DEBUG
+# define __SEM_DEBUG_INIT(name) \
+               , (int)&(name).__magic
+#else
+# define __SEM_DEBUG_INIT(name)
+#endif
+
+#define __SEMAPHORE_INITIALIZER(name,count) \
+{ ATOMIC_INIT(count), 0, __WAIT_QUEUE_HEAD_INITIALIZER((name).wait) \
+       __SEM_DEBUG_INIT(name) }
+
+#define __MUTEX_INITIALIZER(name) \
+       __SEMAPHORE_INITIALIZER(name,1)
+
+#define __DECLARE_SEMAPHORE_GENERIC(name,count) \
+       struct semaphore name = __SEMAPHORE_INITIALIZER(name,count)
+
+#define DECLARE_MUTEX(name) __DECLARE_SEMAPHORE_GENERIC(name,1)
+#define DECLARE_MUTEX_LOCKED(name) __DECLARE_SEMAPHORE_GENERIC(name,0)
+
+static inline void sema_init (struct semaphore *sem, int val)
+{
+/*
+ *     *sem = (struct semaphore)__SEMAPHORE_INITIALIZER((*sem),val);
+ *
+ * I'd rather use the more flexible initialization above, but sadly
+ * GCC 2.7.2.3 emits a bogus warning. EGCS doesn't. Oh well.
+ */
+       atomic_set(&sem->count, val);
+       sem->sleepers = 0;
+       init_waitqueue_head(&sem->wait);
+#if WAITQUEUE_DEBUG
+       sem->__magic = (int)&sem->__magic;
+#endif
+}
+
+static inline void init_MUTEX (struct semaphore *sem)
+{
+       sema_init(sem, 1);
+}
+
+static inline void init_MUTEX_LOCKED (struct semaphore *sem)
+{
+       sema_init(sem, 0);
+}
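+
+/*
+ * Usage sketch (illustrative only; down()/up() are defined below):
+ *
+ *     static DECLARE_MUTEX(my_mutex);         -- count starts at 1
+ *
+ *     down(&my_mutex);
+ *     ... critical section ...
+ *     up(&my_mutex);
+ *
+ * down_interruptible() returns -EINTR if the sleep is interrupted by a
+ * signal; down_trylock() never sleeps and returns non-zero if the
+ * semaphore could not be taken.
+ */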
+
+asmlinkage void __down_failed(void /* special register calling convention */);
+asmlinkage int  __down_failed_interruptible(void  /* params in registers */);
+asmlinkage int  __down_failed_trylock(void  /* params in registers */);
+asmlinkage void __up_wakeup(void /* special register calling convention */);
+
+asmlinkage void __down(struct semaphore * sem);
+asmlinkage int  __down_interruptible(struct semaphore * sem);
+asmlinkage int  __down_trylock(struct semaphore * sem);
+asmlinkage void __up(struct semaphore * sem);
+
+/*
+ * This is ugly, but we want the default case to fall through.
+ * "__down_failed" is a special asm handler that calls the C
+ * routine that actually waits. See arch/i386/kernel/semaphore.c
+ */
+static inline void down(struct semaphore * sem)
+{
+#if WAITQUEUE_DEBUG
+       CHECK_MAGIC(sem->__magic);
+#endif
+
+       __asm__ __volatile__(
+               "# atomic down operation\n\t"
+               LOCK "decl %0\n\t"     /* --sem->count */
+               "js 2f\n"
+               "1:\n"
+               ".section .text.lock,\"ax\"\n"
+               "2:\tcall __down_failed\n\t"
+               "jmp 1b\n"
+               ".previous"
+               :"=m" (sem->count)
+               :"D" (sem)
+               :"memory");
+}
+
+/*
+ * Interruptible attempt to acquire a semaphore.  If we obtain
+ * it, return zero.  If we are interrupted, return -EINTR.
+ */
+static inline int down_interruptible(struct semaphore * sem)
+{
+       int result;
+
+#if WAITQUEUE_DEBUG
+       CHECK_MAGIC(sem->__magic);
+#endif
+
+       __asm__ __volatile__(
+               "# atomic interruptible down operation\n\t"
+               LOCK "decl %1\n\t"     /* --sem->count */
+               "js 2f\n\t"
+               "xorl %0,%0\n"
+               "1:\n"
+               ".section .text.lock,\"ax\"\n"
+               "2:\tcall __down_failed_interruptible\n\t"
+               "jmp 1b\n"
+               ".previous"
+               :"=a" (result), "=m" (sem->count)
+               :"D" (sem)
+               :"memory");
+       return result;
+}
+
+/*
+ * Non-blocking attempt to down() a semaphore.
+ * Returns zero if we acquired it.
+ */
+static inline int down_trylock(struct semaphore * sem)
+{
+       int result;
+
+#if WAITQUEUE_DEBUG
+       CHECK_MAGIC(sem->__magic);
+#endif
+
+       __asm__ __volatile__(
+               "# atomic down_trylock operation\n\t"
+               LOCK "decl %1\n\t"     /* --sem->count */
+               "js 2f\n\t"
+               "xorl %0,%0\n"
+               "1:\n"
+               ".section .text.lock,\"ax\"\n"
+               "2:\tcall __down_failed_trylock\n\t"
+               "jmp 1b\n"
+               ".previous"
+               :"=a" (result), "=m" (sem->count)
+               :"D" (sem)
+               :"memory");
+       return result;
+}
+
+/*
+ * Note! This is subtle. We jump to wake people up only if
+ * the semaphore was negative (== somebody was waiting on it).
+ * The default case (no contention) will result in NO
+ * jumps for both down() and up().
+ */
+static inline void up(struct semaphore * sem)
+{
+#if WAITQUEUE_DEBUG
+       CHECK_MAGIC(sem->__magic);
+#endif
+       __asm__ __volatile__(
+               "# atomic up operation\n\t"
+               LOCK "incl %0\n\t"     /* ++sem->count */
+               "jle 2f\n"
+               "1:\n"
+               ".section .text.lock,\"ax\"\n"
+               "2:\tcall __up_wakeup\n\t"
+               "jmp 1b\n"
+               ".previous"
+               :"=m" (sem->count)
+               :"D" (sem)
+               :"memory");
+}
+#endif /* __KERNEL__ */
+#endif
diff --git a/include/asm-x86_64/sembuf.h b/include/asm-x86_64/sembuf.h
new file mode 100644 (file)
index 0000000..63b5292
--- /dev/null
@@ -0,0 +1,25 @@
+#ifndef _X86_64_SEMBUF_H
+#define _X86_64_SEMBUF_H
+
+/* 
+ * The semid64_ds structure for the x86_64 architecture.
+ * Note extra padding because this structure is passed back and forth
+ * between kernel and user space.
+ *
+ * Pad space is left for:
+ * - 64-bit time_t to solve y2038 problem
+ * - 2 miscellaneous 32-bit values
+ */
+
+struct semid64_ds {
+       struct ipc64_perm sem_perm;             /* permissions .. see ipc.h */
+       __kernel_time_t sem_otime;              /* last semop time */
+       unsigned long   __unused1;
+       __kernel_time_t sem_ctime;              /* last change time */
+       unsigned long   __unused2;
+       unsigned long   sem_nsems;              /* no. of semaphores in array */
+       unsigned long   __unused3;
+       unsigned long   __unused4;
+};
+
+#endif /* _X86_64_SEMBUF_H */
diff --git a/include/asm-x86_64/serial.h b/include/asm-x86_64/serial.h
new file mode 100644 (file)
index 0000000..067ff0a
--- /dev/null
@@ -0,0 +1,133 @@
+/*
+ * include/asm-x86_64/serial.h
+ */
+
+#include <linux/config.h>
+
+/*
+ * This assumes you have a 1.8432 MHz clock for your UART.
+ *
+ * It'd be nice if someone built a serial card with a 24.576 MHz
+ * clock, since the 16550A is capable of handling a top speed of 1.5
+ * megabits/second; but this requires the faster clock.
+ */
+#define BASE_BAUD ( 1843200 / 16 )
+
+/* Standard COM flags (except for COM4, because of the 8514 problem) */
+#ifdef CONFIG_SERIAL_DETECT_IRQ
+#define STD_COM_FLAGS (ASYNC_BOOT_AUTOCONF | ASYNC_SKIP_TEST | ASYNC_AUTO_IRQ)
+#define STD_COM4_FLAGS (ASYNC_BOOT_AUTOCONF | ASYNC_AUTO_IRQ)
+#else
+#define STD_COM_FLAGS (ASYNC_BOOT_AUTOCONF | ASYNC_SKIP_TEST)
+#define STD_COM4_FLAGS ASYNC_BOOT_AUTOCONF
+#endif
+
+#ifdef CONFIG_SERIAL_MANY_PORTS
+#define FOURPORT_FLAGS ASYNC_FOURPORT
+#define ACCENT_FLAGS 0
+#define BOCA_FLAGS 0
+#define HUB6_FLAGS 0
+#define RS_TABLE_SIZE  64
+#else
+#define RS_TABLE_SIZE
+#endif
+
+#define MCA_COM_FLAGS  (STD_COM_FLAGS|ASYNC_BOOT_ONLYMCA)
+
+/*
+ * The following define the access methods for the HUB6 card. All
+ * access is through two ports for all 24 possible chips. The card is
+ * selected through the high 2 bits, the port on that card with the
+ * "middle" 3 bits, and the register on that port with the bottom
+ * 3 bits.
+ *
+ * While the access port and interrupt is configurable, the default
+ * port locations are 0x302 for the port control register, and 0x303
+ * for the data read/write register. Normally, the interrupt is at irq3
+ * but can be anything from 3 to 7 inclusive. Note that using 3 will
+ * require disabling com2.
+ */
+
+#define C_P(card,port) (((card)<<6|(port)<<3) + 1)
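+
+/*
+ * Worked example of the encoding above (illustrative): C_P(1,2) selects
+ * card 1, port 2, i.e. ((1 << 6) | (2 << 3)) + 1 = 0x51.
+ */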
+
+#define STD_SERIAL_PORT_DEFNS                  \
+       /* UART CLK   PORT IRQ     FLAGS        */                      \
+       { 0, BASE_BAUD, 0x3F8, 4, STD_COM_FLAGS },      /* ttyS0 */     \
+       { 0, BASE_BAUD, 0x2F8, 3, STD_COM_FLAGS },      /* ttyS1 */     \
+       { 0, BASE_BAUD, 0x3E8, 4, STD_COM_FLAGS },      /* ttyS2 */     \
+       { 0, BASE_BAUD, 0x2E8, 3, STD_COM4_FLAGS },     /* ttyS3 */
+
+
+#ifdef CONFIG_SERIAL_MANY_PORTS
+#define EXTRA_SERIAL_PORT_DEFNS                        \
+       { 0, BASE_BAUD, 0x1A0, 9, FOURPORT_FLAGS },     /* ttyS4 */     \
+       { 0, BASE_BAUD, 0x1A8, 9, FOURPORT_FLAGS },     /* ttyS5 */     \
+       { 0, BASE_BAUD, 0x1B0, 9, FOURPORT_FLAGS },     /* ttyS6 */     \
+       { 0, BASE_BAUD, 0x1B8, 9, FOURPORT_FLAGS },     /* ttyS7 */     \
+       { 0, BASE_BAUD, 0x2A0, 5, FOURPORT_FLAGS },     /* ttyS8 */     \
+       { 0, BASE_BAUD, 0x2A8, 5, FOURPORT_FLAGS },     /* ttyS9 */     \
+       { 0, BASE_BAUD, 0x2B0, 5, FOURPORT_FLAGS },     /* ttyS10 */    \
+       { 0, BASE_BAUD, 0x2B8, 5, FOURPORT_FLAGS },     /* ttyS11 */    \
+       { 0, BASE_BAUD, 0x330, 4, ACCENT_FLAGS },       /* ttyS12 */    \
+       { 0, BASE_BAUD, 0x338, 4, ACCENT_FLAGS },       /* ttyS13 */    \
+       { 0, BASE_BAUD, 0x000, 0, 0 },  /* ttyS14 (spare) */            \
+       { 0, BASE_BAUD, 0x000, 0, 0 },  /* ttyS15 (spare) */            \
+       { 0, BASE_BAUD, 0x100, 12, BOCA_FLAGS },        /* ttyS16 */    \
+       { 0, BASE_BAUD, 0x108, 12, BOCA_FLAGS },        /* ttyS17 */    \
+       { 0, BASE_BAUD, 0x110, 12, BOCA_FLAGS },        /* ttyS18 */    \
+       { 0, BASE_BAUD, 0x118, 12, BOCA_FLAGS },        /* ttyS19 */    \
+       { 0, BASE_BAUD, 0x120, 12, BOCA_FLAGS },        /* ttyS20 */    \
+       { 0, BASE_BAUD, 0x128, 12, BOCA_FLAGS },        /* ttyS21 */    \
+       { 0, BASE_BAUD, 0x130, 12, BOCA_FLAGS },        /* ttyS22 */    \
+       { 0, BASE_BAUD, 0x138, 12, BOCA_FLAGS },        /* ttyS23 */    \
+       { 0, BASE_BAUD, 0x140, 12, BOCA_FLAGS },        /* ttyS24 */    \
+       { 0, BASE_BAUD, 0x148, 12, BOCA_FLAGS },        /* ttyS25 */    \
+       { 0, BASE_BAUD, 0x150, 12, BOCA_FLAGS },        /* ttyS26 */    \
+       { 0, BASE_BAUD, 0x158, 12, BOCA_FLAGS },        /* ttyS27 */    \
+       { 0, BASE_BAUD, 0x160, 12, BOCA_FLAGS },        /* ttyS28 */    \
+       { 0, BASE_BAUD, 0x168, 12, BOCA_FLAGS },        /* ttyS29 */    \
+       { 0, BASE_BAUD, 0x170, 12, BOCA_FLAGS },        /* ttyS30 */    \
+       { 0, BASE_BAUD, 0x178, 12, BOCA_FLAGS },        /* ttyS31 */
+#else
+#define EXTRA_SERIAL_PORT_DEFNS
+#endif
+
+/* You can have up to four HUB6's in the system, but I've only
+ * included two cards here for a total of twelve ports.
+ */
+#if (defined(CONFIG_HUB6) && defined(CONFIG_SERIAL_MANY_PORTS))
+#define HUB6_SERIAL_PORT_DFNS          \
+       { 0, BASE_BAUD, 0x302, 3, HUB6_FLAGS, C_P(0,0) },  /* ttyS32 */ \
+       { 0, BASE_BAUD, 0x302, 3, HUB6_FLAGS, C_P(0,1) },  /* ttyS33 */ \
+       { 0, BASE_BAUD, 0x302, 3, HUB6_FLAGS, C_P(0,2) },  /* ttyS34 */ \
+       { 0, BASE_BAUD, 0x302, 3, HUB6_FLAGS, C_P(0,3) },  /* ttyS35 */ \
+       { 0, BASE_BAUD, 0x302, 3, HUB6_FLAGS, C_P(0,4) },  /* ttyS36 */ \
+       { 0, BASE_BAUD, 0x302, 3, HUB6_FLAGS, C_P(0,5) },  /* ttyS37 */ \
+       { 0, BASE_BAUD, 0x302, 3, HUB6_FLAGS, C_P(1,0) },  /* ttyS38 */ \
+       { 0, BASE_BAUD, 0x302, 3, HUB6_FLAGS, C_P(1,1) },  /* ttyS39 */ \
+       { 0, BASE_BAUD, 0x302, 3, HUB6_FLAGS, C_P(1,2) },  /* ttyS40 */ \
+       { 0, BASE_BAUD, 0x302, 3, HUB6_FLAGS, C_P(1,3) },  /* ttyS41 */ \
+       { 0, BASE_BAUD, 0x302, 3, HUB6_FLAGS, C_P(1,4) },  /* ttyS42 */ \
+       { 0, BASE_BAUD, 0x302, 3, HUB6_FLAGS, C_P(1,5) },  /* ttyS43 */
+#else
+#define HUB6_SERIAL_PORT_DFNS
+#endif
+
+#ifdef CONFIG_MCA
+#define MCA_SERIAL_PORT_DFNS                   \
+       { 0, BASE_BAUD, 0x3220, 3, MCA_COM_FLAGS },     \
+       { 0, BASE_BAUD, 0x3228, 3, MCA_COM_FLAGS },     \
+       { 0, BASE_BAUD, 0x4220, 3, MCA_COM_FLAGS },     \
+       { 0, BASE_BAUD, 0x4228, 3, MCA_COM_FLAGS },     \
+       { 0, BASE_BAUD, 0x5220, 3, MCA_COM_FLAGS },     \
+       { 0, BASE_BAUD, 0x5228, 3, MCA_COM_FLAGS },
+#else
+#define MCA_SERIAL_PORT_DFNS
+#endif
+
+#define SERIAL_PORT_DFNS               \
+       STD_SERIAL_PORT_DEFNS           \
+       EXTRA_SERIAL_PORT_DEFNS         \
+       HUB6_SERIAL_PORT_DFNS           \
+       MCA_SERIAL_PORT_DFNS
+
diff --git a/include/asm-x86_64/setup.h b/include/asm-x86_64/setup.h
new file mode 100644 (file)
index 0000000..7079a13
--- /dev/null
@@ -0,0 +1,10 @@
+/*
+ *     Just a placeholder. We don't want to have to test for x86 before
+ *     we include stuff.
+ */
+
+#ifndef _x8664_SETUP_H
+#define _x8664_SETUP_H
+
+
+#endif
diff --git a/include/asm-x86_64/shmbuf.h b/include/asm-x86_64/shmbuf.h
new file mode 100644 (file)
index 0000000..5a6d6dd
--- /dev/null
@@ -0,0 +1,38 @@
+#ifndef _X8664_SHMBUF_H
+#define _X8664_SHMBUF_H
+
+/* 
+ * The shmid64_ds structure for the x86_64 architecture.
+ * Note extra padding because this structure is passed back and forth
+ * between kernel and user space.
+ *
+ * Pad space is left for:
+ * - 2 miscellaneous 64-bit values
+ */
+
+struct shmid64_ds {
+       struct ipc64_perm       shm_perm;       /* operation perms */
+       size_t                  shm_segsz;      /* size of segment (bytes) */
+       __kernel_time_t         shm_atime;      /* last attach time */
+       __kernel_time_t         shm_dtime;      /* last detach time */
+       __kernel_time_t         shm_ctime;      /* last change time */
+       __kernel_pid_t          shm_cpid;       /* pid of creator */
+       __kernel_pid_t          shm_lpid;       /* pid of last operator */
+       unsigned long           shm_nattch;     /* no. of current attaches */
+       unsigned long           __unused4;
+       unsigned long           __unused5;
+};
+
+struct shminfo64 {
+       unsigned long   shmmax;
+       unsigned long   shmmin;
+       unsigned long   shmmni;
+       unsigned long   shmseg;
+       unsigned long   shmall;
+       unsigned long   __unused1;
+       unsigned long   __unused2;
+       unsigned long   __unused3;
+       unsigned long   __unused4;
+};
+
+#endif
diff --git a/include/asm-x86_64/shmparam.h b/include/asm-x86_64/shmparam.h
new file mode 100644 (file)
index 0000000..d702162
--- /dev/null
@@ -0,0 +1,6 @@
+#ifndef _ASMX8664_SHMPARAM_H
+#define _ASMX8664_SHMPARAM_H
+
+#define        SHMLBA PAGE_SIZE                 /* attach addr a multiple of this */
+
+#endif /* _ASMX8664_SHMPARAM_H */
diff --git a/include/asm-x86_64/sigcontext.h b/include/asm-x86_64/sigcontext.h
new file mode 100644 (file)
index 0000000..948f2ce
--- /dev/null
@@ -0,0 +1,97 @@
+#ifndef _ASM_X86_64_SIGCONTEXT_H
+#define _ASM_X86_64_SIGCONTEXT_H
+
+#include <asm/types.h>
+
+/*
+ * The first part of "struct _fpstate" is just the normal i387
+ * hardware setup, the extra "status" word is used to save the
+ * coprocessor status word before entering the handler.
+ *
+ * Pentium III FXSR, SSE support
+ *     Gareth Hughes <gareth@valinux.com>, May 2000
+ *
+ * The FPU state data structure has had to grow to accommodate the
+ * extended FPU state required by the Streaming SIMD Extensions.
+ * There is no documented standard to accomplish this at the moment.
+ */
+struct _fpreg {
+       unsigned short significand[4];
+       unsigned short exponent;
+};
+
+struct _fpxreg {
+       unsigned short significand[4];
+       unsigned short exponent;
+       unsigned short padding[3];
+};
+
+struct _xmmreg {
+       __u32   element[4];
+};
+
+
+/* This is the FXSAVE layout without the 64bit prefix, and thus 32bit
+   compatible.  This means that the IP and DP fields are only 32bit and
+   are not useful in 64bit space.
+   If someone used them we would need to switch to 64bit FXSAVE.
+*/ 
+struct _fpstate {
+       /* Regular FPU environment */
+       __u32   cw;
+       __u32   sw;
+       __u32   tag;
+       __u32   ipoff;
+       __u32   cssel;
+       __u32   dataoff;
+       __u32   datasel;
+       struct _fpreg   _st[8];
+       unsigned short  status;
+       unsigned short  magic;          /* 0xffff = regular FPU data only */
+
+       /* FXSR FPU environment */
+       __u32   _fxsr_env[6];
+       __u32   mxcsr;
+       __u32   reserved;
+       struct _fpxreg  _fxsr_st[8];
+       struct _xmmreg  _xmm[8];        /* there are actually 16 XMM registers */
+       __u32   padding[56];
+};
+
+#define X86_FXSR_MAGIC         0x0000
+
+struct sigcontext { 
+       unsigned short gs, __gsh;
+       unsigned short fs, __fsh;
+       unsigned short es, __esh;
+       unsigned short ds, __dsh;
+       unsigned long r8;
+       unsigned long r9;
+       unsigned long r10;
+       unsigned long r12;
+       unsigned long r13;
+       unsigned long r14;
+       unsigned long r15;
+       unsigned long rdi;
+       unsigned long rsi;
+       unsigned long rbp;
+       unsigned long rbx;
+       unsigned long rdx;
+       unsigned long rax;
+       unsigned long trapno;
+       unsigned long err;
+       unsigned long rip;
+       unsigned short cs, __csh;
+       unsigned int __pad0;
+       unsigned long eflags;
+       unsigned long rsp_at_signal;
+       struct _fpstate * fpstate;
+       unsigned long oldmask;
+       unsigned long cr2;
+       unsigned long r11;
+       unsigned long rcx;
+       unsigned long rsp;
+};
+
+
+#endif
diff --git a/include/asm-x86_64/siginfo.h b/include/asm-x86_64/siginfo.h
new file mode 100644 (file)
index 0000000..54a6054
--- /dev/null
@@ -0,0 +1,232 @@
+#ifndef _X8664_SIGINFO_H
+#define _X8664_SIGINFO_H
+
+#include <linux/types.h>
+
+typedef union sigval {
+       int sival_int;
+       void *sival_ptr;
+} sigval_t;
+
+#define SI_MAX_SIZE    128
+#define SI_PAD_SIZE    ((SI_MAX_SIZE/sizeof(int)) - 3)
+
+typedef struct siginfo {
+       int si_signo;
+       int si_errno;
+       int si_code;
+
+       union {
+               int _pad[SI_PAD_SIZE];
+
+               /* kill() */
+               struct {
+                       pid_t _pid;             /* sender's pid */
+                       uid_t _uid;             /* sender's uid */
+               } _kill;
+
+               /* POSIX.1b timers */
+               struct {
+                       unsigned int _timer1;
+                       unsigned int _timer2;
+               } _timer;
+
+               /* POSIX.1b signals */
+               struct {
+                       pid_t _pid;             /* sender's pid */
+                       uid_t _uid;             /* sender's uid */
+                       sigval_t _sigval;
+               } _rt;
+
+               /* SIGCHLD */
+               struct {
+                       pid_t _pid;             /* which child */
+                       uid_t _uid;             /* sender's uid */
+                       int _status;            /* exit code */
+                       clock_t _utime;
+                       clock_t _stime;
+               } _sigchld;
+
+               /* SIGILL, SIGFPE, SIGSEGV, SIGBUS */
+               struct {
+                       void *_addr; /* faulting insn/memory ref. */
+               } _sigfault;
+
+               /* SIGPOLL */
+               struct {
+                       int _band;      /* POLL_IN, POLL_OUT, POLL_MSG */
+                       int _fd;
+               } _sigpoll;
+       } _sifields;
+} siginfo_t;
+
+/*
+ * How these fields are to be accessed.
+ */
+#define si_pid         _sifields._kill._pid
+#define si_uid         _sifields._kill._uid
+#define si_status      _sifields._sigchld._status
+#define si_utime       _sifields._sigchld._utime
+#define si_stime       _sifields._sigchld._stime
+#define si_value       _sifields._rt._sigval
+#define si_int         _sifields._rt._sigval.sival_int
+#define si_ptr         _sifields._rt._sigval.sival_ptr
+#define si_addr                _sifields._sigfault._addr
+#define si_band                _sifields._sigpoll._band
+#define si_fd          _sifields._sigpoll._fd
+
+#ifdef __KERNEL__
+#define __SI_MASK      0xffff0000
+#define __SI_KILL      (0 << 16)
+#define __SI_TIMER     (1 << 16)
+#define __SI_POLL      (2 << 16)
+#define __SI_FAULT     (3 << 16)
+#define __SI_CHLD      (4 << 16)
+#define __SI_RT                (5 << 16)
+#define __SI_CODE(T,N) ((T) << 16 | ((N) & 0xffff))
+#else
+#define __SI_KILL      0
+#define __SI_TIMER     0
+#define __SI_POLL      0
+#define __SI_FAULT     0
+#define __SI_CHLD      0
+#define __SI_RT                0
+#define __SI_CODE(T,N) (N)
+#endif
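+
+/*
+ * For illustration: in the kernel branch above a si_code value carries
+ * its class in the upper 16 bits, e.g. ILL_ILLOPC below is
+ * (__SI_FAULT|1) == (3 << 16) | 1 == 0x30001.  In the userspace branch
+ * the class values are all 0, so userspace just sees the low bits
+ * (ILL_ILLOPC == 1).
+ */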
+
+/*
+ * si_code values
+ * Digital reserves positive values for kernel-generated signals.
+ * ... And Linux ignores that convention -AK.
+ */
+#define SI_USER                0               /* sent by kill, sigsend, raise */
+#define SI_KERNEL      0x80            /* sent by the kernel from somewhere */
+#define SI_QUEUE       -1              /* sent by sigqueue */
+#define SI_TIMER __SI_CODE(__SI_TIMER,-2) /* sent by timer expiration */
+#define SI_MESGQ       -3              /* sent by real time message queue state change */
+#define SI_ASYNCIO     -4              /* sent by AIO completion */
+#define SI_SIGIO       -5              /* sent by queued SIGIO */
+#define SI_TKILL       -6              /* sent by tkill system call */
+
+#define SI_FROMUSER(siptr)     ((siptr)->si_code <= 0)
+#define SI_FROMKERNEL(siptr)   ((siptr)->si_code > 0)
+
+/*
+ * SIGILL si_codes
+ */
+#define ILL_ILLOPC     (__SI_FAULT|1)  /* illegal opcode */
+#define ILL_ILLOPN     (__SI_FAULT|2)  /* illegal operand */
+#define ILL_ILLADR     (__SI_FAULT|3)  /* illegal addressing mode */
+#define ILL_ILLTRP     (__SI_FAULT|4)  /* illegal trap */
+#define ILL_PRVOPC     (__SI_FAULT|5)  /* privileged opcode */
+#define ILL_PRVREG     (__SI_FAULT|6)  /* privileged register */
+#define ILL_COPROC     (__SI_FAULT|7)  /* coprocessor error */
+#define ILL_BADSTK     (__SI_FAULT|8)  /* internal stack error */
+#define NSIGILL                8
+
+/*
+ * SIGFPE si_codes
+ */
+#define FPE_INTDIV     (__SI_FAULT|1)  /* integer divide by zero */
+#define FPE_INTOVF     (__SI_FAULT|2)  /* integer overflow */
+#define FPE_FLTDIV     (__SI_FAULT|3)  /* floating point divide by zero */
+#define FPE_FLTOVF     (__SI_FAULT|4)  /* floating point overflow */
+#define FPE_FLTUND     (__SI_FAULT|5)  /* floating point underflow */
+#define FPE_FLTRES     (__SI_FAULT|6)  /* floating point inexact result */
+#define FPE_FLTINV     (__SI_FAULT|7)  /* floating point invalid operation */
+#define FPE_FLTSUB     (__SI_FAULT|8)  /* subscript out of range */
+#define NSIGFPE                8
+
+/*
+ * SIGSEGV si_codes
+ */
+#define SEGV_MAPERR    (__SI_FAULT|1)  /* address not mapped to object */
+#define SEGV_ACCERR    (__SI_FAULT|2)  /* invalid permissions for mapped object */
+#define NSIGSEGV       2
+
+/*
+ * SIGBUS si_codes
+ */
+#define BUS_ADRALN     (__SI_FAULT|1)  /* invalid address alignment */
+#define BUS_ADRERR     (__SI_FAULT|2)  /* non-existent physical address */
+#define BUS_OBJERR     (__SI_FAULT|3)  /* object specific hardware error */
+#define NSIGBUS                3
+
+/*
+ * SIGTRAP si_codes
+ */
+#define TRAP_BRKPT     (__SI_FAULT|1)  /* process breakpoint */
+#define TRAP_TRACE     (__SI_FAULT|2)  /* process trace trap */
+#define NSIGTRAP       2
+
+/*
+ * SIGCHLD si_codes
+ */
+#define CLD_EXITED     (__SI_CHLD|1)   /* child has exited */
+#define CLD_KILLED     (__SI_CHLD|2)   /* child was killed */
+#define CLD_DUMPED     (__SI_CHLD|3)   /* child terminated abnormally */
+#define CLD_TRAPPED    (__SI_CHLD|4)   /* traced child has trapped */
+#define CLD_STOPPED    (__SI_CHLD|5)   /* child has stopped */
+#define CLD_CONTINUED  (__SI_CHLD|6)   /* stopped child has continued */
+#define NSIGCHLD       6
+
+/*
+ * SIGPOLL si_codes
+ */
+#define POLL_IN                (__SI_POLL|1)   /* data input available */
+#define POLL_OUT       (__SI_POLL|2)   /* output buffers available */
+#define POLL_MSG       (__SI_POLL|3)   /* input message available */
+#define POLL_ERR       (__SI_POLL|4)   /* i/o error */
+#define POLL_PRI       (__SI_POLL|5)   /* high priority input available */
+#define POLL_HUP       (__SI_POLL|6)   /* device disconnected */
+#define NSIGPOLL       6
+
+/*
+ * sigevent definitions
+ * 
+ * It seems likely that SIGEV_THREAD will have to be handled from 
+ * userspace, libpthread transmuting it to SIGEV_SIGNAL, which the
+ * thread manager then catches and does the appropriate nonsense.
+ * However, everything is written out here so as to not get lost.
+ */
+#define SIGEV_SIGNAL   0       /* notify via signal */
+#define SIGEV_NONE     1       /* other notification: meaningless */
+#define SIGEV_THREAD   2       /* deliver via thread creation */
+
+#define SIGEV_MAX_SIZE 64
+#define SIGEV_PAD_SIZE ((SIGEV_MAX_SIZE/sizeof(int)) - 3)
+
+typedef struct sigevent {
+       sigval_t sigev_value;
+       int sigev_signo;
+       int sigev_notify;
+       union {
+               int _pad[SIGEV_PAD_SIZE];
+
+               struct {
+                       void (*_function)(sigval_t);
+                       void *_attribute;       /* really pthread_attr_t */
+               } _sigev_thread;
+       } _sigev_un;
+} sigevent_t;
+
+#define sigev_notify_function  _sigev_un._sigev_thread._function
+#define sigev_notify_attributes        _sigev_un._sigev_thread._attribute
+
+#ifdef __KERNEL__
+#include <linux/string.h>
+
+extern inline void copy_siginfo(siginfo_t *to, siginfo_t *from)
+{
+       if (from->si_code < 0)
+               memcpy(to, from, sizeof(siginfo_t));
+       else
+               /* _sigchld is currently the largest known union member */
+               memcpy(to, from, 3*sizeof(int) + sizeof(from->_sifields._sigchld));
+}
+
+extern int copy_siginfo_to_user(siginfo_t *to, siginfo_t *from);
+
+#endif /* __KERNEL__ */
+
+#endif
diff --git a/include/asm-x86_64/signal.h b/include/asm-x86_64/signal.h
new file mode 100644 (file)
index 0000000..f657934
--- /dev/null
@@ -0,0 +1,205 @@
+#ifndef _ASMx8664_SIGNAL_H
+#define _ASMx8664_SIGNAL_H
+
+#include <linux/types.h>
+#include <linux/linkage.h>
+
+/* Avoid too many header ordering problems.  */
+struct siginfo;
+
+#ifdef __KERNEL__
+/* Most things should be clean enough to redefine this at will, if care
+   is taken to make libc match.  */
+
+#define _NSIG          64
+#define _NSIG_BPW      64
+#define _NSIG_WORDS    (_NSIG / _NSIG_BPW)
+
+typedef unsigned long old_sigset_t;            /* at least 32 bits */
+
+typedef struct {
+       unsigned long sig[_NSIG_WORDS];
+} sigset_t;
+
+
+struct pt_regs; 
+asmlinkage int do_signal(struct pt_regs *regs, sigset_t *oldset);
+
+
+#else
+/* Here we must cater to libcs that poke about in kernel headers.  */
+
+#define NSIG           32
+typedef unsigned long sigset_t;
+
+#endif /* __KERNEL__ */
+
+#define SIGHUP          1
+#define SIGINT          2
+#define SIGQUIT                 3
+#define SIGILL          4
+#define SIGTRAP                 5
+#define SIGABRT                 6
+#define SIGIOT          6
+#define SIGBUS          7
+#define SIGFPE          8
+#define SIGKILL                 9
+#define SIGUSR1                10
+#define SIGSEGV                11
+#define SIGUSR2                12
+#define SIGPIPE                13
+#define SIGALRM                14
+#define SIGTERM                15
+#define SIGSTKFLT      16
+#define SIGCHLD                17
+#define SIGCONT                18
+#define SIGSTOP                19
+#define SIGTSTP                20
+#define SIGTTIN                21
+#define SIGTTOU                22
+#define SIGURG         23
+#define SIGXCPU                24
+#define SIGXFSZ                25
+#define SIGVTALRM      26
+#define SIGPROF                27
+#define SIGWINCH       28
+#define SIGIO          29
+#define SIGPOLL                SIGIO
+/*
+#define SIGLOST                29
+*/
+#define SIGPWR         30
+#define SIGSYS         31
+#define        SIGUNUSED       31
+
+/* These should not be considered constants from userland.  */
+#define SIGRTMIN       32
+#define SIGRTMAX       (_NSIG-1)
+
+/*
+ * SA_FLAGS values:
+ *
+ * SA_ONSTACK indicates that a registered stack_t will be used.
+ * SA_INTERRUPT is a no-op, but left due to historical reasons. Use the
+ * SA_RESTART flag to get restarting signals (which were the default long ago)
+ * SA_NOCLDSTOP flag to turn off SIGCHLD when children stop.
+ * SA_RESETHAND clears the handler when the signal is delivered.
+ * SA_NOCLDWAIT flag on SIGCHLD to inhibit zombies.
+ * SA_NODEFER prevents the current signal from being masked in the handler.
+ *
+ * SA_ONESHOT and SA_NOMASK are the historical Linux names for the Single
+ * Unix names RESETHAND and NODEFER respectively.
+ */
+#define SA_NOCLDSTOP   0x00000001
+#define SA_NOCLDWAIT   0x00000002 /* not supported yet */
+#define SA_SIGINFO     0x00000004
+#define SA_ONSTACK     0x08000000
+#define SA_RESTART     0x10000000
+#define SA_NODEFER     0x40000000
+#define SA_RESETHAND   0x80000000
+
+#define SA_NOMASK      SA_NODEFER
+#define SA_ONESHOT     SA_RESETHAND
+#define SA_INTERRUPT   0x20000000 /* dummy -- ignored */
+
+#define SA_RESTORER    0x04000000
+
+/*
+ * sigaltstack controls
+ */
+#define SS_ONSTACK     1
+#define SS_DISABLE     2
+
+#define MINSIGSTKSZ    2048
+#define SIGSTKSZ       8192
+
+#ifdef __KERNEL__
+
+/*
+ * These values of sa_flags are used only by the kernel as part of the
+ * irq handling routines.
+ *
+ * SA_INTERRUPT is also used by the irq handling routines.
+ * SA_SHIRQ is for shared interrupt support on PCI and EISA.
+ */
+#define SA_PROBE               SA_ONESHOT
+#define SA_SAMPLE_RANDOM       SA_RESTART
+#define SA_SHIRQ               0x04000000
+#endif
+
+#define SIG_BLOCK          0   /* for blocking signals */
+#define SIG_UNBLOCK        1   /* for unblocking signals */
+#define SIG_SETMASK        2   /* for setting the signal mask */
+
+/* Type of a signal handler.  */
+typedef void (*__sighandler_t)(int);
+
+#define SIG_DFL        ((__sighandler_t)0)     /* default signal handling */
+#define SIG_IGN        ((__sighandler_t)1)     /* ignore signal */
+#define SIG_ERR        ((__sighandler_t)-1)    /* error return from signal */
+
+#ifdef __KERNEL__
+struct sigaction {
+       __sighandler_t sa_handler;
+       unsigned long sa_flags;
+       void (*sa_restorer)(void);
+       sigset_t sa_mask;               /* mask last for extensibility */
+};
+
+struct k_sigaction {
+       struct sigaction sa;
+};
+#endif /* __KERNEL__ */
+
+typedef struct sigaltstack {
+       void *ss_sp;
+       int ss_flags;
+       size_t ss_size;
+} stack_t;
+
+#ifdef __KERNEL__
+#include <asm/sigcontext.h>
+
+#undef __HAVE_ARCH_SIG_BITOPS
+#if 0
+
+extern __inline__ void sigaddset(sigset_t *set, int _sig)
+{
+       __asm__("btsq %1,%0" : "=m"(*set) : "Ir"(_sig - 1) : "cc");
+}
+
+extern __inline__ void sigdelset(sigset_t *set, int _sig)
+{
+       __asm__("btrq %1,%0" : "=m"(*set) : "Ir"(_sig - 1) : "cc");
+}
+
+extern __inline__ int __const_sigismember(sigset_t *set, int _sig)
+{
+       unsigned long sig = _sig - 1;
+       return 1 & (set->sig[sig / _NSIG_BPW] >> (sig & (_NSIG_BPW-1)));
+}
+
+extern __inline__ int __gen_sigismember(sigset_t *set, int _sig)
+{
+       int ret;
+       __asm__("btq %2,%1\n\tsbbq %0,%0"
+               : "=r"(ret) : "m"(*set), "Ir"(_sig-1) : "cc");
+       return ret;
+}
+
+#define sigismember(set,sig)                   \
+       (__builtin_constant_p(sig) ?            \
+        __const_sigismember((set),(sig)) :     \
+        __gen_sigismember((set),(sig)))
+
+#define sigmask(sig)   (1UL << ((sig) - 1))
+
+extern __inline__ int sigfindinword(unsigned long word)
+{
+       __asm__("bsfq %1,%0" : "=r"(word) : "rm"(word) : "cc");
+       return word;
+}
+#endif
+#endif /* __KERNEL__ */
+
+#endif
diff --git a/include/asm-x86_64/smp.h b/include/asm-x86_64/smp.h
new file mode 100644 (file)
index 0000000..5a65d3f
--- /dev/null
@@ -0,0 +1,98 @@
+#ifndef __ASM_SMP_H
+#define __ASM_SMP_H
+
+/*
+ * We need the APIC definitions automatically as part of 'smp.h'
+ */
+#ifndef __ASSEMBLY__
+#include <linux/config.h>
+#include <linux/threads.h>
+#include <linux/ptrace.h>
+#endif
+
+#ifdef CONFIG_X86_LOCAL_APIC
+#ifndef __ASSEMBLY__
+#include <asm/fixmap.h>
+#include <asm/bitops.h>
+#include <asm/mpspec.h>
+#ifdef CONFIG_X86_IO_APIC
+#include <asm/io_apic.h>
+#endif
+#include <asm/apic.h>
+#endif
+#endif
+
+#ifdef CONFIG_SMP
+#ifndef ASSEMBLY
+
+#include <asm/pda.h>
+
+/*
+ * Private routines/data
+ */
+extern void smp_alloc_memory(void);
+extern unsigned long phys_cpu_present_map;
+extern unsigned long cpu_online_map;
+extern volatile unsigned long smp_invalidate_needed;
+extern int pic_mode;
+extern void smp_flush_tlb(void);
+extern void smp_message_irq(int cpl, void *dev_id, struct pt_regs *regs);
+extern void smp_send_reschedule(int cpu);
+extern void smp_send_reschedule_all(void);
+extern void smp_invalidate_rcv(void);          /* Process an NMI */
+extern void (*mtrr_hook) (void);
+extern void zap_low_mappings (void);
+
+/*
+ * On x86 all CPUs are mapped 1:1 to the APIC space.
+ * This simplifies scheduling and IPI sending and
+ * compresses data structures.
+ */
+extern inline int cpu_logical_map(int cpu)
+{
+       return cpu;
+}
+extern inline int cpu_number_map(int cpu)
+{
+       return cpu;
+}
+
+/*
+ * Some lowlevel functions might want to know about
+ * the real APIC ID <-> CPU # mapping.
+ */
+extern volatile int x86_apicid_to_cpu[NR_CPUS];
+extern volatile int x86_cpu_to_apicid[NR_CPUS];
+
+/*
+ * General functions that each host system must provide.
+ */
+extern void smp_boot_cpus(void);
+extern void smp_store_cpu_info(int id);                /* Store per CPU info (like the initial udelay numbers) */
+
+/*
+ * This function is needed by all SMP systems. It must _always_ be valid
+ * from the initial startup. We map APIC_BASE very early in page_setup(),
+ * so this is correct in the x86 case.
+ */
+
+#define smp_processor_id() read_pda(cpunumber)
+
+extern __inline int hard_smp_processor_id(void)
+{
+       /* we don't want to mark this access volatile - bad code generation */
+       return GET_APIC_ID(*(unsigned long *)(APIC_BASE+APIC_ID));
+}
+
+#endif /* !ASSEMBLY */
+
+#define NO_PROC_ID             0xFF            /* No processor magic marker */
+
+
+
+#endif
+#define INT_DELIVERY_MODE 1     /* logical delivery */
+#define TARGET_CPUS 1
+#endif
diff --git a/include/asm-x86_64/smplock.h b/include/asm-x86_64/smplock.h
new file mode 100644 (file)
index 0000000..6c0b652
--- /dev/null
@@ -0,0 +1,95 @@
+/*
+ * <asm/smplock.h>
+ */
+#include <linux/interrupt.h>
+#include <linux/spinlock.h>
+#include <linux/sched.h>
+#include <asm/current.h>
+
+extern spinlock_t kernel_flag;
+
+#ifdef CONFIG_SMP
+#define kernel_locked()                spin_is_locked(&kernel_flag)
+#define check_irq_holder(cpu)  \
+       if (global_irq_holder == (cpu)) \
+                       BUG();                 
+#else
+#ifdef CONFIG_PREEMPT
+#define kernel_locked()                preempt_get_count()
+#define global_irq_holder      0
+#define check_irq_holder(cpu) do {} while(0)
+#else
+#define kernel_locked()                1
+#define check_irq_holder(cpu)  \
+       if (global_irq_holder == (cpu)) \
+                       BUG();                 
+#endif
+#endif
+
+/*
+ * Release global kernel lock and global interrupt lock
+ */
+#define release_kernel_lock(task, cpu) \
+do { \
+       if (unlikely(task->lock_depth >= 0)) {  \
+               spin_unlock(&kernel_flag); \
+               check_irq_holder(cpu);  \
+       }                                       \
+} while (0)
+
+/*
+ * Re-acquire the kernel lock
+ */
+#define reacquire_kernel_lock(task) \
+do { \
+       if (unlikely(task->lock_depth >= 0))    \
+               spin_lock(&kernel_flag); \
+} while (0)
+
+
+/*
+ * Getting the big kernel lock.
+ *
+ * This cannot happen asynchronously,
+ * so we only need to worry about other
+ * CPUs.
+ */
+extern __inline__ void lock_kernel(void)
+{
+#ifdef CONFIG_PREEMPT
+       if (current->lock_depth == -1)
+               spin_lock(&kernel_flag);
+       ++current->lock_depth;
+#else
+#if 1
+       if (!++current->lock_depth)
+               spin_lock(&kernel_flag);
+#else
+       __asm__ __volatile__(
+               "incl %1\n\t"
+               "jne 9f"
+               spin_lock_string
+               "\n9:"
+               :"=m" (__dummy_lock(&kernel_flag)),
+                "=m" (current->lock_depth));
+#endif
+#endif
+}
+
+extern __inline__ void unlock_kernel(void)
+{
+       if (current->lock_depth < 0)
+               BUG();
+#if 1
+       if (--current->lock_depth < 0)
+               spin_unlock(&kernel_flag);
+#else
+       __asm__ __volatile__(
+               "decl %1\n\t"
+               "jns 9f\n\t"
+               spin_unlock_string
+               "\n9:"
+               :"=m" (__dummy_lock(&kernel_flag)),
+                "=m" (current->lock_depth));
+#endif
+}
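+
+/*
+ * Illustration of the recursion handling (assuming the task starts with
+ * lock_depth == -1, which is what the tests above rely on):
+ *
+ *     lock_kernel();          depth -1 -> 0, takes kernel_flag
+ *     lock_kernel();          depth  0 -> 1, lock already held
+ *     unlock_kernel();        depth  1 -> 0, still held
+ *     unlock_kernel();        depth  0 -> -1, releases kernel_flag
+ */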
diff --git a/include/asm-x86_64/socket.h b/include/asm-x86_64/socket.h
new file mode 100644 (file)
index 0000000..fbcc44d
--- /dev/null
@@ -0,0 +1,64 @@
+#ifndef _ASM_SOCKET_H
+#define _ASM_SOCKET_H
+
+#include <asm/sockios.h>
+
+/* For setsockoptions(2) */
+#define SOL_SOCKET     1
+
+#define SO_DEBUG       1
+#define SO_REUSEADDR   2
+#define SO_TYPE                3
+#define SO_ERROR       4
+#define SO_DONTROUTE   5
+#define SO_BROADCAST   6
+#define SO_SNDBUF      7
+#define SO_RCVBUF      8
+#define SO_KEEPALIVE   9
+#define SO_OOBINLINE   10
+#define SO_NO_CHECK    11
+#define SO_PRIORITY    12
+#define SO_LINGER      13
+#define SO_BSDCOMPAT   14
+/* To add :#define SO_REUSEPORT 15 */
+#define SO_PASSCRED    16
+#define SO_PEERCRED    17
+#define SO_RCVLOWAT    18
+#define SO_SNDLOWAT    19
+#define SO_RCVTIMEO    20
+#define SO_SNDTIMEO    21
+
+/* Security levels - as per NRL IPv6 - don't actually do anything */
+#define SO_SECURITY_AUTHENTICATION             22
+#define SO_SECURITY_ENCRYPTION_TRANSPORT       23
+#define SO_SECURITY_ENCRYPTION_NETWORK         24
+
+#define SO_BINDTODEVICE        25
+
+/* Socket filtering */
+#define SO_ATTACH_FILTER        26
+#define SO_DETACH_FILTER        27
+
+#define SO_PEERNAME            28
+#define SO_TIMESTAMP           29
+#define SCM_TIMESTAMP          SO_TIMESTAMP
+
+#define SO_ACCEPTCONN          30
+
+/* Nasty libc5 fixup - bletch */
+#if defined(__KERNEL__) || !defined(__GLIBC__) || (__GLIBC__ < 2)
+/* Socket types. */
+#define SOCK_STREAM    1               /* stream (connection) socket   */
+#define SOCK_DGRAM     2               /* datagram (conn.less) socket  */
+#define SOCK_RAW       3               /* raw socket                   */
+#define SOCK_RDM       4               /* reliably-delivered message   */
+#define SOCK_SEQPACKET 5               /* sequential packet socket     */
+#define SOCK_PACKET    10              /* linux specific way of        */
+                                       /* getting packets at the dev   */
+                                       /* level.  For writing rarp and */
+                                       /* other similar things on the  */
+                                       /* user level.                  */
+#define        SOCK_MAX        (SOCK_PACKET+1)
+#endif
+
+#endif /* _ASM_SOCKET_H */
diff --git a/include/asm-x86_64/socket32.h b/include/asm-x86_64/socket32.h
new file mode 100644 (file)
index 0000000..0a3df29
--- /dev/null
@@ -0,0 +1,70 @@
+#ifndef SOCKET32_H
+#define SOCKET32_H 1
+
+/* XXX This really belongs in some header file... -DaveM */
+#define MAX_SOCK_ADDR  128             /* 108 for Unix domain - 
+                                          16 for IP, 16 for IPX,
+                                          24 for IPv6,
+                                          about 80 for AX.25 */
+
+extern struct socket *sockfd_lookup(int fd, int *err);
+
+/* XXX This as well... */
+extern __inline__ void sockfd_put(struct socket *sock)
+{
+       fput(sock->file);
+}
+
+struct msghdr32 {
+        u32               msg_name;
+        int               msg_namelen;
+        u32               msg_iov;
+        __kernel_size_t32 msg_iovlen;
+        u32               msg_control;
+        __kernel_size_t32 msg_controllen;
+        unsigned          msg_flags;
+};
+
+struct cmsghdr32 {
+        __kernel_size_t32 cmsg_len;
+        int               cmsg_level;
+        int               cmsg_type;
+};
+
+/* Bleech... */
+#define __CMSG32_NXTHDR(ctl, len, cmsg, cmsglen) __cmsg32_nxthdr((ctl),(len),(cmsg),(cmsglen))
+#define CMSG32_NXTHDR(mhdr, cmsg, cmsglen) cmsg32_nxthdr((mhdr), (cmsg), (cmsglen))
+
+#define CMSG32_ALIGN(len) ( ((len)+sizeof(int)-1) & ~(sizeof(int)-1) )
+
+#define CMSG32_DATA(cmsg)      ((void *)((char *)(cmsg) + CMSG32_ALIGN(sizeof(struct cmsghdr32))))
+#define CMSG32_SPACE(len) (CMSG32_ALIGN(sizeof(struct cmsghdr32)) + CMSG32_ALIGN(len))
+#define CMSG32_LEN(len) (CMSG32_ALIGN(sizeof(struct cmsghdr32)) + (len))
+
+#define __CMSG32_FIRSTHDR(ctl,len) ((len) >= sizeof(struct cmsghdr32) ? \
+                                   (struct cmsghdr32 *)(ctl) : \
+                                   (struct cmsghdr32 *)NULL)
+#define CMSG32_FIRSTHDR(msg)   __CMSG32_FIRSTHDR((msg)->msg_control, (msg)->msg_controllen)
+
+__inline__ struct cmsghdr32 *__cmsg32_nxthdr(void *__ctl, __kernel_size_t __size,
+                                             struct cmsghdr32 *__cmsg, int __cmsg_len)
+{
+       struct cmsghdr32 * __ptr;
+
+       __ptr = (struct cmsghdr32 *)(((unsigned char *) __cmsg) +
+                                    CMSG32_ALIGN(__cmsg_len));
+       if ((unsigned long)((char*)(__ptr+1) - (char *) __ctl) > __size)
+               return NULL;
+
+       return __ptr;
+}
+
+__inline__ struct cmsghdr32 *cmsg32_nxthdr (struct msghdr *__msg,
+                                           struct cmsghdr32 *__cmsg,
+                                           int __cmsg_len)
+{
+       return __cmsg32_nxthdr(__msg->msg_control, __msg->msg_controllen,
+                              __cmsg, __cmsg_len);
+}
+
+#endif
diff --git a/include/asm-x86_64/sockios.h b/include/asm-x86_64/sockios.h
new file mode 100644 (file)
index 0000000..2eefd10
--- /dev/null
@@ -0,0 +1,12 @@
+#ifndef __ARCH_X8664_SOCKIOS__
+#define __ARCH_X8664_SOCKIOS__
+
+/* Socket-level I/O control calls. */
+#define FIOSETOWN      0x8901
+#define SIOCSPGRP      0x8902
+#define FIOGETOWN      0x8903
+#define SIOCGPGRP      0x8904
+#define SIOCATMARK     0x8905
+#define SIOCGSTAMP     0x8906          /* Get stamp */
+
+#endif
diff --git a/include/asm-x86_64/softirq.h b/include/asm-x86_64/softirq.h
new file mode 100644 (file)
index 0000000..e85c376
--- /dev/null
@@ -0,0 +1,34 @@
+#ifndef __ASM_SOFTIRQ_H
+#define __ASM_SOFTIRQ_H
+
+#include <asm/atomic.h>
+#include <asm/hardirq.h>
+#include <asm/pda.h>
+
+#define __cpu_bh_enable() do { \
+       barrier(); sub_pda(__local_bh_count,1); preempt_enable(); } while (0)
+#define cpu_bh_disable() do { \
+       preempt_disable(); add_pda(__local_bh_count,1); barrier(); } while (0)
+
+#define local_bh_disable()     cpu_bh_disable()
+#define __local_bh_enable()    __cpu_bh_enable()
+
+#define in_softirq() (read_pda(__local_bh_count) != 0)
+
+#define _local_bh_enable() do {                                                        \
+       asm volatile(                                                           \
+               "decl %%gs:%c1;"                                                \
+               "jnz 1f;"                                                       \
+               "cmpl $0,%%gs:%c0;"                                             \
+               "jnz 2f;"                                                       \
+               "1:;"                                                           \
+               ".section .text.lock,\"ax\";"                                   \
+               "2: call do_softirq_thunk;"                                     \
+               "jmp 1b;"                                                       \
+               ".previous"                                                     \
+               :: "i" (pda___softirq_pending), "i" (pda___local_bh_count) : \
+               "memory");      \
+} while (0)
+#define local_bh_enable() do { _local_bh_enable(); preempt_enable(); } while(0)
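+
+/*
+ * Illustrative nesting semantics: local_bh_disable() bumps the per-CPU
+ * __local_bh_count in the PDA; local_bh_enable() decrements it and only
+ * when the count returns to zero and __softirq_pending is non-zero does
+ * it drop into do_softirq via the thunk above.
+ */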
+               
+#endif /* __ASM_SOFTIRQ_H */
diff --git a/include/asm-x86_64/spinlock.h b/include/asm-x86_64/spinlock.h
new file mode 100644 (file)
index 0000000..b68101e
--- /dev/null
@@ -0,0 +1,181 @@
+#ifndef __ASM_SPINLOCK_H
+#define __ASM_SPINLOCK_H
+
+#include <asm/atomic.h>
+#include <asm/rwlock.h>
+#include <asm/page.h>
+#include <linux/config.h>
+
+extern int printk(const char * fmt, ...)
+       __attribute__ ((format (printf, 1, 2)));
+
+/* It seems that people are forgetting to
+ * initialize their spinlocks properly, tsk tsk.
+ * Remember to turn this off in 2.4. -ben
+ */
+#if defined(CONFIG_DEBUG_SPINLOCK)
+#define SPINLOCK_DEBUG 1
+#else
+#define SPINLOCK_DEBUG 0
+#endif
+
+/*
+ * Your basic SMP spinlocks, allowing only a single CPU anywhere
+ */
+
+typedef struct {
+       volatile unsigned int lock;
+#if SPINLOCK_DEBUG
+       unsigned magic;
+#endif
+} spinlock_t;
+
+#define SPINLOCK_MAGIC 0xdead4ead
+
+#if SPINLOCK_DEBUG
+#define SPINLOCK_MAGIC_INIT    , SPINLOCK_MAGIC
+#else
+#define SPINLOCK_MAGIC_INIT    /* */
+#endif
+
+#define SPIN_LOCK_UNLOCKED (spinlock_t) { 1 SPINLOCK_MAGIC_INIT }
+
+#define spin_lock_init(x)      do { *(x) = SPIN_LOCK_UNLOCKED; } while(0)
+
+/*
+ * Simple spin lock operations.  There are two variants, one clears IRQ's
+ * on the local processor, one does not.
+ *
+ * We make no fairness assumptions. They have a cost.
+ */
+
+#define spin_is_locked(x)      (*(volatile char *)(&(x)->lock) <= 0)
+#define spin_unlock_wait(x)    do { barrier(); } while(spin_is_locked(x))
+
+#define spin_lock_string \
+       "\n1:\t" \
+       "lock ; decb %0\n\t" \
+       "js 2f\n" \
+       ".section .text.lock,\"ax\"\n" \
+       "2:\t" \
+       "cmpb $0,%0\n\t" \
+       "rep;nop\n\t" \
+       "jle 2b\n\t" \
+       "jmp 1b\n" \
+       ".previous"
+
+/*
+ * This works. Despite all the confusion.
+ */
+#define spin_unlock_string \
+       "movb $1,%0"
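+
+/*
+ * Illustrative state of the lock byte under this scheme: 1 means
+ * unlocked (SPIN_LOCK_UNLOCKED), the winner's "lock ; decb" takes it to
+ * 0, and further contenders drive it negative and spin in the
+ * .text.lock section until the unlock path stores 1 again.
+ */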
+
+static inline int _raw_spin_trylock(spinlock_t *lock)
+{
+       char oldval;
+       __asm__ __volatile__(
+               "xchgb %b0,%1"
+               :"=q" (oldval), "=m" (lock->lock)
+               :"0" (0) : "memory");
+       return oldval > 0;
+}
+
+static inline void _raw_spin_lock(spinlock_t *lock)
+{
+#if SPINLOCK_DEBUG
+       __label__ here;
+here:
+       if (lock->magic != SPINLOCK_MAGIC) {
+printk("eip: %p\n", &&here);
+               BUG();
+       }
+#endif
+       __asm__ __volatile__(
+               spin_lock_string
+               :"=m" (lock->lock) : : "memory");
+}
+
+static inline void _raw_spin_unlock(spinlock_t *lock)
+{
+#if SPINLOCK_DEBUG
+       if (lock->magic != SPINLOCK_MAGIC)
+               BUG();
+       if (!spin_is_locked(lock))
+               BUG();
+#endif
+       __asm__ __volatile__(
+               spin_unlock_string
+               :"=m" (lock->lock) : : "memory");
+}
+
+/*
+ * Read-write spinlocks, allowing multiple readers
+ * but only one writer.
+ *
+ * NOTE! it is quite common to have readers in interrupts
+ * but no interrupt writers. For those circumstances we
+ * can "mix" irq-safe locks - any writer needs to get a
+ * irq-safe write-lock, but readers can get non-irqsafe
+ * read-locks.
+ */
+typedef struct {
+       volatile unsigned int lock;
+#if SPINLOCK_DEBUG
+       unsigned magic;
+#endif
+} rwlock_t;
+
+#define RWLOCK_MAGIC   0xdeaf1eed
+
+#if SPINLOCK_DEBUG
+#define RWLOCK_MAGIC_INIT      , RWLOCK_MAGIC
+#else
+#define RWLOCK_MAGIC_INIT      /* */
+#endif
+
+#define RW_LOCK_UNLOCKED (rwlock_t) { RW_LOCK_BIAS RWLOCK_MAGIC_INIT }
+
+#define rwlock_init(x) do { *(x) = RW_LOCK_UNLOCKED; } while(0)
+
+/*
+ * On x86, we implement read-write locks as a 32-bit counter
+ * with the high bit (sign) being the "contended" bit.
+ *
+ * The inline assembly is non-obvious. Think about it.
+ *
+ * Changed to use the same technique as rw semaphores.  See
+ * semaphore.h for details.  -ben
+ */
+/* the spinlock helpers are in arch/x86_64/kernel/semaphore.S */
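+
+/*
+ * Sketch of the counter values (assuming RW_LOCK_BIAS from
+ * <asm/rwlock.h>, conventionally 0x01000000):
+ *
+ *     RW_LOCK_BIAS            unlocked
+ *     RW_LOCK_BIAS - N        N readers, no writer
+ *     0                       write-locked
+ *
+ * A reader subtracts 1 and fails on a negative result; a writer
+ * subtracts RW_LOCK_BIAS and fails on a non-zero result, exactly as
+ * _raw_write_trylock() below does with atomic_sub_and_test().
+ */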
+
+extern inline void _raw_read_lock(rwlock_t *rw)
+{
+#if SPINLOCK_DEBUG
+       if (rw->magic != RWLOCK_MAGIC)
+               BUG();
+#endif
+       __build_read_lock(rw, "__read_lock_failed");
+}
+
+static inline void _raw_write_lock(rwlock_t *rw)
+{
+#if SPINLOCK_DEBUG
+       if (rw->magic != RWLOCK_MAGIC)
+               BUG();
+#endif
+       __build_write_lock(rw, "__write_lock_failed");
+}
+
+#define _raw_read_unlock(rw)           asm volatile("lock ; incl %0" :"=m" ((rw)->lock) : : "memory")
+#define _raw_write_unlock(rw)  asm volatile("lock ; addl $" RW_LOCK_BIAS_STR ",%0":"=m" ((rw)->lock) : : "memory")
+
+static inline int _raw_write_trylock(rwlock_t *lock)
+{
+       atomic_t *count = (atomic_t *)lock;
+       if (atomic_sub_and_test(RW_LOCK_BIAS, count))
+               return 1;
+       atomic_add(RW_LOCK_BIAS, count);
+       return 0;
+}
+
+#endif /* __ASM_SPINLOCK_H */
diff --git a/include/asm-x86_64/stat.h b/include/asm-x86_64/stat.h
new file mode 100644 (file)
index 0000000..45c1da4
--- /dev/null
@@ -0,0 +1,27 @@
+#ifndef _ASM_X86_64_STAT_H
+#define _ASM_X86_64_STAT_H
+
+struct stat {
+       unsigned long   st_dev;
+       unsigned long   st_ino;
+       unsigned long   st_nlink;
+
+       unsigned int    st_mode;
+       unsigned int    st_uid;
+       unsigned int    st_gid;
+       unsigned int    __pad0;
+       unsigned long   st_rdev;
+       long            st_size;
+       long            st_blksize;
+       long            st_blocks;      /* Number of 512-byte blocks allocated. */
+
+       unsigned long   st_atime;
+       unsigned long   __reserved0;    /* reserved for atime.nanoseconds */
+       unsigned long   st_mtime;
+       unsigned long   __reserved1;    /* reserved for mtime.nanoseconds */
+       unsigned long   st_ctime;
+       unsigned long   __reserved2;    /* reserved for ctime.nanoseconds */
+       long            __unused[3];
+};
+
+#endif
diff --git a/include/asm-x86_64/statfs.h b/include/asm-x86_64/statfs.h
new file mode 100644 (file)
index 0000000..2d6ea7a
--- /dev/null
@@ -0,0 +1,25 @@
+#ifndef _X86_64_STATFS_H
+#define _X86_64_STATFS_H
+
+#ifndef __KERNEL_STRICT_NAMES
+
+#include <linux/types.h>
+
+typedef __kernel_fsid_t        fsid_t;
+
+#endif
+
+struct statfs {
+       long f_type;
+       long f_bsize;
+       long f_blocks;
+       long f_bfree;
+       long f_bavail;
+       long f_files;
+       long f_ffree;
+       __kernel_fsid_t f_fsid;
+       long f_namelen;
+       long f_spare[6];
+};
+
+#endif
diff --git a/include/asm-x86_64/string.h b/include/asm-x86_64/string.h
new file mode 100644 (file)
index 0000000..2e7b14a
--- /dev/null
@@ -0,0 +1,38 @@
+#ifndef _X86_64_STRING_H_
+#define _X86_64_STRING_H_
+
+#ifdef __KERNEL__
+#include <linux/config.h>
+
+#define struct_cpy(x,y) (*(x)=*(y))
+
+#define __HAVE_ARCH_MEMCMP
+#define __HAVE_ARCH_STRLEN
+
+#define memset __builtin_memset
+#define memcpy __builtin_memcpy
+#define memcmp __builtin_memcmp
+
+/* Work around "undefined reference to strlen" linker errors.  */
+/* #define strlen __builtin_strlen */
+
+#define __HAVE_ARCH_STRLEN
+static inline size_t strlen(const char *s)
+{
+       int d0;
+       register int __res;
+       __asm__ __volatile__(
+               "repne\n\t"
+               "scasb\n\t"
+               "notl %0\n\t"
+               "decl %0"
+               :"=c" (__res), "=&D" (d0)
+               :"1" (s), "a" (0), "0" (0xffffffff));
+       return __res;
+}
+
+
+extern char *strstr(const char *cs, const char *ct);
+
+#endif /* __KERNEL__ */
+
+#endif
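
The inline strlen() preloads %rcx with -1 and %al with 0, lets repne scasb scan until it consumes the terminating NUL, and then recovers the length from the counter: the scan examines len+1 bytes, leaving %ecx at -(len+2), so notl yields len+1 and decl yields len. The same arithmetic written out in plain C (illustration only):

/* sketch: what the repne/scasb/notl/decl sequence computes */
#include <stdio.h>

static unsigned int scasb_strlen(const char *s)
{
	unsigned int count = 0xffffffff;	/* %ecx preload */

	/* repne scasb: decrement the counter for every byte examined,
	 * stopping after the byte that matches %al (here 0) */
	do {
		count--;
	} while (*s++ != 0);

	count = ~count;		/* notl %ecx -> len + 1 */
	count--;		/* decl %ecx -> len     */
	return count;
}

int main(void)
{
	printf("%u\n", scasb_strlen("x86_64"));	/* prints 6 */
	return 0;
}
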
diff --git a/include/asm-x86_64/system.h b/include/asm-x86_64/system.h
new file mode 100644 (file)
index 0000000..d97077c
--- /dev/null
@@ -0,0 +1,283 @@
+#ifndef __ASM_SYSTEM_H
+#define __ASM_SYSTEM_H
+
+#include <linux/config.h>
+#include <linux/kernel.h>
+#include <asm/segment.h>
+
+#ifdef __KERNEL__
+
+#ifdef CONFIG_SMP
+#define LOCK_PREFIX "lock ; "
+#else
+#define LOCK_PREFIX ""
+#endif
+
+struct task_struct;    /* one of the stranger aspects of C forward declarations.. */
+extern void __switch_to(struct task_struct *prev, struct task_struct *next);
+
+#define prepare_to_switch()    do { } while(0)
+
+#define switch_to(prev,next,last) do {                                 \
+       asm volatile("pushq %%rbp\n\t"                                  \
+                    "pushq %%rbx\n\t"                                  \
+                    "pushq %%r8\n\t"                                   \
+                    "pushq %%r9\n\t"                                   \
+                    "pushq %%r10\n\t"                                  \
+                    "pushq %%r11\n\t"                                  \
+                    "pushq %%r12\n\t"                                  \
+                    "pushq %%r13\n\t"                                  \
+                    "pushq %%r14\n\t"                                  \
+                    "pushq %%r15\n\t"                                  \
+                    "movq %%rsp,%0\n\t"        /* save RSP */          \
+                    "movq %3,%%rsp\n\t"        /* restore RSP */       \
+                    "leaq 1f(%%rip),%%rbp\n\t"                         \
+                    "movq %%rbp,%1\n\t"        /* save RIP */          \
+                    "pushq %4\n\t"             /* setup new RIP */     \
+                    "jmp __switch_to\n\t"              \
+                    "1:\t"             \
+                    "popq %%r15\n\t"                           \
+                    "popq %%r14\n\t"                                   \
+                    "popq %%r13\n\t"                                   \
+                    "popq %%r12\n\t"                                   \
+                    "popq %%r11\n\t"                                   \
+                    "popq %%r10\n\t"                                   \
+                    "popq %%r9\n\t"                                    \
+                    "popq %%r8\n\t"                                    \
+                    "popq %%rbx\n\t"                                   \
+                    "popq %%rbp\n\t"                                   \
+                    :"=m" (prev->thread.rsp),"=m" (prev->thread.rip),  \
+                     "=b" (last)                                       \
+                    :"m" (next->thread.rsp),"m" (next->thread.rip),    \
+                     "b" (prev), "S" (next), "D" (prev));              \
+} while (0)
+
+/*
+ * Load a segment. Fall back on loading the zero
+ * segment if something goes wrong..
+ */
+#define loadsegment(seg,value) do { int v = value;     \
+       asm volatile("\n"                       \
+               "1:\t"                          \
+               "movl %0,%%" #seg "\n"          \
+               "2:\n"                          \
+               ".section .fixup,\"ax\"\n"      \
+               "3:\t"                          \
+               "pushq $0 ; popq %% " #seg "\n\t"       \
+               "jmp 2b\n"                      \
+               ".previous\n"                   \
+               ".section __ex_table,\"a\"\n\t" \
+               ".align 4\n\t"                  \
+               ".quad 1b,3b\n"                 \
+               ".previous"                     \
+               : :"r" (v)); } while(0)
+
+#define set_debug(value,register) \
+                __asm__("movq %0,%%db" #register  \
+               : /* no output */ \
+               :"r" ((unsigned long) value))
+
+
+/*
+ * Clear and set 'TS' bit respectively
+ */
+#define clts() __asm__ __volatile__ ("clts")
+#define read_cr0() ({ \
+       unsigned long __dummy; \
+       __asm__( \
+               "movq %%cr0,%0\n\t" \
+               :"=r" (__dummy)); \
+       __dummy; \
+})
+#define write_cr0(x) \
+       __asm__("movq %0,%%cr0": :"r" (x));
+
+#define read_cr4() ({ \
+       unsigned long __dummy; \
+       __asm__( \
+               "movq %%cr4,%0\n\t" \
+               :"=r" (__dummy)); \
+       __dummy; \
+})
+#define write_cr4(x) \
+       __asm__("movq %0,%%cr4": :"r" (x));
+#define stts() write_cr0(8 | read_cr0())
+
+#define wbinvd() \
+       __asm__ __volatile__ ("wbinvd": : :"memory");
+
+#endif /* __KERNEL__ */
+
+#define nop() __asm__ __volatile__ ("nop")
+
+#define xchg(ptr,v) ((__typeof__(*(ptr)))__xchg((unsigned long)(v),(ptr),sizeof(*(ptr))))
+
+#define tas(ptr) (xchg((ptr),1))
+
+#define __xg(x) ((volatile long *)(x))
+
+extern inline void set_64bit(volatile unsigned long *ptr, unsigned long val)
+{
+       *ptr = val;
+}
+
+#define _set_64bit set_64bit
+
+/*
+ * Note: no "lock" prefix even on SMP: xchg always implies lock anyway
+ * Note 2: xchg has a side effect, so the volatile attribute is necessary;
+ *       strictly the asm constraints are incomplete, since *ptr is also an
+ *       output argument. --ANK
+ */
+static inline unsigned long __xchg(unsigned long x, volatile void * ptr, int size)
+{
+       switch (size) {
+               case 1:
+                       __asm__ __volatile__("xchgb %b0,%1"
+                               :"=q" (x)
+                               :"m" (*__xg(ptr)), "0" (x)
+                               :"memory");
+                       break;
+               case 2:
+                       __asm__ __volatile__("xchgw %w0,%1"
+                               :"=r" (x)
+                               :"m" (*__xg(ptr)), "0" (x)
+                               :"memory");
+                       break;
+               case 4:
+                       __asm__ __volatile__("xchgl %k0,%1"
+                               :"=r" (x)
+                               :"m" (*__xg(ptr)), "0" (x)
+                               :"memory");
+                       break;
+               case 8:
+                       __asm__ __volatile__("xchgq %0,%1"
+                               :"=r" (x)
+                               :"m" (*__xg(ptr)), "0" (x)
+                               :"memory");
+                       break;
+       }
+       return x;
+}
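
tas(ptr) above is simply xchg(ptr, 1), and the classic use of that is a test-and-set lock: exchange a 1 into the word and spin while the old value was already 1. A user-space sketch with GCC's __sync_lock_test_and_set, which typically compiles to exactly this xchg, standing in for the kernel helper:

/* sketch: test-and-set lock built on an atomic exchange */
#include <stdio.h>

static volatile long simple_lock;	/* 0 = free, 1 = held */

static void ts_lock(volatile long *l)
{
	/* spin until the exchanged-out old value is 0 (lock was free) */
	while (__sync_lock_test_and_set(l, 1))
		;
}

static void ts_unlock(volatile long *l)
{
	__sync_lock_release(l);		/* store 0 back */
}

int main(void)
{
	ts_lock(&simple_lock);
	printf("holding the lock\n");
	ts_unlock(&simple_lock);
	return 0;
}
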
+
+/*
+ * Atomic compare and exchange.  Compare OLD with MEM, if identical,
+ * store NEW in MEM.  Return the initial value in MEM.  Success is
+ * indicated by comparing RETURN with OLD.
+ */
+
+#define __HAVE_ARCH_CMPXCHG 1
+
+static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old,
+                                     unsigned long new, int size)
+{
+       unsigned long prev;
+       switch (size) {
+       case 1:
+               __asm__ __volatile__(LOCK_PREFIX "cmpxchgb %b1,%2"
+                                    : "=a"(prev)
+                                    : "q"(new), "m"(*__xg(ptr)), "0"(old)
+                                    : "memory");
+               return prev;
+       case 2:
+               __asm__ __volatile__(LOCK_PREFIX "cmpxchgw %w1,%2"
+                                    : "=a"(prev)
+                                    : "q"(new), "m"(*__xg(ptr)), "0"(old)
+                                    : "memory");
+               return prev;
+       case 4:
+               __asm__ __volatile__(LOCK_PREFIX "cmpxchgl %k1,%2"
+                                    : "=a"(prev)
+                                    : "q"(new), "m"(*__xg(ptr)), "0"(old)
+                                    : "memory");
+               return prev;
+       case 8:
+               __asm__ __volatile__(LOCK_PREFIX "cmpxchgq %1,%2"
+                                    : "=a"(prev)
+                                    : "q"(new), "m"(*__xg(ptr)), "0"(old)
+                                    : "memory");
+               return prev;
+       }
+       return old;
+}
+
+#define cmpxchg(ptr,o,n)\
+       ((__typeof__(*(ptr)))__cmpxchg((ptr),(unsigned long)(o),\
+                                       (unsigned long)(n),sizeof(*(ptr))))
+
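
cmpxchg() returns whatever was in memory, so the caller knows the update took effect when that return value equals the old value it passed in; the usual construction on top of it is a compare-and-swap retry loop. A user-space sketch of that loop with __sync_val_compare_and_swap standing in for cmpxchg (illustration only):

/* sketch: lock-free increment using a compare-and-swap loop */
#include <stdio.h>

static unsigned long counter;

static unsigned long cas_inc(unsigned long *p)
{
	unsigned long old, seen;

	do {
		old = *p;
		/* returns the prior memory value, like cmpxchg() above */
		seen = __sync_val_compare_and_swap(p, old, old + 1);
	} while (seen != old);		/* someone raced us: retry */

	return old + 1;
}

int main(void)
{
	printf("%lu\n", cas_inc(&counter));	/* prints 1 */
	return 0;
}
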
+
+#ifdef CONFIG_SMP
+#define smp_mb()       mb()
+#define smp_rmb()      rmb()
+#define smp_wmb()      wmb()
+#else
+#define smp_mb()       barrier()
+#define smp_rmb()      barrier()
+#define smp_wmb()      barrier()
+#endif
+
+    
+/*
+ * Force strict CPU ordering.
+ * And yes, this is required on UP too when we're talking
+ * to devices.
+ *
+ * For now, "wmb()" doesn't actually do anything, as all
+ * Intel CPUs follow what Intel calls a *Processor Order*,
+ * in which all writes are seen in the program order even
+ * outside the CPU.
+ *
+ * I expect future Intel CPUs to have a weaker ordering,
+ * but I'd also expect them to finally get their act together
+ * and add some real memory barriers if so.
+ */
+#define mb()   __asm__ __volatile__ ("lock; addl $0,0(%%rsp)": : :"memory")
+#define rmb()  mb()
+#define wmb()  __asm__ __volatile__ ("": : :"memory")
+#define set_mb(var, value) do { xchg(&var, value); } while (0)
+#define set_wmb(var, value) do { var = value; wmb(); } while (0)
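
The point of the smp_wmb()/smp_rmb() pair is publish/consume ordering: the producer writes the payload, issues a write barrier, then sets the flag; the consumer sees the flag, issues a read barrier, then reads the payload. A stand-alone sketch of that discipline, with GCC's __sync_synchronize() full barrier standing in for the kernel barriers so it compiles outside the kernel:

/* sketch: wmb/rmb pairing for a publish/consume handshake */
#include <stdio.h>

static int payload;
static volatile int ready;

static void produce(int value)
{
	payload = value;
	__sync_synchronize();	/* stands in for smp_wmb(): payload before flag */
	ready = 1;
}

static int consume(void)
{
	while (!ready)
		;		/* wait for the flag */
	__sync_synchronize();	/* stands in for smp_rmb(): flag before payload */
	return payload;
}

int main(void)
{
	produce(42);
	printf("%d\n", consume());	/* prints 42 */
	return 0;
}
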
+
+/* interrupt control.. */
+#define __save_flags(x)                __asm__ __volatile__("# save_flags \n\t pushfq ; popq %q0":"=g" (x): /* no input */ :"memory")
+#define __restore_flags(x)     __asm__ __volatile__("# restore_flags \n\t pushq %0 ; popfq": /* no output */ :"g" (x):"memory", "cc")
+#define __cli()                __asm__ __volatile__("cli": : :"memory")
+#define __sti()                        __asm__ __volatile__("sti": : :"memory")
+/* used in the idle loop; sti enables interrupts only after the next instruction, so no wakeup can slip in between sti and hlt */
+#define safe_halt()            __asm__ __volatile__("sti; hlt": : :"memory")
+
+/* For spinlocks etc */
+#define local_irq_save(x)      __asm__ __volatile__("# local_irq_save \n\t pushfq ; popq %0 ; cli":"=g" (x): /* no input */ :"memory")
+#define local_irq_restore(x)   __asm__ __volatile__("# local_irq_restore \n\t pushq %0 ; popfq": /* no output */ :"g" (x):"memory")
+#define local_irq_disable()    __asm__ __volatile__("cli": : :"memory")
+#define local_irq_enable()     __asm__ __volatile__("sti": : :"memory")
+
+#ifdef CONFIG_SMP
+
+extern void __global_cli(void);
+extern void __global_sti(void);
+extern unsigned long __global_save_flags(void);
+extern void __global_restore_flags(unsigned long);
+#define cli() __global_cli()
+#define sti() __global_sti()
+#define save_flags(x) ((x)=__global_save_flags())
+#define restore_flags(x) __global_restore_flags(x)
+
+#else
+
+#define cli() __cli()
+#define sti() __sti()
+#define save_flags(x) __save_flags(x)
+#define restore_flags(x) __restore_flags(x)
+
+#endif
+
+#define icebp() asm volatile("xchg %bx,%bx")
+
+
+/*
+ * disable hlt during certain critical i/o operations
+ */
+#define HAVE_DISABLE_HLT
+void disable_hlt(void);
+void enable_hlt(void);
+
+#endif
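
The interrupt-flag macros above all work by moving RFLAGS through the stack with pushfq/popq; cli and sti themselves are privileged, but the pushfq half can be exercised anywhere. A stand-alone sketch that captures RFLAGS the same way __save_flags()/local_irq_save() do and reports the interrupt-enable bit (IF, bit 9); illustration only:

/* sketch: what __save_flags() captures - read RFLAGS via pushfq/popq */
#include <stdio.h>

int main(void)
{
	unsigned long flags;

	__asm__ __volatile__("pushfq ; popq %0" : "=g" (flags) : : "memory");
	printf("RFLAGS = %#lx, IF = %lu\n", flags, (flags >> 9) & 1);
	return 0;
}
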
diff --git a/include/asm-x86_64/termbits.h b/include/asm-x86_64/termbits.h
new file mode 100644 (file)
index 0000000..6db6223
--- /dev/null
@@ -0,0 +1,172 @@
+#ifndef __ARCH_X8664_TERMBITS_H__
+#define __ARCH_X8664_TERMBITS_H__
+
+#include <linux/posix_types.h>
+
+typedef unsigned char  cc_t;
+typedef unsigned int   speed_t;
+typedef unsigned int   tcflag_t;
+
+#define NCCS 19
+struct termios {
+       tcflag_t c_iflag;               /* input mode flags */
+       tcflag_t c_oflag;               /* output mode flags */
+       tcflag_t c_cflag;               /* control mode flags */
+       tcflag_t c_lflag;               /* local mode flags */
+       cc_t c_line;                    /* line discipline */
+       cc_t c_cc[NCCS];                /* control characters */
+};
+
+/* c_cc characters */
+#define VINTR 0
+#define VQUIT 1
+#define VERASE 2
+#define VKILL 3
+#define VEOF 4
+#define VTIME 5
+#define VMIN 6
+#define VSWTC 7
+#define VSTART 8
+#define VSTOP 9
+#define VSUSP 10
+#define VEOL 11
+#define VREPRINT 12
+#define VDISCARD 13
+#define VWERASE 14
+#define VLNEXT 15
+#define VEOL2 16
+
+/* c_iflag bits */
+#define IGNBRK 0000001
+#define BRKINT 0000002
+#define IGNPAR 0000004
+#define PARMRK 0000010
+#define INPCK  0000020
+#define ISTRIP 0000040
+#define INLCR  0000100
+#define IGNCR  0000200
+#define ICRNL  0000400
+#define IUCLC  0001000
+#define IXON   0002000
+#define IXANY  0004000
+#define IXOFF  0010000
+#define IMAXBEL        0020000
+
+/* c_oflag bits */
+#define OPOST  0000001
+#define OLCUC  0000002
+#define ONLCR  0000004
+#define OCRNL  0000010
+#define ONOCR  0000020
+#define ONLRET 0000040
+#define OFILL  0000100
+#define OFDEL  0000200
+#define NLDLY  0000400
+#define   NL0  0000000
+#define   NL1  0000400
+#define CRDLY  0003000
+#define   CR0  0000000
+#define   CR1  0001000
+#define   CR2  0002000
+#define   CR3  0003000
+#define TABDLY 0014000
+#define   TAB0 0000000
+#define   TAB1 0004000
+#define   TAB2 0010000
+#define   TAB3 0014000
+#define   XTABS        0014000
+#define BSDLY  0020000
+#define   BS0  0000000
+#define   BS1  0020000
+#define VTDLY  0040000
+#define   VT0  0000000
+#define   VT1  0040000
+#define FFDLY  0100000
+#define   FF0  0000000
+#define   FF1  0100000
+
+/* c_cflag bit meaning */
+#define CBAUD  0010017
+#define  B0    0000000         /* hang up */
+#define  B50   0000001
+#define  B75   0000002
+#define  B110  0000003
+#define  B134  0000004
+#define  B150  0000005
+#define  B200  0000006
+#define  B300  0000007
+#define  B600  0000010
+#define  B1200 0000011
+#define  B1800 0000012
+#define  B2400 0000013
+#define  B4800 0000014
+#define  B9600 0000015
+#define  B19200        0000016
+#define  B38400        0000017
+#define EXTA B19200
+#define EXTB B38400
+#define CSIZE  0000060
+#define   CS5  0000000
+#define   CS6  0000020
+#define   CS7  0000040
+#define   CS8  0000060
+#define CSTOPB 0000100
+#define CREAD  0000200
+#define PARENB 0000400
+#define PARODD 0001000
+#define HUPCL  0002000
+#define CLOCAL 0004000
+#define CBAUDEX 0010000
+#define    B57600 0010001
+#define   B115200 0010002
+#define   B230400 0010003
+#define   B460800 0010004
+#define   B500000 0010005
+#define   B576000 0010006
+#define   B921600 0010007
+#define  B1000000 0010010
+#define  B1152000 0010011
+#define  B1500000 0010012
+#define  B2000000 0010013
+#define  B2500000 0010014
+#define  B3000000 0010015
+#define  B3500000 0010016
+#define  B4000000 0010017
+#define CIBAUD   002003600000  /* input baud rate (not used) */
+#define CMSPAR   010000000000          /* mark or space (stick) parity */
+#define CRTSCTS          020000000000          /* flow control */
+
+/* c_lflag bits */
+#define ISIG   0000001
+#define ICANON 0000002
+#define XCASE  0000004
+#define ECHO   0000010
+#define ECHOE  0000020
+#define ECHOK  0000040
+#define ECHONL 0000100
+#define NOFLSH 0000200
+#define TOSTOP 0000400
+#define ECHOCTL        0001000
+#define ECHOPRT        0002000
+#define ECHOKE 0004000
+#define FLUSHO 0010000
+#define PENDIN 0040000
+#define IEXTEN 0100000
+
+/* tcflow() and TCXONC use these */
+#define        TCOOFF          0
+#define        TCOON           1
+#define        TCIOFF          2
+#define        TCION           3
+
+/* tcflush() and TCFLSH use these */
+#define        TCIFLUSH        0
+#define        TCOFLUSH        1
+#define        TCIOFLUSH       2
+
+/* tcsetattr uses these */
+#define        TCSANOW         0
+#define        TCSADRAIN       1
+#define        TCSAFLUSH       2
+
+#endif
diff --git a/include/asm-x86_64/termios.h b/include/asm-x86_64/termios.h
new file mode 100644 (file)
index 0000000..041a91f
--- /dev/null
@@ -0,0 +1,106 @@
+#ifndef _X8664_TERMIOS_H
+#define _X8664_TERMIOS_H
+
+#include <asm/termbits.h>
+#include <asm/ioctls.h>
+
+struct winsize {
+       unsigned short ws_row;
+       unsigned short ws_col;
+       unsigned short ws_xpixel;
+       unsigned short ws_ypixel;
+};
+
+#define NCC 8
+struct termio {
+       unsigned short c_iflag;         /* input mode flags */
+       unsigned short c_oflag;         /* output mode flags */
+       unsigned short c_cflag;         /* control mode flags */
+       unsigned short c_lflag;         /* local mode flags */
+       unsigned char c_line;           /* line discipline */
+       unsigned char c_cc[NCC];        /* control characters */
+};
+
+/* modem lines */
+#define TIOCM_LE       0x001
+#define TIOCM_DTR      0x002
+#define TIOCM_RTS      0x004
+#define TIOCM_ST       0x008
+#define TIOCM_SR       0x010
+#define TIOCM_CTS      0x020
+#define TIOCM_CAR      0x040
+#define TIOCM_RNG      0x080
+#define TIOCM_DSR      0x100
+#define TIOCM_CD       TIOCM_CAR
+#define TIOCM_RI       TIOCM_RNG
+#define TIOCM_OUT1     0x2000
+#define TIOCM_OUT2     0x4000
+#define TIOCM_LOOP     0x8000
+
+/* ioctl (fd, TIOCSERGETLSR, &result) where result may be as below */
+
+/* line disciplines */
+#define N_TTY          0
+#define N_SLIP         1
+#define N_MOUSE                2
+#define N_PPP          3
+#define N_STRIP                4
+#define N_AX25         5
+#define N_X25          6       /* X.25 async */
+#define N_6PACK                7
+#define N_MASC         8       /* Reserved for Mobitex module <kaz@cafe.net> */
+#define N_R3964                9       /* Reserved for Simatic R3964 module */
+#define N_PROFIBUS_FDL 10      /* Reserved for Profibus <Dave@mvhi.com> */
+#define N_IRDA         11      /* Linux IR - http://irda.sourceforge.net/ */
+#define N_SMSBLOCK     12      /* SMS block mode - for talking to GSM data cards about SMS messages */
+#define N_HDLC         13      /* synchronous HDLC */
+#define N_SYNC_PPP     14      /* synchronous PPP */
+#define N_HCI          15  /* Bluetooth HCI UART */
+
+#ifdef __KERNEL__
+
+/*     intr=^C         quit=^\         erase=del       kill=^U
+       eof=^D          vtime=\0        vmin=\1         sxtc=\0
+       start=^Q        stop=^S         susp=^Z         eol=\0
+       reprint=^R      discard=^U      werase=^W       lnext=^V
+       eol2=\0
+*/
+#define INIT_C_CC "\003\034\177\025\004\0\1\0\021\023\032\0\022\017\027\026\0"
+
+/*
+ * Translate a "termio" structure into a "termios". Ugh.
+ */
+#define SET_LOW_TERMIOS_BITS(termios, termio, x) { \
+       unsigned short __tmp; \
+       get_user(__tmp,&(termio)->x); \
+       *(unsigned short *) &(termios)->x = __tmp; \
+}
+
+#define user_termio_to_kernel_termios(termios, termio) \
+({ \
+       SET_LOW_TERMIOS_BITS(termios, termio, c_iflag); \
+       SET_LOW_TERMIOS_BITS(termios, termio, c_oflag); \
+       SET_LOW_TERMIOS_BITS(termios, termio, c_cflag); \
+       SET_LOW_TERMIOS_BITS(termios, termio, c_lflag); \
+       copy_from_user((termios)->c_cc, (termio)->c_cc, NCC); \
+})
+
+/*
+ * Translate a "termios" structure into a "termio". Ugh.
+ */
+#define kernel_termios_to_user_termio(termio, termios) \
+({ \
+       put_user((termios)->c_iflag, &(termio)->c_iflag); \
+       put_user((termios)->c_oflag, &(termio)->c_oflag); \
+       put_user((termios)->c_cflag, &(termio)->c_cflag); \
+       put_user((termios)->c_lflag, &(termio)->c_lflag); \
+       put_user((termios)->c_line,  &(termio)->c_line); \
+       copy_to_user((termio)->c_cc, (termios)->c_cc, NCC); \
+})
+
+#define user_termios_to_kernel_termios(k, u) copy_from_user(k, u, sizeof(struct termios))
+#define kernel_termios_to_user_termios(u, k) copy_to_user(u, k, sizeof(struct termios))
+
+#endif /* __KERNEL__ */
+
+#endif /* _X8664_TERMIOS_H */
diff --git a/include/asm-x86_64/thread_info.h b/include/asm-x86_64/thread_info.h
new file mode 100644 (file)
index 0000000..19baf04
--- /dev/null
@@ -0,0 +1,116 @@
+/* thread_info.h: x86_64 low-level thread information
+ *
+ * Copyright (C) 2002  David Howells (dhowells@redhat.com)
+ * - Incorporating suggestions made by Linus Torvalds and Dave Miller
+ */
+
+#ifndef _ASM_THREAD_INFO_H
+#define _ASM_THREAD_INFO_H
+
+#ifdef __KERNEL__
+
+#ifndef __ASSEMBLY__
+#include <asm/processor.h>
+#include <linux/config.h>
+#include <asm/pda.h>
+#endif
+
+/*
+ * low level task data that entry.S needs immediate access to
+ * - this struct should fit entirely inside of one cache line
+ * - this struct shares the supervisor stack pages
+ */
+#ifndef __ASSEMBLY__
+struct thread_info {
+       struct task_struct      *task;          /* main task structure */
+       struct exec_domain      *exec_domain;   /* execution domain */
+       __u32                   flags;          /* low level flags */
+       __u32                   cpu;            /* current CPU */
+       int                     preempt_count;
+
+       mm_segment_t            addr_limit;     
+};
+
+#endif
+
+/*
+ * macros/functions for gaining access to the thread information structure
+ */
+#ifndef __ASSEMBLY__
+#define INIT_THREAD_INFO(tsk)                  \
+{                                              \
+       task:           &tsk,                   \
+       exec_domain:    &default_exec_domain,   \
+       flags:          0,                      \
+       cpu:            0,                      \
+       addr_limit:     KERNEL_DS,              \
+}
+
+#define init_thread_info       (init_thread_union.thread_info)
+#define init_stack             (init_thread_union.stack)
+
+/* how to get the thread information struct from C */
+
+#ifdef CONFIG_PREEMPT 
+/* Preemptive kernels need to access this from interrupt context too. */ 
+static inline struct thread_info *current_thread_info(void)
+{ 
+       struct thread_info *ti;
+       ti = (void *)read_pda(kernelstack) + PDA_STACKOFFSET - THREAD_SIZE;
+       return ti; 
+} 
+#else
+/* Otherwise use the slightly cheaper stack-masking approach. */
+static inline struct thread_info *current_thread_info(void)
+{
+       struct thread_info *ti;
+       __asm__("andq %%rsp,%0; ":"=r" (ti) : "0" (~8191UL));
+       return ti;
+}
+#endif
+
+/* thread information allocation */
+#define THREAD_SIZE (2*PAGE_SIZE)
+#define alloc_thread_info() ((struct thread_info *) __get_free_pages(GFP_KERNEL,1))
+#define free_thread_info(ti) free_pages((unsigned long) (ti), 1)
+#define get_thread_info(ti) get_task_struct((ti)->task)
+#define put_thread_info(ti) put_task_struct((ti)->task)
+
+#else /* !__ASSEMBLY__ */
+
+/* how to get the thread information struct from ASM */
+#define GET_THREAD_INFO(reg) \
+       movq $-8192, reg; \
+       andq %rsp, reg
+
+#endif
+
+/*
+ * thread information flags
+ * - these are process state flags that various assembly files may need to access
+ * - pending work-to-be-done flags are in LSW
+ * - other flags in MSW
+ * Warning: layout of LSW is hardcoded in entry.S
+ */
+#define TIF_SYSCALL_TRACE      0       /* syscall trace active */
+#define TIF_NOTIFY_RESUME      1       /* resumption notification requested */
+#define TIF_SIGPENDING         2       /* signal pending */
+#define TIF_NEED_RESCHED       3       /* rescheduling necessary */
+#define TIF_USEDFPU            16      /* FPU was used by this task this quantum (SMP) */
+#define TIF_POLLING_NRFLAG     17      /* true if poll_idle() is polling TIF_NEED_RESCHED */
+#define TIF_IA32               18      /* 32bit process */ 
+
+#define _TIF_SYSCALL_TRACE     (1<<TIF_SYSCALL_TRACE)
+#define _TIF_NOTIFY_RESUME     (1<<TIF_NOTIFY_RESUME)
+#define _TIF_SIGPENDING                (1<<TIF_SIGPENDING)
+#define _TIF_NEED_RESCHED      (1<<TIF_NEED_RESCHED)
+#define _TIF_USEDFPU           (1<<TIF_USEDFPU)
+#define _TIF_POLLING_NRFLAG    (1<<TIF_POLLING_NRFLAG)
+#define _TIF_IA32              (1<<TIF_IA32)
+
+#define _TIF_WORK_MASK         0x0000FFFE      /* work to do on interrupt/exception return */
+#define _TIF_ALLWORK_MASK      0x0000FFFF      /* work to do on any return to u-space */
+
+#endif /* __KERNEL__ */
+
+#endif /* _ASM_THREAD_INFO_H */
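
current_thread_info() in the non-preempt case relies on each kernel stack being a THREAD_SIZE-sized, THREAD_SIZE-aligned block with the thread_info at its base, so masking the stack pointer with ~(THREAD_SIZE - 1) lands on the structure; ~8191UL is just that mask spelled out. The same arithmetic on a made-up stack pointer value, as a stand-alone illustration:

/* sketch: the andq %rsp-style masking used by current_thread_info() */
#include <stdio.h>

#define THREAD_SIZE	8192UL		/* 2 * PAGE_SIZE, as in the header */

int main(void)
{
	unsigned long rsp  = 0xffff8000012349f0UL;	/* pretend stack pointer */
	unsigned long base = rsp & ~(THREAD_SIZE - 1);	/* == rsp & ~8191UL */

	printf("rsp  = %#lx\n", rsp);
	printf("base = %#lx\n", base);	/* thread_info would live here */
	return 0;
}
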
diff --git a/include/asm-x86_64/timex.h b/include/asm-x86_64/timex.h
new file mode 100644 (file)
index 0000000..a022d45
--- /dev/null
@@ -0,0 +1,51 @@
+/*
+ * linux/include/asm-x86_64/timex.h
+ *
+ * x86-64 architecture timex specifications
+ */
+#ifndef _ASMx8664_TIMEX_H
+#define _ASMx8664_TIMEX_H
+
+#include <linux/config.h>
+#include <asm/msr.h>
+#include <asm/vsyscall.h>
+
+#define CLOCK_TICK_RATE        1193180 /* Underlying HZ */
+#define CLOCK_TICK_FACTOR      20      /* Factor of both 1000000 and CLOCK_TICK_RATE */
+#define FINETUNE ((((((long)LATCH * HZ - CLOCK_TICK_RATE) << SHIFT_HZ) * \
+       (1000000/CLOCK_TICK_FACTOR) / (CLOCK_TICK_RATE/CLOCK_TICK_FACTOR)) \
+               << (SHIFT_SCALE-SHIFT_HZ)) / HZ)
+
+/*
+ * Standard way to access the cycle counter on i586+ CPUs.
+ * Currently only used on SMP.
+ *
+ * If you really have an SMP machine with i486 chips or older,
+ * compile for that, and this will just always return zero.
+ * That's ok, it just means that the nicer scheduling heuristics
+ * won't work for you.
+ *
+ * We only use the low 32 bits, and we'd simply better make sure
+ * that we reschedule before that wraps. Scheduling at least every
+ * four billion cycles just basically sounds like a good idea,
+ * regardless of how fast the machine is. 
+ */
+typedef unsigned long long cycles_t;
+
+extern cycles_t cacheflush_time;
+
+static inline cycles_t get_cycles (void)
+{
+#ifndef CONFIG_X86_TSC
+       return 0;
+#else
+       unsigned long long ret;
+
+       rdtscll(ret);
+       return ret;
+#endif
+}
+
+extern unsigned long cpu_khz;
+
+#endif
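
get_cycles() is simply the TSC read behind rdtscll(). The same read can be done from user space, where rdtsc is normally unprivileged; a minimal sketch (the rdtsc here is not serialized, so the delta is only a rough figure):

/* sketch: reading the time-stamp counter the way get_cycles() does */
#include <stdio.h>

static inline unsigned long long read_tsc(void)
{
	unsigned int lo, hi;

	__asm__ __volatile__("rdtsc" : "=a" (lo), "=d" (hi));
	return ((unsigned long long)hi << 32) | lo;
}

int main(void)
{
	unsigned long long t0 = read_tsc();
	unsigned long long t1 = read_tsc();

	printf("delta = %llu cycles\n", t1 - t0);
	return 0;
}
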
diff --git a/include/asm-x86_64/tlb.h b/include/asm-x86_64/tlb.h
new file mode 100644 (file)
index 0000000..69c0faa
--- /dev/null
@@ -0,0 +1 @@
+#include <asm-generic/tlb.h>
diff --git a/include/asm-x86_64/types.h b/include/asm-x86_64/types.h
new file mode 100644 (file)
index 0000000..a3aa354
--- /dev/null
@@ -0,0 +1,47 @@
+#ifndef _X86_64_TYPES_H
+#define _X86_64_TYPES_H
+
+typedef unsigned short umode_t;
+
+/*
+ * __xx is ok: it doesn't pollute the POSIX namespace. Use these in the
+ * header files exported to user space
+ */
+
+typedef __signed__ char __s8;
+typedef unsigned char __u8;
+
+typedef __signed__ short __s16;
+typedef unsigned short __u16;
+
+typedef __signed__ int __s32;
+typedef unsigned int __u32;
+
+typedef __signed__ long __s64;
+typedef unsigned long  __u64;
+
+/*
+ * These aren't exported outside the kernel to avoid name space clashes
+ */
+#ifdef __KERNEL__
+
+typedef signed char s8;
+typedef unsigned char u8;
+
+typedef signed short s16;
+typedef unsigned short u16;
+
+typedef signed int s32;
+typedef unsigned int u32;
+
+typedef signed long s64;
+typedef unsigned long u64;
+
+#define BITS_PER_LONG 64
+
+typedef u32 dma64_addr_t;
+typedef u64 dma_addr_t;
+
+#endif /* __KERNEL__ */
+
+#endif
diff --git a/include/asm-x86_64/uaccess.h b/include/asm-x86_64/uaccess.h
new file mode 100644 (file)
index 0000000..4ffa30b
--- /dev/null
@@ -0,0 +1,373 @@
+#ifndef __X86_64_UACCESS_H
+#define __X86_64_UACCESS_H
+
+/*
+ * User space memory access functions
+ */
+#include <linux/config.h>
+#include <linux/errno.h>
+#include <linux/sched.h>
+#include <linux/prefetch.h>
+#include <asm/page.h>
+
+#define VERIFY_READ 0
+#define VERIFY_WRITE 1
+
+/*
+ * The fs value determines whether argument validity checking should be
+ * performed or not.  If get_fs() == USER_DS, checking is performed; with
+ * get_fs() == KERNEL_DS, checking is bypassed.
+ *
+ * For historical reasons, these macros are grossly misnamed.
+ */
+
+#define MAKE_MM_SEG(s) ((mm_segment_t) { (s) })
+
+#define KERNEL_DS      MAKE_MM_SEG(0xFFFFFFFFFFFFFFFF)
+#define USER_DS                MAKE_MM_SEG(PAGE_OFFSET)
+
+#define get_ds()       (KERNEL_DS)
+#define get_fs()       (current_thread_info()->addr_limit)
+#define set_fs(x)      (current_thread_info()->addr_limit = (x))
+
+#define segment_eq(a,b)        ((a).seg == (b).seg)
+
+#define __addr_ok(addr) (!((unsigned long)(addr) & (current_thread_info()->addr_limit.seg)))
+
+/*
+ * Uhhuh, this needs 65-bit arithmetic. We have a carry..
+ */
+#define __range_not_ok(addr,size) ({ \
+       unsigned long flag,sum; \
+       asm("# range_ok\n\r" \
+               "addq %3,%1 ; sbbq %0,%0 ; cmpq %1,%4 ; sbbq $0,%0"  \
+               :"=&r" (flag), "=r" (sum) \
+               :"1" (addr),"g" ((long)(size)),"g" (current_thread_info()->addr_limit.seg)); \
+       flag; })
+
+#define access_ok(type,addr,size) (__range_not_ok(addr,size) == 0)
+
+extern inline int verify_area(int type, const void * addr, unsigned long size)
+{
+       return access_ok(type,addr,size) ? 0 : -EFAULT;
+}
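
__range_not_ok() folds two tests into four instructions: the addq/sbbq pair detects the 64-bit wrap of addr + size (the "65-bit arithmetic" mentioned above), and the cmpq/sbbq pair detects the sum running past addr_limit. The equivalent logic written out in C; the limit value below is illustrative, not the real PAGE_OFFSET:

/* sketch: the overflow-plus-limit test behind __range_not_ok(), in C */
#include <stdio.h>

static int range_not_ok(unsigned long addr, unsigned long size,
			unsigned long limit)
{
	unsigned long sum = addr + size;

	/* bad if the addition wrapped, or if the end is past the limit */
	return (sum < addr) || (sum > limit);
}

int main(void)
{
	unsigned long limit = 0x0000007fffffffffUL;	/* illustrative addr_limit */

	printf("%d\n", range_not_ok(0x1000, 0x100, limit));	/* 0: ok    */
	printf("%d\n", range_not_ok(~0UL - 16, 64, limit));	/* 1: wraps */
	return 0;
}
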
+
+
+/*
+ * The exception table consists of pairs of addresses: the first is the
+ * address of an instruction that is allowed to fault, and the second is
+ * the address at which the program should continue.  No registers are
+ * modified, so it is entirely up to the continuation code to figure out
+ * what to do.
+ *
+ * All the routines below use bits of fixup code that are out of line
+ * with the main instruction path.  This means when everything is well,
+ * we don't even have to jump over them.  Further, they do not intrude
+ * on our cache or tlb entries.
+ */
+
+struct exception_table_entry
+{
+       unsigned long insn, fixup;
+};
+
+/* Returns 0 if exception not found and fixup otherwise.  */
+extern unsigned long search_exception_table(unsigned long);
+
+
+/*
+ * These are the main single-value transfer routines.  They automatically
+ * use the right size if we just have the right pointer type.
+ *
+ * This gets kind of ugly. We want to return _two_ values in "get_user()"
+ * and yet we don't want to do any pointers, because that is too much
+ * of a performance impact. Thus we have a few rather ugly macros here,
+ * and hide all the ugliness from the user.
+ *
+ * The "__xxx" versions of the user access functions are versions that
+ * do not verify the address space, that must have been done previously
+ * with a separate "access_ok()" call (this is used when we do multiple
+ * accesses to the same area of user memory).
+ */
+
+extern void __get_user_1(void);
+extern void __get_user_2(void);
+extern void __get_user_4(void);
+extern void __get_user_8(void);
+
+#define __get_user_x(size,ret,x,ptr) \
+       __asm__ __volatile__("call __get_user_" #size \
+               :"=a" (ret),"=d" (x) \
+               :"0" (ptr) \
+               :"rbx")
+
+/* Careful: we have to cast the result to the type of the pointer for sign reasons */
+#define get_user(x,ptr)                                                        \
+({     long __ret_gu,__val_gu;                                         \
+       switch(sizeof (*(ptr))) {                                       \
+       case 1:  __get_user_x(1,__ret_gu,__val_gu,ptr); break;          \
+       case 2:  __get_user_x(2,__ret_gu,__val_gu,ptr); break;          \
+       case 4:  __get_user_x(4,__ret_gu,__val_gu,ptr); break;          \
+       case 8:  __get_user_x(8,__ret_gu,__val_gu,ptr); break;          \
+       default: __get_user_bad(); break;                               \
+       }                                                               \
+       (x) = (__typeof__(*(ptr)))__val_gu;                             \
+       __ret_gu;                                                       \
+})
+
+extern void __put_user_1(void);
+extern void __put_user_2(void);
+extern void __put_user_4(void);
+extern void __put_user_8(void);
+
+extern void __put_user_bad(void);
+
+#define __put_user_x(size,ret,x,ptr)                                   \
+       __asm__ __volatile__("call __put_user_" #size                   \
+               :"=a" (ret)                                             \
+               :"0" (ptr),"d" (x)                                      \
+               :"rbx")
+
+#define put_user(x,ptr)                                                        \
+  __put_user_check((__typeof__(*(ptr)))(x),(ptr),sizeof(*(ptr)))
+
+#define __get_user(x,ptr) \
+  __get_user_nocheck((x),(ptr),sizeof(*(ptr)))
+#define __put_user(x,ptr) \
+  __put_user_nocheck((__typeof__(*(ptr)))(x),(ptr),sizeof(*(ptr)))
+
+#define __put_user_nocheck(x,ptr,size)                 \
+({                                                     \
+       long __pu_err;                                  \
+       __put_user_size((x),(ptr),(size),__pu_err);     \
+       __pu_err;                                       \
+})
+
+
+#define __put_user_check(x,ptr,size)                   \
+({                                                     \
+       long __pu_err = -EFAULT;                        \
+       __typeof__(*(ptr)) *__pu_addr = (ptr);          \
+       if (access_ok(VERIFY_WRITE,__pu_addr,size))     \
+               __put_user_size((x),__pu_addr,(size),__pu_err); \
+       __pu_err;                                       \
+})
+
+#define __put_user_size(x,ptr,size,retval)                             \
+do {                                                                   \
+       retval = 0;                                                     \
+       switch (size) {                                                 \
+         case 1: __put_user_asm(x,ptr,retval,"b","b","iq"); break;     \
+         case 2: __put_user_asm(x,ptr,retval,"w","w","ir"); break;     \
+         case 4: __put_user_asm(x,ptr,retval,"l","k","ir"); break;     \
+         case 8: __put_user_asm(x,ptr,retval,"q","","ir"); break;      \
+         default: __put_user_bad();                                    \
+       }                                                               \
+} while (0)
+
+/* FIXME: this hack is definitely wrong -AK */
+struct __large_struct { unsigned long buf[100]; };
+#define __m(x) (*(struct __large_struct *)(x))
+
+/*
+ * Tell gcc we read from memory instead of writing: this is because
+ * we do not write to any memory gcc knows about, so there are no
+ * aliasing issues.
+ */
+#define __put_user_asm(x, addr, err, itype, rtype, ltype)      \
+       __asm__ __volatile__(                                   \
+               "1:     mov"itype" %"rtype"1,%2\n"              \
+               "2:\n"                                          \
+               ".section .fixup,\"ax\"\n"                      \
+               "3:     movq %3,%0\n"                           \
+               "       jmp 2b\n"                               \
+               ".previous\n"                                   \
+               ".section __ex_table,\"a\"\n"                   \
+               "       .align 8\n"                             \
+               "       .quad 1b,3b\n"                          \
+               ".previous"                                     \
+               : "=r"(err)                                     \
+               : ltype (x), "m"(__m(addr)), "i"(-EFAULT), "0"(err))
+
+
+#define __get_user_nocheck(x,ptr,size)                         \
+({                                                             \
+       long __gu_err, __gu_val;                                \
+       __get_user_size(__gu_val,(ptr),(size),__gu_err);        \
+       (x) = (__typeof__(*(ptr)))__gu_val;                     \
+       __gu_err;                                               \
+})
+
+extern long __get_user_bad(void);
+
+#define __get_user_size(x,ptr,size,retval)                             \
+do {                                                                   \
+       retval = 0;                                                     \
+       switch (size) {                                                 \
+         case 1: __get_user_asm(x,ptr,retval,"b","b","=q"); break;     \
+         case 2: __get_user_asm(x,ptr,retval,"w","w","=r"); break;     \
+         case 4: __get_user_asm(x,ptr,retval,"l","k","=r"); break;     \
+         case 8: __get_user_asm(x,ptr,retval,"q","","=r"); break;      \
+         default: (x) = __get_user_bad();                              \
+       }                                                               \
+} while (0)
+
+#define __get_user_asm(x, addr, err, itype, rtype, ltype)      \
+       __asm__ __volatile__(                                   \
+               "1:     mov"itype" %2,%"rtype"1\n"              \
+               "2:\n"                                          \
+               ".section .fixup,\"ax\"\n"                      \
+               "3:     mov %3,%0\n"                            \
+               "       xor"itype" %"rtype"1,%"rtype"1\n"       \
+               "       jmp 2b\n"                               \
+               ".previous\n"                                   \
+               ".section __ex_table,\"a\"\n"                   \
+               "       .align 8\n"                             \
+               "       .quad 1b,3b\n"                          \
+               ".previous"                                     \
+               : "=r"(err), ltype (x)                          \
+               : "m"(__m(addr)), "i"(-EFAULT), "0"(err))
+
+/*
+ * Copy To/From Userspace
+ */
+
+/* Generic arbitrary sized copy.  */
+/* Could do 8byte accesses, instead of 4bytes. */
+
+#define __copy_user(to,from,size)                                      \
+do {                                                                   \
+       long __d0, __d1;                                                \
+       __asm__ __volatile__(                                           \
+               "0:     rep; movsl\n"                                   \
+               "       movq %3,%0\n"                                   \
+               "1:     rep; movsb\n"                                   \
+               "2:\n"                                                  \
+               ".section .fixup,\"ax\"\n"                              \
+               "3:     lea 0(%3,%0,4),%0\n"                            \
+               "       jmp 2b\n"                                       \
+               ".previous\n"                                           \
+               ".section __ex_table,\"a\"\n"                           \
+               "       .align 8\n"                                     \
+               "       .quad 0b,3b\n"                                  \
+               "       .quad 1b,2b\n"                                  \
+               ".previous"                                             \
+               : "=&c"(size), "=&D" (__d0), "=&S" (__d1)               \
+               : "r"(size & 3), "0"(size / 4), "1"(to), "2"(from)      \
+               : "memory");                                            \
+} while (0)
+
+#define __copy_user_zeroing(to,from,size)                              \
+do {                                                                   \
+       long __d0, __d1;                                                \
+       __asm__ __volatile__(                                           \
+               "0:     rep; movsl\n"                                   \
+               "       movq %3,%0\n"                                   \
+               "1:     rep; movsb\n"                                   \
+               "2:\n"                                                  \
+               ".section .fixup,\"ax\"\n"                              \
+               "3:     lea 0(%3,%0,4),%0\n"                            \
+               "4:     pushq %0\n"                                     \
+               "       pushq %%rax\n"                                  \
+               "       xorq %%rax,%%rax\n"                             \
+               "       rep; stosb\n"                                   \
+               "       popq %%rax\n"                                   \
+               "       popq %0\n"                                      \
+               "       jmp 2b\n"                                       \
+               ".previous\n"                                           \
+               ".section __ex_table,\"a\"\n"                           \
+               "       .align 8\n"                                     \
+               "       .quad 0b,3b\n"                                  \
+               "       .quad 1b,4b\n"                                  \
+               ".previous"                                             \
+               : "=&c"(size), "=&D" (__d0), "=&S" (__d1)               \
+               : "r"(size & 3), "0"(size / 4), "1"(to), "2"(from)      \
+               : "memory");                                            \
+} while (0)
+
+
+/* We let the __ versions of copy_from/to_user inline, because they're often
+ * used in fast paths and have only a small space overhead.
+ */
+static inline unsigned long
+__generic_copy_from_user_nocheck(void *to, const void *from, unsigned long n)
+{
+       __copy_user_zeroing(to,from,n);
+       return n;
+}
+
+static inline unsigned long
+__generic_copy_to_user_nocheck(void *to, const void *from, unsigned long n)
+{
+       prefetch(from);
+       __copy_user(to,from,n);
+       return n;
+}
+
+
+
+unsigned long __generic_copy_to_user(void *, const void *, unsigned long);
+unsigned long __generic_copy_from_user(void *, const void *, unsigned long);
+
+static inline unsigned long
+__constant_copy_to_user(void *to, const void *from, unsigned long n)
+{
+       if (access_ok(VERIFY_WRITE, to, n))
+               __copy_user(to,from,n);
+       return n;
+}
+
+static inline unsigned long
+__constant_copy_from_user(void *to, const void *from, unsigned long n)
+{
+       if (access_ok(VERIFY_READ, from, n))
+               __copy_user_zeroing(to,from,n);
+       else
+               memset(to, 0, n);
+       return n;
+}
+
+static inline unsigned long
+__constant_copy_to_user_nocheck(void *to, const void *from, unsigned long n)
+{
+       __copy_user(to,from,n);
+       return n;
+}
+
+static inline unsigned long
+__constant_copy_from_user_nocheck(void *to, const void *from, unsigned long n)
+{
+       __copy_user_zeroing(to,from,n);
+       return n;
+}
+
+#define copy_to_user(to,from,n)                                \
+       (__builtin_constant_p(n) ?                      \
+        __constant_copy_to_user((to),(from),(n)) :     \
+        __generic_copy_to_user((to),(from),(n)))
+
+#define copy_from_user(to,from,n)                      \
+       (__builtin_constant_p(n) ?                      \
+        __constant_copy_from_user((to),(from),(n)) :   \
+        __generic_copy_from_user((to),(from),(n)))
+
+#define __copy_to_user(to,from,n)                      \
+       (__builtin_constant_p(n) ?                      \
+        __constant_copy_to_user_nocheck((to),(from),(n)) :     \
+        __generic_copy_to_user_nocheck((to),(from),(n)))
+
+#define __copy_from_user(to,from,n)                    \
+       (__builtin_constant_p(n) ?                      \
+        __constant_copy_from_user_nocheck((to),(from),(n)) :   \
+        __generic_copy_from_user_nocheck((to),(from),(n)))
+
+long strncpy_from_user(char *dst, const char *src, long count);
+long __strncpy_from_user(char *dst, const char *src, long count);
+#define strlen_user(str) strnlen_user(str, ~0UL >> 1)
+long strnlen_user(const char *str, long n);
+unsigned long clear_user(void *mem, unsigned long len);
+unsigned long __clear_user(void *mem, unsigned long len);
+
+#endif /* __X86_64_UACCESS_H */
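
copy_to_user()/copy_from_user() above choose between the inlined __constant_* path and the out-of-line __generic_* path with __builtin_constant_p(n), so fixed-size copies can be expanded in place. The dispatch itself is easy to watch in isolation; constant_copy/generic_copy below are made-up stand-ins, not kernel functions:

/* sketch: the __builtin_constant_p dispatch used by the copy_*_user() macros */
#include <stdio.h>
#include <string.h>

static unsigned long constant_copy(void *to, const void *from, unsigned long n)
{
	printf("constant-size path (n = %lu)\n", n);
	memcpy(to, from, n);
	return 0;			/* 0 bytes left uncopied */
}

static unsigned long generic_copy(void *to, const void *from, unsigned long n)
{
	printf("generic path (n = %lu)\n", n);
	memcpy(to, from, n);
	return 0;
}

#define demo_copy(to, from, n)				\
	(__builtin_constant_p(n) ?			\
	 constant_copy((to), (from), (n)) :		\
	 generic_copy((to), (from), (n)))

int main(void)
{
	char src[16] = "hello", dst[16];
	volatile unsigned long n = 6;	/* volatile: genuinely not a compile-time constant */

	demo_copy(dst, src, 6);		/* literal size: constant path */
	demo_copy(dst, src, n);		/* runtime size: generic path  */
	printf("%s\n", dst);
	return 0;
}
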
diff --git a/include/asm-x86_64/ucontext.h b/include/asm-x86_64/ucontext.h
new file mode 100644 (file)
index 0000000..159a3da
--- /dev/null
@@ -0,0 +1,12 @@
+#ifndef _ASMX8664_UCONTEXT_H
+#define _ASMX8664_UCONTEXT_H
+
+struct ucontext {
+       unsigned long     uc_flags;
+       struct ucontext  *uc_link;
+       stack_t           uc_stack;
+       struct sigcontext uc_mcontext;
+       sigset_t          uc_sigmask;   /* mask last for extensibility */
+};
+
+#endif
diff --git a/include/asm-x86_64/unaligned.h b/include/asm-x86_64/unaligned.h
new file mode 100644 (file)
index 0000000..d4bf78d
--- /dev/null
@@ -0,0 +1,37 @@
+#ifndef __X8664_UNALIGNED_H
+#define __X8664_UNALIGNED_H
+
+/*
+ * The x86-64 can do unaligned accesses itself. 
+ *
+ * The strange macros are there to make sure these can't
+ * be misused in a way that makes them not work on other
+ * architectures where unaligned accesses aren't as simple.
+ */
+
+/**
+ * get_unaligned - get value from possibly mis-aligned location
+ * @ptr: pointer to value
+ *
+ * This macro should be used for accessing values larger in size than 
+ * single bytes at locations that are expected to be improperly aligned, 
+ * e.g. retrieving a u16 value from a location not u16-aligned.
+ *
+ * Note that unaligned accesses can be very expensive on some architectures.
+ */
+#define get_unaligned(ptr) (*(ptr))
+
+/**
+ * put_unaligned - put value to a possibly mis-aligned location
+ * @val: value to place
+ * @ptr: pointer to location
+ *
+ * This macro should be used for placing values larger in size than 
+ * single bytes at locations that are expected to be improperly aligned, 
+ * e.g. writing a u16 value to a location not u16-aligned.
+ *
+ * Note that unaligned accesses can be very expensive on some architectures.
+ */
+#define put_unaligned(val, ptr) ((void)( *(ptr) = (val) ))
+
+#endif
diff --git a/include/asm-x86_64/unistd.h b/include/asm-x86_64/unistd.h
new file mode 100644 (file)
index 0000000..21257ec
--- /dev/null
@@ -0,0 +1,653 @@
+#ifndef _ASM_X86_64_UNISTD_H_
+#define _ASM_X86_64_UNISTD_H_
+
+#ifndef __SYSCALL
+#define __SYSCALL(a,b) 
+#endif
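
__SYSCALL(nr, entry) expanding to nothing by default is what makes this header double as a table generator: included normally it only defines the __NR_* numbers, but a second include with __SYSCALL redefined can emit one table entry per line. A self-contained sketch of that X-macro pattern; the handler names are made up, and the kernel's real table generation happens elsewhere, not in this header:

/* sketch: the X-macro pattern behind __SYSCALL(), stand-alone */
#include <stdio.h>

/* first pass: the marker expands to nothing, only the numbers matter */
#define __SYSCALL(nr, entry)
#define __NR_demo_read   0
__SYSCALL(__NR_demo_read, demo_read)
#define __NR_demo_write  1
__SYSCALL(__NR_demo_write, demo_write)
#undef __SYSCALL

static long demo_read(void)  { return 10; }	/* made-up handlers */
static long demo_write(void) { return 20; }

/* second pass: redefine the marker so each line emits a table entry */
typedef long (*sys_call_ptr)(void);
#define __SYSCALL(nr, entry) [nr] = entry,
static sys_call_ptr demo_table[] = {
	__SYSCALL(__NR_demo_read, demo_read)
	__SYSCALL(__NR_demo_write, demo_write)
};

int main(void)
{
	printf("%ld %ld\n", demo_table[__NR_demo_read](),
			    demo_table[__NR_demo_write]());	/* 10 20 */
	return 0;
}
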
+
+/*
+ * This file contains the system call numbers.
+ * 
+ * Note: holes are not allowed.
+ */
+
+/* at least 8 syscalls per cacheline */
+#define __NR_read                                0
+__SYSCALL(__NR_read, sys_read)
+#define __NR_write                               1
+__SYSCALL(__NR_write, sys_write)
+#define __NR_open                                2
+__SYSCALL(__NR_open, sys_open)
+#define __NR_close                               3
+__SYSCALL(__NR_close, sys_close)
+#define __NR_stat                                4
+__SYSCALL(__NR_stat, sys_newstat)
+#define __NR_fstat                               5
+__SYSCALL(__NR_fstat, sys_newfstat)
+#define __NR_lstat                               6
+__SYSCALL(__NR_lstat, sys_newlstat)
+#define __NR_poll                                7
+__SYSCALL(__NR_poll, sys_poll)
+
+#define __NR_lseek                               8
+__SYSCALL(__NR_lseek, sys_lseek)
+#define __NR_mmap                                9
+__SYSCALL(__NR_mmap, sys_mmap)
+#define __NR_mprotect                           10
+__SYSCALL(__NR_mprotect, sys_mprotect)
+#define __NR_munmap                             11
+__SYSCALL(__NR_munmap, sys_munmap)
+#define __NR_brk                                12
+__SYSCALL(__NR_brk, sys_brk)
+#define __NR_rt_sigaction                       13
+__SYSCALL(__NR_rt_sigaction, sys_rt_sigaction)
+#define __NR_rt_sigprocmask                     14
+__SYSCALL(__NR_rt_sigprocmask, sys_rt_sigprocmask)
+#define __NR_rt_sigreturn                       15
+__SYSCALL(__NR_rt_sigreturn, stub_rt_sigreturn)
+
+#define __NR_ioctl                              16
+__SYSCALL(__NR_ioctl, sys_ioctl)
+#define __NR_pread                              17
+__SYSCALL(__NR_pread, sys_pread)
+#define __NR_pwrite                             18
+__SYSCALL(__NR_pwrite, sys_pwrite)
+#define __NR_readv                              19
+__SYSCALL(__NR_readv, sys_readv)
+#define __NR_writev                             20
+__SYSCALL(__NR_writev, sys_writev)
+#define __NR_access                             21
+__SYSCALL(__NR_access, sys_access)
+#define __NR_pipe                               22
+__SYSCALL(__NR_pipe, sys_pipe)
+#define __NR_select                             23
+__SYSCALL(__NR_select, sys_select)
+
+#define __NR_sched_yield                        24
+__SYSCALL(__NR_sched_yield, sys_sched_yield)
+#define __NR_mremap                             25
+__SYSCALL(__NR_mremap, sys_mremap)
+#define __NR_msync                              26
+__SYSCALL(__NR_msync, sys_msync)
+#define __NR_mincore                            27
+__SYSCALL(__NR_mincore, sys_mincore)
+#define __NR_madvise                            28
+__SYSCALL(__NR_madvise, sys_madvise)
+#define __NR_shmget                             29
+__SYSCALL(__NR_shmget, sys_shmget)
+#define __NR_shmat                              30
+__SYSCALL(__NR_shmat, sys_shmat)
+#define __NR_shmctl                             31
+__SYSCALL(__NR_shmctl, sys_shmctl)
+
+#define __NR_dup                                32
+__SYSCALL(__NR_dup, sys_dup)
+#define __NR_dup2                               33
+__SYSCALL(__NR_dup2, sys_dup2)
+#define __NR_pause                              34
+__SYSCALL(__NR_pause, sys_pause)
+#define __NR_nanosleep                          35
+__SYSCALL(__NR_nanosleep, sys_nanosleep)
+#define __NR_getitimer                          36
+__SYSCALL(__NR_getitimer, sys_getitimer)
+#define __NR_alarm                              37
+__SYSCALL(__NR_alarm, sys_alarm)
+#define __NR_setitimer                          38
+__SYSCALL(__NR_setitimer, sys_setitimer)
+#define __NR_getpid                             39
+__SYSCALL(__NR_getpid, sys_getpid)
+
+#define __NR_sendfile                           40
+__SYSCALL(__NR_sendfile, sys_sendfile)
+#define __NR_socket                             41
+__SYSCALL(__NR_socket, sys_socket)
+#define __NR_connect                            42
+__SYSCALL(__NR_connect, sys_connect)
+#define __NR_accept                             43
+__SYSCALL(__NR_accept, sys_accept)
+#define __NR_sendto                             44
+__SYSCALL(__NR_sendto, sys_sendto)
+#define __NR_recvfrom                           45
+__SYSCALL(__NR_recvfrom, sys_recvfrom)
+#define __NR_sendmsg                            46
+__SYSCALL(__NR_sendmsg, sys_sendmsg)
+#define __NR_recvmsg                            47
+__SYSCALL(__NR_recvmsg, sys_recvmsg)
+
+#define __NR_shutdown                           48
+__SYSCALL(__NR_shutdown, sys_shutdown)
+#define __NR_bind                               49
+__SYSCALL(__NR_bind, sys_bind)
+#define __NR_listen                             50
+__SYSCALL(__NR_listen, sys_listen)
+#define __NR_getsockname                        51
+__SYSCALL(__NR_getsockname, sys_getsockname)
+#define __NR_getpeername                        52
+__SYSCALL(__NR_getpeername, sys_getpeername)
+#define __NR_socketpair                         53
+__SYSCALL(__NR_socketpair, sys_socketpair)
+#define __NR_setsockopt                         54
+__SYSCALL(__NR_setsockopt, sys_setsockopt)
+#define __NR_getsockopt                         55
+__SYSCALL(__NR_getsockopt, sys_getsockopt)
+
+#define __NR_clone                              56
+__SYSCALL(__NR_clone, stub_clone)
+#define __NR_fork                               57
+__SYSCALL(__NR_fork, stub_fork) 
+#define __NR_vfork                              58
+__SYSCALL(__NR_vfork, stub_vfork)
+#define __NR_execve                             59
+__SYSCALL(__NR_execve, stub_execve)
+#define __NR_exit                               60
+__SYSCALL(__NR_exit, sys_exit)
+#define __NR_wait4                              61
+__SYSCALL(__NR_wait4, sys_wait4)
+#define __NR_kill                               62
+__SYSCALL(__NR_kill, sys_kill)
+#define __NR_uname                              63
+__SYSCALL(__NR_uname, sys_uname)
+
+#define __NR_semget                             64
+__SYSCALL(__NR_semget, sys_semget)
+#define __NR_semop                              65
+__SYSCALL(__NR_semop, sys_semop)
+#define __NR_semctl                             66
+__SYSCALL(__NR_semctl, sys_semctl)
+#define __NR_shmdt                              67
+__SYSCALL(__NR_shmdt, sys_shmdt)
+#define __NR_msgget                             68
+__SYSCALL(__NR_msgget, sys_msgget)
+#define __NR_msgsnd                             69
+__SYSCALL(__NR_msgsnd, sys_msgsnd)
+#define __NR_msgrcv                             70
+__SYSCALL(__NR_msgrcv, sys_msgrcv)
+#define __NR_msgctl                             71
+__SYSCALL(__NR_msgctl, sys_msgctl)
+
+#define __NR_fcntl                              72
+__SYSCALL(__NR_fcntl, sys_fcntl)
+#define __NR_flock                              73
+__SYSCALL(__NR_flock, sys_flock)
+#define __NR_fsync                              74
+__SYSCALL(__NR_fsync, sys_fsync)
+#define __NR_fdatasync                          75
+__SYSCALL(__NR_fdatasync, sys_fdatasync)
+#define __NR_truncate                           76
+__SYSCALL(__NR_truncate, sys_truncate)
+#define __NR_ftruncate                          77
+__SYSCALL(__NR_ftruncate, sys_ftruncate)
+#define __NR_getdents                           78
+__SYSCALL(__NR_getdents, sys_getdents)
+#define __NR_getcwd                             79
+__SYSCALL(__NR_getcwd, sys_getcwd)
+
+#define __NR_chdir                              80
+__SYSCALL(__NR_chdir, sys_chdir)
+#define __NR_fchdir                             81
+__SYSCALL(__NR_fchdir, sys_fchdir)
+#define __NR_rename                             82
+__SYSCALL(__NR_rename, sys_rename)
+#define __NR_mkdir                              83
+__SYSCALL(__NR_mkdir, sys_mkdir)
+#define __NR_rmdir                              84
+__SYSCALL(__NR_rmdir, sys_rmdir)
+#define __NR_creat                              85
+__SYSCALL(__NR_creat, sys_creat)
+#define __NR_link                               86
+__SYSCALL(__NR_link, sys_link)
+#define __NR_unlink                             87
+__SYSCALL(__NR_unlink, sys_unlink)
+
+#define __NR_symlink                            88
+__SYSCALL(__NR_symlink, sys_symlink)
+#define __NR_readlink                           89
+__SYSCALL(__NR_readlink, sys_readlink)
+#define __NR_chmod                              90
+__SYSCALL(__NR_chmod, sys_chmod)
+#define __NR_fchmod                             91
+__SYSCALL(__NR_fchmod, sys_fchmod)
+#define __NR_chown                              92
+__SYSCALL(__NR_chown, sys_chown)
+#define __NR_fchown                             93
+__SYSCALL(__NR_fchown, sys_fchown)
+#define __NR_lchown                             94
+__SYSCALL(__NR_lchown, sys_lchown)
+#define __NR_umask                              95
+__SYSCALL(__NR_umask, sys_umask)
+
+#define __NR_gettimeofday                       96
+__SYSCALL(__NR_gettimeofday, sys_gettimeofday)
+#define __NR_getrlimit                          97
+__SYSCALL(__NR_getrlimit, sys_getrlimit)
+#define __NR_getrusage                          98
+__SYSCALL(__NR_getrusage, sys_getrusage)
+#define __NR_sysinfo                            99
+__SYSCALL(__NR_sysinfo, sys_sysinfo)
+#define __NR_times                             100
+__SYSCALL(__NR_times, sys_times)
+#define __NR_ptrace                            101
+__SYSCALL(__NR_ptrace, sys_ptrace)
+#define __NR_getuid                            102
+__SYSCALL(__NR_getuid, sys_getuid)
+#define __NR_syslog                            103
+__SYSCALL(__NR_syslog, sys_syslog)
+
+/* at the very end the stuff that never runs during the benchmarks */
+#define __NR_getgid                            104
+__SYSCALL(__NR_getgid, sys_getgid)
+#define __NR_setuid                            105
+__SYSCALL(__NR_setuid, sys_setuid)
+#define __NR_setgid                            106
+__SYSCALL(__NR_setgid, sys_setgid)
+#define __NR_geteuid                           107
+__SYSCALL(__NR_geteuid, sys_geteuid)
+#define __NR_getegid                           108
+__SYSCALL(__NR_getegid, sys_getegid)
+#define __NR_setpgid                           109
+__SYSCALL(__NR_setpgid, sys_setpgid)
+#define __NR_getppid                           110
+__SYSCALL(__NR_getppid, sys_getppid)
+#define __NR_getpgrp                           111
+__SYSCALL(__NR_getpgrp, sys_getpgrp)
+
+#define __NR_setsid                            112
+__SYSCALL(__NR_setsid, sys_setsid)
+#define __NR_setreuid                          113
+__SYSCALL(__NR_setreuid, sys_setreuid)
+#define __NR_setregid                          114
+__SYSCALL(__NR_setregid, sys_setregid)
+#define __NR_getgroups                         115
+__SYSCALL(__NR_getgroups, sys_getgroups)
+#define __NR_setgroups                         116
+__SYSCALL(__NR_setgroups, sys_setgroups)
+#define __NR_setresuid                         117
+__SYSCALL(__NR_setresuid, sys_setresuid)
+#define __NR_getresuid                         118
+__SYSCALL(__NR_getresuid, sys_getresuid)
+#define __NR_setresgid                         119
+__SYSCALL(__NR_setresgid, sys_setresgid)
+
+#define __NR_getresgid                         120
+__SYSCALL(__NR_getresgid, sys_getresgid)
+#define __NR_getpgid                           121
+__SYSCALL(__NR_getpgid, sys_getpgid)
+#define __NR_setfsuid                          122
+__SYSCALL(__NR_setfsuid, sys_setfsuid)
+#define __NR_setfsgid                          123
+__SYSCALL(__NR_setfsgid, sys_setfsgid)
+#define __NR_getsid                            124
+__SYSCALL(__NR_getsid, sys_getsid)
+#define __NR_capget                            125
+__SYSCALL(__NR_capget, sys_capget)
+#define __NR_capset                            126
+__SYSCALL(__NR_capset, sys_capset)
+
+#define __NR_rt_sigpending                     127
+__SYSCALL(__NR_rt_sigpending, sys_rt_sigpending)
+#define __NR_rt_sigtimedwait                   128
+__SYSCALL(__NR_rt_sigtimedwait, sys_rt_sigtimedwait)
+#define __NR_rt_sigqueueinfo                   129
+__SYSCALL(__NR_rt_sigqueueinfo, sys_rt_sigqueueinfo)
+#define __NR_rt_sigsuspend                     130
+__SYSCALL(__NR_rt_sigsuspend, stub_rt_sigsuspend)
+#define __NR_sigaltstack                       131
+__SYSCALL(__NR_sigaltstack, stub_sigaltstack)
+#define __NR_utime                             132
+__SYSCALL(__NR_utime, sys_utime)
+#define __NR_mknod                             133
+__SYSCALL(__NR_mknod, sys_mknod)
+
+#define __NR_uselib                            134
+__SYSCALL(__NR_uselib, sys_uselib)
+#define __NR_personality                       135
+__SYSCALL(__NR_personality, sys_personality)
+
+#define __NR_ustat                             136
+__SYSCALL(__NR_ustat, sys_ustat)
+#define __NR_statfs                            137
+__SYSCALL(__NR_statfs, sys_statfs)
+#define __NR_fstatfs                           138
+__SYSCALL(__NR_fstatfs, sys_fstatfs)
+#define __NR_sysfs                             139
+__SYSCALL(__NR_sysfs, sys_sysfs)
+
+#define __NR_getpriority                       140
+__SYSCALL(__NR_getpriority, sys_getpriority)
+#define __NR_setpriority                       141
+__SYSCALL(__NR_setpriority, sys_setpriority)
+#define __NR_sched_setparam                    142
+__SYSCALL(__NR_sched_setparam, sys_sched_setparam)
+#define __NR_sched_getparam                    143
+__SYSCALL(__NR_sched_getparam, sys_sched_getparam)
+#define __NR_sched_setscheduler                144
+__SYSCALL(__NR_sched_setscheduler, sys_sched_setscheduler)
+#define __NR_sched_getscheduler                145
+__SYSCALL(__NR_sched_getscheduler, sys_sched_getscheduler)
+#define __NR_sched_get_priority_max            146
+__SYSCALL(__NR_sched_get_priority_max, sys_sched_get_priority_max)
+#define __NR_sched_get_priority_min            147
+__SYSCALL(__NR_sched_get_priority_min, sys_sched_get_priority_min)
+#define __NR_sched_rr_get_interval             148
+__SYSCALL(__NR_sched_rr_get_interval, sys_sched_rr_get_interval)
+
+#define __NR_mlock                             149
+__SYSCALL(__NR_mlock, sys_mlock)
+#define __NR_munlock                           150
+__SYSCALL(__NR_munlock, sys_munlock)
+#define __NR_mlockall                          151
+__SYSCALL(__NR_mlockall, sys_mlockall)
+#define __NR_munlockall                        152
+__SYSCALL(__NR_munlockall, sys_munlockall)
+
+#define __NR_vhangup                           153
+__SYSCALL(__NR_vhangup, sys_vhangup)
+
+#define __NR_modify_ldt                        154
+__SYSCALL(__NR_modify_ldt, sys_modify_ldt)
+
+#define __NR_pivot_root                        155
+__SYSCALL(__NR_pivot_root, sys_pivot_root)
+
+#define __NR__sysctl                           156
+__SYSCALL(__NR__sysctl, sys_sysctl)
+
+#define __NR_prctl                             157
+__SYSCALL(__NR_prctl, sys_prctl)
+#define __NR_arch_prctl                        158
+__SYSCALL(__NR_arch_prctl, sys_arch_prctl)
+
+#define __NR_adjtimex                          159
+__SYSCALL(__NR_adjtimex, sys_adjtimex)
+
+#define __NR_setrlimit                         160
+__SYSCALL(__NR_setrlimit, sys_setrlimit)
+
+#define __NR_chroot                            161
+__SYSCALL(__NR_chroot, sys_chroot)
+
+#define __NR_sync                              162
+__SYSCALL(__NR_sync, sys_sync)
+
+#define __NR_acct                              163
+__SYSCALL(__NR_acct, sys_acct)
+
+#define __NR_settimeofday                      164
+__SYSCALL(__NR_settimeofday, sys_settimeofday)
+
+#define __NR_mount                             165
+__SYSCALL(__NR_mount, sys_mount)
+#define __NR_umount2                           166
+__SYSCALL(__NR_umount2, sys_umount)
+
+#define __NR_swapon                            167
+__SYSCALL(__NR_swapon, sys_swapon)
+#define __NR_swapoff                           168
+__SYSCALL(__NR_swapoff, sys_swapoff)
+
+#define __NR_reboot                            169
+__SYSCALL(__NR_reboot, sys_reboot)
+
+#define __NR_sethostname                       170
+__SYSCALL(__NR_sethostname, sys_sethostname)
+#define __NR_setdomainname                     171
+__SYSCALL(__NR_setdomainname, sys_setdomainname)
+
+#define __NR_iopl                              172
+__SYSCALL(__NR_iopl, stub_iopl)
+#define __NR_ioperm                            173
+__SYSCALL(__NR_ioperm, sys_ioperm)
+
+#define __NR_create_module                     174
+__SYSCALL(__NR_create_module, sys_create_module)
+#define __NR_init_module                       175
+__SYSCALL(__NR_init_module, sys_init_module)
+#define __NR_delete_module                     176
+__SYSCALL(__NR_delete_module, sys_delete_module)
+#define __NR_get_kernel_syms                   177
+__SYSCALL(__NR_get_kernel_syms, sys_get_kernel_syms)
+#define __NR_query_module                      178
+__SYSCALL(__NR_query_module, sys_query_module)
+
+#define __NR_quotactl                          179
+__SYSCALL(__NR_quotactl, sys_quotactl)
+
+#define __NR_nfsservctl                        180
+__SYSCALL(__NR_nfsservctl, sys_nfsservctl)
+
+#define __NR_getpmsg                           181     /* reserved for LiS/STREAMS */
+__SYSCALL(__NR_getpmsg, sys_ni_syscall)
+#define __NR_putpmsg                           182     /* reserved for LiS/STREAMS */
+__SYSCALL(__NR_putpmsg, sys_ni_syscall)
+
+#define __NR_afs_syscall                       183     /* reserved for AFS */ 
+__SYSCALL(__NR_afs_syscall, sys_ni_syscall)
+
+#define __NR_tuxcall                   184 /* reserved for tux */
+__SYSCALL(__NR_tuxcall, sys_ni_syscall)
+
+#define __NR_security                  185 /* reserved for LSM/security */
+__SYSCALL(__NR_security, sys_ni_syscall)
+
+#define __NR_gettid            186
+__SYSCALL(__NR_gettid, sys_gettid)
+
+#define __NR_readahead         187
+__SYSCALL(__NR_readahead, sys_readahead)
+#define __NR_setxattr          188
+__SYSCALL(__NR_setxattr, sys_setxattr)
+#define __NR_lsetxattr         189
+__SYSCALL(__NR_lsetxattr, sys_lsetxattr)
+#define __NR_fsetxattr         190
+__SYSCALL(__NR_fsetxattr, sys_fsetxattr)
+#define __NR_getxattr          191
+__SYSCALL(__NR_getxattr, sys_getxattr)
+#define __NR_lgetxattr         192
+__SYSCALL(__NR_lgetxattr, sys_lgetxattr)
+#define __NR_fgetxattr         193
+__SYSCALL(__NR_fgetxattr, sys_fgetxattr) 
+#define __NR_listxattr         194
+__SYSCALL(__NR_listxattr, sys_listxattr) 
+#define __NR_llistxattr                195
+__SYSCALL(__NR_llistxattr, sys_llistxattr) 
+#define __NR_flistxattr                196
+__SYSCALL(__NR_flistxattr, sys_flistxattr) 
+#define __NR_removexattr       197
+__SYSCALL(__NR_removexattr, sys_removexattr) 
+#define __NR_lremovexattr      198
+__SYSCALL(__NR_lremovexattr, sys_lremovexattr) 
+#define __NR_fremovexattr      199
+__SYSCALL(__NR_fremovexattr, sys_fremovexattr) 
+#define __NR_tkill     200
+__SYSCALL(__NR_tkill, sys_tkill) 
+
+#define __NR_syscall_max __NR_tkill
+
+#ifndef __NO_STUBS
+
+/* user-visible error numbers are in the range -1 - -124: see <asm-i386/errno.h> */
+
+#define __syscall_clobber "r11","rcx","memory" 
+
+#define __syscall_return(type, res) \
+do { \
+       if ((unsigned long)(res) >= (unsigned long)(-127)) { \
+               errno = -(res); \
+               res = -1; \
+       } \
+       return (type) (res); \
+} while (0)
+
+#ifndef __KERNEL_SYSCALLS__
+
+#define __syscall "syscall"
+
+/* XXX - _foo needs to be __foo, while __NR_bar could be _NR_bar. */
+#define _syscall0(type,name) \
+type name(void) \
+{ \
+long __res; \
+__asm__ volatile (__syscall \
+       : "=a" (__res) \
+       : "0" (__NR_##name) : __syscall_clobber ); \
+__syscall_return(type,__res); \
+}
+
+#define _syscall1(type,name,type1,arg1) \
+type name(type1 arg1) \
+{ \
+long __res; \
+__asm__ volatile (__syscall \
+       : "=a" (__res) \
+       : "0" (__NR_##name),"D" ((long)(arg1)) : __syscall_clobber ); \
+__syscall_return(type,__res); \
+}
+
+#define _syscall2(type,name,type1,arg1,type2,arg2) \
+type name(type1 arg1,type2 arg2) \
+{ \
+long __res; \
+__asm__ volatile (__syscall \
+       : "=a" (__res) \
+       : "0" (__NR_##name),"D" ((long)(arg1)),"S" ((long)(arg2)) : __syscall_clobber ); \
+__syscall_return(type,__res); \
+}
+
+#define _syscall3(type,name,type1,arg1,type2,arg2,type3,arg3) \
+type name(type1 arg1,type2 arg2,type3 arg3) \
+{ \
+long __res; \
+__asm__ volatile (__syscall \
+       : "=a" (__res) \
+       : "0" (__NR_##name),"D" ((long)(arg1)),"S" ((long)(arg2)), \
+                 "d" ((long)(arg3)) : __syscall_clobber, "r9" ); \
+__syscall_return(type,__res); \
+}
+
+#define _syscall4(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4) \
+type name (type1 arg1, type2 arg2, type3 arg3, type4 arg4) \
+{ \
+long __res; \
+__asm__ volatile ("movq %5,%%r10 ;" __syscall \
+       : "=a" (__res) \
+       : "0" (__NR_##name),"D" ((long)(arg1)),"S" ((long)(arg2)), \
+         "d" ((long)(arg3)),"g" ((long)(arg4)) : __syscall_clobber,"r10" ); \
+__syscall_return(type,__res); \
+} 
+
+#define _syscall5(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4, \
+         type5,arg5) \
+type name (type1 arg1,type2 arg2,type3 arg3,type4 arg4,type5 arg5) \
+{ \
+long __res; \
+__asm__ volatile ("movq %5,%%r10 ; movq %6,%%r9 ; " __syscall \
+       : "=a" (__res) \
+       : "0" (__NR_##name),"D" ((long)(arg1)),"S" ((long)(arg2)), \
+         "d" ((long)(arg3)),"g" ((long)(arg4)),"g" ((long)(arg5)) : \
+       __syscall_clobber,"r8","r9","r10" ); \
+__syscall_return(type,__res); \
+}
+
+#else /* __KERNEL_SYSCALLS__ */
+
+/*
+ * we need this inline - forking from kernel space will result
+ * in NO COPY ON WRITE (!!!), until an execve is executed. This
+ * is no problem, but for the stack. This is handled by not letting
+ * main() use the stack at all after fork(). Thus, no function
+ * calls - which means inline code for fork too, as otherwise we
+ * would use the stack upon exit from 'fork()'.
+ *
+ * Actually only pause and fork are needed inline, so that there
+ * won't be any messing with the stack from main(), but we define
+ * some others too.
+ */
+#define __NR__exit __NR_exit
+
+extern long sys_pause(void);
+static inline long pause(void)
+{
+       return sys_pause();
+}
+
+extern long sys_sync(void);
+static inline long sync(void)
+{
+       return sys_sync();
+}
+
+extern pid_t sys_setsid(void);
+static inline pid_t setsid(void)
+{
+       return sys_setsid();
+}
+
+extern ssize_t sys_write(unsigned int, char *, size_t);
+static inline ssize_t write(unsigned int fd, char * buf, size_t count)
+{
+       return sys_write(fd, buf, count);
+}
+
+extern ssize_t sys_read(unsigned int, char *, size_t);
+static inline ssize_t read(unsigned int fd, char * buf, size_t count)
+{
+       return sys_read(fd, buf, count);
+}
+
+extern off_t sys_lseek(unsigned int, off_t, unsigned int);
+static inline off_t lseek(unsigned int fd, off_t offset, unsigned int origin)
+{
+       return sys_lseek(fd, offset, origin);
+}
+
+extern long sys_dup(unsigned int);
+static inline long dup(unsigned int fd)
+{
+       return sys_dup(fd);
+}
+
+/* implemented in asm in arch/x86_64/kernel/entry.S */
+extern long execve(char *, char **, char **);
+
+extern long sys_open(const char *, int, int);
+static inline long open(const char * filename, int flags, int mode)
+{
+       return sys_open(filename, flags, mode);
+}
+
+extern long sys_close(unsigned int);
+static inline long close(unsigned int fd)
+{
+       return sys_close(fd);
+}
+
+extern long sys_exit(int) __attribute__((noreturn));
+extern inline long exit(int error_code)
+{
+       sys_exit(error_code);
+}
+
+extern long sys_delete_module(const char *);
+static inline long delete_module(const char *name_user)
+{
+       return sys_delete_module(name_user);
+}
+
+struct rusage; 
+asmlinkage long sys_wait4(pid_t pid,unsigned int * stat_addr, 
+                       int options, struct rusage * ru);
+static inline pid_t waitpid(int pid, int * wait_stat, int flags)
+{
+       return sys_wait4(pid, wait_stat, flags, NULL);
+}
+
+static inline pid_t wait(int * wait_stat)
+{
+       return waitpid(-1,wait_stat,0);
+}
+
+#endif /* __KERNEL_SYSCALLS__ */
+
+#endif /* __NO_STUBS */
+
+#endif
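As a hedged illustration (not part of the patch), the sketch below shows how the _syscallN stub macros above are meant to be expanded by freestanding user code. The stub's name token must match the corresponding __NR_ constant, since the macros paste __NR_##name; the include paths and the assumption that errno is assignable are the example's, not the header's.

/* Illustrative only: expands two of the stub macros defined above. */
#include <asm/unistd.h>         /* the header added by this patch */
#include <errno.h>              /* __syscall_return() assigns to errno */
#include <sys/types.h>          /* pid_t, size_t, ssize_t */

static _syscall0(pid_t, getpid)
static _syscall3(ssize_t, write, int, fd, const char *, buf, size_t, count)

int main(void)
{
        static const char msg[] = "raw syscall demo\n";

        /* rax carries __NR_write via the "0" constraint; rdi, rsi and rdx
           carry the arguments, per the x86-64 syscall convention above. */
        if (write(1, msg, sizeof(msg) - 1) < 0)
                return errno;
        return getpid() > 0 ? 0 : 1;
}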
diff --git a/include/asm-x86_64/user.h b/include/asm-x86_64/user.h
new file mode 100644 (file)
index 0000000..77ad9e1
--- /dev/null
@@ -0,0 +1,113 @@
+#ifndef _X86_64_USER_H
+#define _X86_64_USER_H
+
+#include <asm/types.h>
+#include <asm/page.h>
+#include <linux/ptrace.h>
+/* Core file format: The core file is written in such a way that gdb
+   can understand it and provide useful information to the user (under
+   linux we use the 'trad-core' bfd).  There are quite a number of
+   obstacles to being able to view the contents of the floating point
+   registers, and until these are solved you will not be able to view the
+   contents of them.  Actually, you can read in the core file and look at
+   the contents of the user struct to find out what the floating point
+   registers contain.
+   The actual file contents are as follows:
+   UPAGE: 1 page consisting of a user struct that tells gdb what is present
+   in the file.  Directly after this is a copy of the task_struct, which
+   is currently not used by gdb, but it may come in useful at some point.
+   All of the registers are stored as part of the upage.  The upage should
+   always be only one page.
+   DATA: The data area is stored.  We use current->end_text to
+   current->brk to pick up all of the user variables, plus any memory
+   that may have been malloced.  No attempt is made to determine if a page
+   is demand-zero or if a page is totally unused, we just cover the entire
+   range.  All of the addresses are rounded in such a way that an integral
+   number of pages is written.
+   STACK: We need the stack information in order to get a meaningful
+   backtrace.  We need to write the data from (esp) to
+   current->start_stack, so we round each of these off in order to be able
+   to write an integer number of pages.
+   The minimum core file size is 3 pages, or 12288 bytes.
+*/
+
+/* This is not necessary in the first phase. It will have to be
+   synchronized with gdb later. */
+
+/*
+ * Pentium III FXSR, SSE support
+ *     Gareth Hughes <gareth@valinux.com>, May 2000
+ *
+ * Provide support for the GDB 5.0+ PTRACE_{GET|SET}FPXREGS requests for
+ * interacting with the FXSR-format floating point environment.  Floating
+ * point data can be accessed in the regular format in the usual manner,
+ * and both the standard and SIMD floating point data can be accessed via
+ * the new ptrace requests.  In either case, changes to the FPU environment
+ * will be reflected in the task's state as expected.
+ */
+
+struct user_i387_struct {
+       unsigned short  cwd;
+       unsigned short  swd;
+       unsigned short  twd;
+       unsigned short  fop;
+       u32     fip;
+       u32     fcs;
+       u32     foo;
+       u32     fos;
+       u32     mxcsr;
+       u32     reserved;
+       u32     st_space[32];   /* 8*16 bytes for each FP-reg = 128 bytes */
+       u32     xmm_space[32];  /* 8*16 bytes for each XMM-reg = 128 bytes */
+       u32     padding[56];
+};
+
+/*
+ * This is a copy of the layout of "struct pt_regs", and
+ * is still the layout used by user mode (the new
+ * pt_regs doesn't have all the registers, as the kernel
+ * doesn't use the extra segment registers).
+ */
+struct user_regs_struct {
+       unsigned long r15,r14,r13,r12,rbp,rbx,r11,r10;
+       unsigned long r9,r8,rax,rcx,rdx,rsi,rdi,orig_rax;
+       unsigned long rip,cs,eflags;
+       unsigned long rsp,ss;
+       unsigned long fs_base, kernel_gs_base;
+}; 
+
+/* When the kernel dumps core, it starts by dumping the user struct -
+   this will be used by gdb to figure out where the data and stack segments
+   are within the file, and what virtual addresses to use. */
+struct user{
+/* We start with the registers, to mimic the way that "memory" is returned
+   from the ptrace(3,...) function.  */
+  struct user_regs_struct regs;                /* Where the registers are actually stored */
+/* ptrace does not yet supply these.  Someday.... */
+  int u_fpvalid;               /* True if math co-processor being used. */
+                                /* for this mess. Not yet used. */
+  struct user_i387_struct i387;        /* Math Co-processor registers. */
+/* The rest of this junk is to help gdb figure out what goes where */
+  unsigned long int u_tsize;   /* Text segment size (pages). */
+  unsigned long int u_dsize;   /* Data segment size (pages). */
+  unsigned long int u_ssize;   /* Stack segment size (pages). */
+  unsigned long start_code;     /* Starting virtual address of text. */
+  unsigned long start_stack;   /* Starting virtual address of stack area.
+                                  This is actually the bottom of the stack,
+                                  the top of the stack is always found in the
+                                  esp register.  */
+  long int signal;                     /* Signal that caused the core dump. */
+  int reserved;                        /* No longer used */
+  struct user_pt_regs * u_ar0; /* Used by gdb to help find the values for */
+                               /* the registers. */
+  struct user_i387_struct* u_fpstate;  /* Math Co-processor pointer. */
+  unsigned long magic;         /* To uniquely identify a core file */
+  char u_comm[32];             /* User command that was responsible */
+  int u_debugreg[8];
+};
+#define NBPG PAGE_SIZE
+#define UPAGES 1
+#define HOST_TEXT_START_ADDR (u.start_code)
+#define HOST_STACK_END_ADDR (u.start_stack + u.u_ssize * NBPG)
+
+#endif /* _X86_64_USER_H */
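To make the core-file layout described in the comment above concrete, here is a hedged sketch (not part of the patch) of a tool that reads the leading user struct from a trad-core dump and prints the ranges gdb derives from it. The tool itself is hypothetical; whether <asm/user.h> is directly usable from user space depends on the installed kernel headers. The fields and macros are the ones defined in this header.

/* Hypothetical core-file inspector: reads the UPAGE user struct that the
   kernel dumps first and prints the ranges described in the comment above. */
#include <stdio.h>
#include <asm/user.h>           /* the header added by this patch */

int main(int argc, char **argv)
{
        struct user u;
        FILE *f;

        if (argc < 2 || !(f = fopen(argv[1], "rb")))
                return 1;
        if (fread(&u, sizeof(u), 1, f) != 1)
                return 1;
        printf("text  starts at %#lx (%lu pages)\n",
               HOST_TEXT_START_ADDR, u.u_tsize);
        printf("data  %lu pages, stack ends at %#lx (%lu pages)\n",
               u.u_dsize, HOST_STACK_END_ADDR, u.u_ssize);
        printf("signal %ld, command \"%.32s\"\n", u.signal, u.u_comm);
        fclose(f);
        return 0;
}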
diff --git a/include/asm-x86_64/user32.h b/include/asm-x86_64/user32.h
new file mode 100644 (file)
index 0000000..9492e1d
--- /dev/null
@@ -0,0 +1,58 @@
+#ifndef USER32_H
+#define USER32_H 1
+
+/* IA32-compatible user structures for ptrace.  These should also be used for 32-bit core dumps. */
+
+struct user_i387_ia32_struct {
+       u32     cwd;
+       u32     swd;
+       u32     twd;
+       u32     fip;
+       u32     fcs;
+       u32     foo;
+       u32     fos;
+       u32     st_space[20];   /* 8*10 bytes for each FP-reg = 80 bytes */
+};
+
+/*
+ * This is the old layout of "struct pt_regs", and
+ * is still the layout used by user mode (the new
+ * pt_regs doesn't have all registers as the kernel
+ * doesn't use the extra segment registers)
+ */
+struct user_regs_struct32 {
+       __u32 ebx, ecx, edx, esi, edi, ebp, eax;
+       unsigned short ds, __ds, es, __es;
+       unsigned short fs, __fs, gs, __gs;
+       __u32 orig_eax, eip;
+       unsigned short cs, __cs;
+       __u32 eflags, esp;
+       unsigned short ss, __ss;
+};
+
+struct user32 {
+  struct user_regs_struct32 regs;              /* Where the registers are actually stored */
+  int u_fpvalid;               /* True if math co-processor being used. */
+                                /* for this mess. Not yet used. */
+  struct user_i387_ia32_struct i387;   /* Math Co-processor registers. */
+/* The rest of this junk is to help gdb figure out what goes where */
+  __u32 u_tsize;       /* Text segment size (pages). */
+  __u32 u_dsize;       /* Data segment size (pages). */
+  __u32 u_ssize;       /* Stack segment size (pages). */
+  __u32 start_code;     /* Starting virtual address of text. */
+  __u32 start_stack;   /* Starting virtual address of stack area.
+                                  This is actually the bottom of the stack,
+                                  the top of the stack is always found in the
+                                  esp register.  */
+  __u32 signal;                /* Signal that caused the core dump. */
+  int reserved;                        /* No longer used */
+  __u32 u_ar0; /* Used by gdb to help find the values for */
+                               /* the registers. */
+  __u32 u_fpstate;     /* Math Co-processor pointer. */
+  __u32 magic;         /* To uniquely identify a core file */
+  char u_comm[32];             /* User command that was responsible */
+  int u_debugreg[8];
+};
+
+
+#endif
diff --git a/include/asm-x86_64/vga.h b/include/asm-x86_64/vga.h
new file mode 100644 (file)
index 0000000..ef0c0e5
--- /dev/null
@@ -0,0 +1,20 @@
+/*
+ *     Access to VGA videoram
+ *
+ *     (c) 1998 Martin Mares <mj@ucw.cz>
+ */
+
+#ifndef _LINUX_ASM_VGA_H_
+#define _LINUX_ASM_VGA_H_
+
+/*
+ *     On the PC, we can just recalculate addresses and then
+ *     access the videoram directly without any black magic.
+ */
+
+#define VGA_MAP_MEM(x) (unsigned long)phys_to_virt(x)
+
+#define vga_readb(x) (*(x))
+#define vga_writeb(x,y) (*(y) = (x))
+
+#endif
diff --git a/include/asm-x86_64/vsyscall.h b/include/asm-x86_64/vsyscall.h
new file mode 100644 (file)
index 0000000..5e02736
--- /dev/null
@@ -0,0 +1,48 @@
+#ifndef _ASM_X86_64_VSYSCALL_H_
+#define _ASM_X86_64_VSYSCALL_H_
+
+enum vsyscall_num {
+       __NR_vgettimeofday,
+       __NR_vtime,
+};
+
+#define VSYSCALL_START (-10UL << 20)
+#define VSYSCALL_SIZE 1024
+#define VSYSCALL_END (-2UL << 20)
+#define VSYSCALL_ADDR(vsyscall_nr) (VSYSCALL_START+VSYSCALL_SIZE*(vsyscall_nr))
+
+#ifdef __KERNEL__
+
+#define __section_last_tsc_low __attribute__ ((unused, __section__ (".last_tsc_low")))
+#define __section_delay_at_last_interrupt      __attribute__ ((unused, __section__ (".delay_at_last_interrupt")))
+#define __section_fast_gettimeoffset_quotient  __attribute__ ((unused, __section__ (".fast_gettimeoffset_quotient")))
+#define __section_wall_jiffies __attribute__ ((unused, __section__ (".wall_jiffies")))
+#define __section_jiffies __attribute__ ((unused, __section__ (".jiffies")))
+#define __section_sys_tz __attribute__ ((unused, __section__ (".sys_tz")))
+#define __section_xtime __attribute__ ((unused, __section__ (".xtime")))
+#define __section_vxtime_sequence __attribute__ ((unused, __section__ (".vxtime_sequence")))
+
+/* vsyscall space (readonly) */
+extern long __vxtime_sequence[2];
+extern int __delay_at_last_interrupt;
+extern unsigned long __last_tsc_low;
+extern unsigned long __fast_gettimeoffset_quotient;
+extern struct timeval __xtime;
+extern volatile unsigned long __jiffies;
+extern unsigned long __wall_jiffies;
+extern struct timezone __sys_tz;
+
+/* kernel space (writeable) */
+extern unsigned long last_tsc_low;
+extern int delay_at_last_interrupt;
+extern unsigned long fast_gettimeoffset_quotient;
+extern unsigned long wall_jiffies;
+extern struct timezone sys_tz;
+extern long vxtime_sequence[2];
+
+#define vxtime_lock() do { vxtime_sequence[0]++; wmb(); } while(0)
+#define vxtime_unlock() do { wmb(); vxtime_sequence[1]++; } while (0)
+
+#endif /* __KERNEL__ */
+
+#endif /* _ASM_X86_64_VSYSCALL_H_ */
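For clarity, a hedged sketch of the reader-side retry loop that the vxtime_lock()/vxtime_unlock() counters above are designed to pair with: the writer bumps vxtime_sequence[0] before and [1] after updating the time variables, so a reader that sees the two halves disagree knows an update raced with it and retries. The function name is illustrative, and the usual kernel definitions of struct timeval, struct timezone and rmb() are assumed to be in scope.

/* Illustrative reader for the split sequence counter above (not part of
   the patch).  Matches the writer order: [0]++ ... update data ... [1]++. */
static inline void vread_time(struct timeval *tv, struct timezone *tz)
{
        long seq;

        do {
                seq = __vxtime_sequence[1];     /* count of completed updates */
                rmb();
                *tv = __xtime;                  /* data published by the writer */
                *tz = __sys_tz;
                rmb();
        } while (seq != __vxtime_sequence[0]);  /* a writer got in between: retry */
}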
diff --git a/include/asm-x86_64/xor.h b/include/asm-x86_64/xor.h
new file mode 100644 (file)
index 0000000..01e1d1e
--- /dev/null
@@ -0,0 +1,859 @@
+/*
+ * include/asm-x86_64/xor.h
+ *
+ * Optimized RAID-5 checksumming functions for MMX and SSE.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * You should have received a copy of the GNU General Public License
+ * (for example /usr/src/linux/COPYING); if not, write to the Free
+ * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+/*
+ * High-speed RAID5 checksumming functions utilizing MMX instructions.
+ * Copyright (C) 1998 Ingo Molnar.
+ */
+
+#define FPU_SAVE                                                       \
+  do {                                                                 \
+       if (!test_thread_flag(TIF_USEDFPU))                             \
+               __asm__ __volatile__ (" clts;\n");                      \
+       __asm__ __volatile__ ("fsave %0; fwait": "=m"(fpu_save[0]));    \
+  } while (0)
+
+#define FPU_RESTORE                                                    \
+  do {                                                                 \
+       __asm__ __volatile__ ("frstor %0": : "m"(fpu_save[0]));         \
+       if (!test_thread_flag(TIF_USEDFPU))                             \
+               stts();                                                 \
+  } while (0)
+
+#define LD(x,y)                "       movq   8*("#x")(%1), %%mm"#y"   ;\n"
+#define ST(x,y)                "       movq %%mm"#y",   8*("#x")(%1)   ;\n"
+#define XO1(x,y)       "       pxor   8*("#x")(%2), %%mm"#y"   ;\n"
+#define XO2(x,y)       "       pxor   8*("#x")(%3), %%mm"#y"   ;\n"
+#define XO3(x,y)       "       pxor   8*("#x")(%4), %%mm"#y"   ;\n"
+#define XO4(x,y)       "       pxor   8*("#x")(%5), %%mm"#y"   ;\n"
+
+
+static void
+xor_pII_mmx_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
+{
+       unsigned long lines = bytes >> 7;
+       char fpu_save[108];
+
+       FPU_SAVE;
+
+       __asm__ __volatile__ (
+#undef BLOCK
+#define BLOCK(i) \
+       LD(i,0)                                 \
+               LD(i+1,1)                       \
+                       LD(i+2,2)               \
+                               LD(i+3,3)       \
+       XO1(i,0)                                \
+       ST(i,0)                                 \
+               XO1(i+1,1)                      \
+               ST(i+1,1)                       \
+                       XO1(i+2,2)              \
+                       ST(i+2,2)               \
+                               XO1(i+3,3)      \
+                               ST(i+3,3)
+
+       " .align 32                     ;\n"
+       " 1:                            ;\n"
+
+       BLOCK(0)
+       BLOCK(4)
+       BLOCK(8)
+       BLOCK(12)
+
+       "       addq $128, %1         ;\n"
+       "       addq $128, %2         ;\n"
+       "       decq %0               ;\n"
+       "       jnz 1b                ;\n"
+               :
+       : "r" (lines),
+         "r" (p1), "r" (p2)
+       : "memory");
+
+       FPU_RESTORE;
+}
+
+static void
+xor_pII_mmx_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
+             unsigned long *p3)
+{
+       unsigned long lines = bytes >> 7;
+       char fpu_save[108];
+
+       FPU_SAVE;
+
+       __asm__ __volatile__ (
+#undef BLOCK
+#define BLOCK(i) \
+       LD(i,0)                                 \
+               LD(i+1,1)                       \
+                       LD(i+2,2)               \
+                               LD(i+3,3)       \
+       XO1(i,0)                                \
+               XO1(i+1,1)                      \
+                       XO1(i+2,2)              \
+                               XO1(i+3,3)      \
+       XO2(i,0)                                \
+       ST(i,0)                                 \
+               XO2(i+1,1)                      \
+               ST(i+1,1)                       \
+                       XO2(i+2,2)              \
+                       ST(i+2,2)               \
+                               XO2(i+3,3)      \
+                               ST(i+3,3)
+
+       " .align 32                     ;\n"
+       " 1:                            ;\n"
+
+       BLOCK(0)
+       BLOCK(4)
+       BLOCK(8)
+       BLOCK(12)
+
+       "       addq $128, %1         ;\n"
+       "       addq $128, %2         ;\n"
+       "       addq $128, %3         ;\n"
+       "       decq %0               ;\n"
+       "       jnz 1b                ;\n"
+               :
+       : "r" (lines),
+         "r" (p1), "r" (p2), "r" (p3)
+       : "memory");
+
+       FPU_RESTORE;
+}
+
+static void
+xor_pII_mmx_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
+             unsigned long *p3, unsigned long *p4)
+{
+       unsigned long lines = bytes >> 7;
+       char fpu_save[108];
+
+       FPU_SAVE;
+
+       __asm__ __volatile__ (
+#undef BLOCK
+#define BLOCK(i) \
+       LD(i,0)                                 \
+               LD(i+1,1)                       \
+                       LD(i+2,2)               \
+                               LD(i+3,3)       \
+       XO1(i,0)                                \
+               XO1(i+1,1)                      \
+                       XO1(i+2,2)              \
+                               XO1(i+3,3)      \
+       XO2(i,0)                                \
+               XO2(i+1,1)                      \
+                       XO2(i+2,2)              \
+                               XO2(i+3,3)      \
+       XO3(i,0)                                \
+       ST(i,0)                                 \
+               XO3(i+1,1)                      \
+               ST(i+1,1)                       \
+                       XO3(i+2,2)              \
+                       ST(i+2,2)               \
+                               XO3(i+3,3)      \
+                               ST(i+3,3)
+
+       " .align 32                     ;\n"
+       " 1:                            ;\n"
+
+       BLOCK(0)
+       BLOCK(4)
+       BLOCK(8)
+       BLOCK(12)
+
+       "       addq $128, %1         ;\n"
+       "       addq $128, %2         ;\n"
+       "       addq $128, %3         ;\n"
+       "       addq $128, %4         ;\n"
+       "       decq %0               ;\n"
+       "       jnz 1b                ;\n"
+               :
+       : "r" (lines),
+         "r" (p1), "r" (p2), "r" (p3), "r" (p4)
+       : "memory");
+
+       FPU_RESTORE;
+}
+
+static void
+xor_pII_mmx_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
+             unsigned long *p3, unsigned long *p4, unsigned long *p5)
+{
+       unsigned long lines = bytes >> 7;
+       char fpu_save[108];
+
+       FPU_SAVE;
+
+       __asm__ __volatile__ (
+#undef BLOCK
+#define BLOCK(i) \
+       LD(i,0)                                 \
+               LD(i+1,1)                       \
+                       LD(i+2,2)               \
+                               LD(i+3,3)       \
+       XO1(i,0)                                \
+               XO1(i+1,1)                      \
+                       XO1(i+2,2)              \
+                               XO1(i+3,3)      \
+       XO2(i,0)                                \
+               XO2(i+1,1)                      \
+                       XO2(i+2,2)              \
+                               XO2(i+3,3)      \
+       XO3(i,0)                                \
+               XO3(i+1,1)                      \
+                       XO3(i+2,2)              \
+                               XO3(i+3,3)      \
+       XO4(i,0)                                \
+       ST(i,0)                                 \
+               XO4(i+1,1)                      \
+               ST(i+1,1)                       \
+                       XO4(i+2,2)              \
+                       ST(i+2,2)               \
+                               XO4(i+3,3)      \
+                               ST(i+3,3)
+
+       " .align 32                     ;\n"
+       " 1:                            ;\n"
+
+       BLOCK(0)
+       BLOCK(4)
+       BLOCK(8)
+       BLOCK(12)
+
+       "       addq $128, %1         ;\n"
+       "       addq $128, %2         ;\n"
+       "       addq $128, %3         ;\n"
+       "       addq $128, %4         ;\n"
+       "       addq $128, %5         ;\n"
+       "       decq %0               ;\n"
+       "       jnz 1b                ;\n"
+               :
+       : "g" (lines),
+         "r" (p1), "r" (p2), "r" (p3), "r" (p4), "r" (p5)
+       : "memory");
+
+       FPU_RESTORE;
+}
+
+#undef LD
+#undef XO1
+#undef XO2
+#undef XO3
+#undef XO4
+#undef ST
+#undef BLOCK
+
+static void
+xor_p5_mmx_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
+{
+       unsigned long lines = bytes >> 6;
+       char fpu_save[108];
+
+       FPU_SAVE;
+
+       __asm__ __volatile__ (
+       " .align 32                  ;\n"
+       " 1:                         ;\n"
+       "       movq   (%1), %%mm0   ;\n"
+       "       movq  8(%1), %%mm1   ;\n"
+       "       pxor   (%2), %%mm0   ;\n"
+       "       movq 16(%1), %%mm2   ;\n"
+       "       movq %%mm0,   (%1)   ;\n"
+       "       pxor  8(%2), %%mm1   ;\n"
+       "       movq 24(%1), %%mm3   ;\n"
+       "       movq %%mm1,  8(%1)   ;\n"
+       "       pxor 16(%2), %%mm2   ;\n"
+       "       movq 32(%1), %%mm4   ;\n"
+       "       movq %%mm2, 16(%1)   ;\n"
+       "       pxor 24(%2), %%mm3   ;\n"
+       "       movq 40(%1), %%mm5   ;\n"
+       "       movq %%mm3, 24(%1)   ;\n"
+       "       pxor 32(%2), %%mm4   ;\n"
+       "       movq 48(%1), %%mm6   ;\n"
+       "       movq %%mm4, 32(%1)   ;\n"
+       "       pxor 40(%2), %%mm5   ;\n"
+       "       movq 56(%1), %%mm7   ;\n"
+       "       movq %%mm5, 40(%1)   ;\n"
+       "       pxor 48(%2), %%mm6   ;\n"
+       "       pxor 56(%2), %%mm7   ;\n"
+       "       movq %%mm6, 48(%1)   ;\n"
+       "       movq %%mm7, 56(%1)   ;\n"
+       
+       "       addq $64, %1         ;\n"
+       "       addq $64, %2         ;\n"
+       "       decq %0              ;\n"
+       "       jnz 1b               ;\n"
+       : 
+       : "r" (lines),
+         "r" (p1), "r" (p2)
+       : "memory");
+
+       FPU_RESTORE;
+}
+
+static void
+xor_p5_mmx_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
+            unsigned long *p3)
+{
+       unsigned long lines = bytes >> 6;
+       char fpu_save[108];
+
+       FPU_SAVE;
+
+       __asm__ __volatile__ (
+       " .align 32,0x90             ;\n"
+       " 1:                         ;\n"
+       "       movq   (%1), %%mm0   ;\n"
+       "       movq  8(%1), %%mm1   ;\n"
+       "       pxor   (%2), %%mm0   ;\n"
+       "       movq 16(%1), %%mm2   ;\n"
+       "       pxor  8(%2), %%mm1   ;\n"
+       "       pxor   (%3), %%mm0   ;\n"
+       "       pxor 16(%2), %%mm2   ;\n"
+       "       movq %%mm0,   (%1)   ;\n"
+       "       pxor  8(%3), %%mm1   ;\n"
+       "       pxor 16(%3), %%mm2   ;\n"
+       "       movq 24(%1), %%mm3   ;\n"
+       "       movq %%mm1,  8(%1)   ;\n"
+       "       movq 32(%1), %%mm4   ;\n"
+       "       movq 40(%1), %%mm5   ;\n"
+       "       pxor 24(%2), %%mm3   ;\n"
+       "       movq %%mm2, 16(%1)   ;\n"
+       "       pxor 32(%2), %%mm4   ;\n"
+       "       pxor 24(%3), %%mm3   ;\n"
+       "       pxor 40(%2), %%mm5   ;\n"
+       "       movq %%mm3, 24(%1)   ;\n"
+       "       pxor 32(%3), %%mm4   ;\n"
+       "       pxor 40(%3), %%mm5   ;\n"
+       "       movq 48(%1), %%mm6   ;\n"
+       "       movq %%mm4, 32(%1)   ;\n"
+       "       movq 56(%1), %%mm7   ;\n"
+       "       pxor 48(%2), %%mm6   ;\n"
+       "       movq %%mm5, 40(%1)   ;\n"
+       "       pxor 56(%2), %%mm7   ;\n"
+       "       pxor 48(%3), %%mm6   ;\n"
+       "       pxor 56(%3), %%mm7   ;\n"
+       "       movq %%mm6, 48(%1)   ;\n"
+       "       movq %%mm7, 56(%1)   ;\n"
+      
+       "       addq $64, %1         ;\n"
+       "       addq $64, %2         ;\n"
+       "       addq $64, %3         ;\n"
+       "       decq %0              ;\n"
+       "       jnz 1b               ;\n"
+       : 
+       : "r" (lines),
+         "r" (p1), "r" (p2), "r" (p3)
+       : "memory" );
+
+       FPU_RESTORE;
+}
+
+static void
+xor_p5_mmx_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
+            unsigned long *p3, unsigned long *p4)
+{
+       unsigned long lines = bytes >> 6;
+       char fpu_save[108];
+
+       FPU_SAVE;
+
+       __asm__ __volatile__ (
+       " .align 32,0x90             ;\n"
+       " 1:                         ;\n"
+       "       movq   (%1), %%mm0   ;\n"
+       "       movq  8(%1), %%mm1   ;\n"
+       "       pxor   (%2), %%mm0   ;\n"
+       "       movq 16(%1), %%mm2   ;\n"
+       "       pxor  8(%2), %%mm1   ;\n"
+       "       pxor   (%3), %%mm0   ;\n"
+       "       pxor 16(%2), %%mm2   ;\n"
+       "       pxor  8(%3), %%mm1   ;\n"
+       "       pxor   (%4), %%mm0   ;\n"
+       "       movq 24(%1), %%mm3   ;\n"
+       "       pxor 16(%3), %%mm2   ;\n"
+       "       pxor  8(%4), %%mm1   ;\n"
+       "       movq %%mm0,   (%1)   ;\n"
+       "       movq 32(%1), %%mm4   ;\n"
+       "       pxor 24(%2), %%mm3   ;\n"
+       "       pxor 16(%4), %%mm2   ;\n"
+       "       movq %%mm1,  8(%1)   ;\n"
+       "       movq 40(%1), %%mm5   ;\n"
+       "       pxor 32(%2), %%mm4   ;\n"
+       "       pxor 24(%3), %%mm3   ;\n"
+       "       movq %%mm2, 16(%1)   ;\n"
+       "       pxor 40(%2), %%mm5   ;\n"
+       "       pxor 32(%3), %%mm4   ;\n"
+       "       pxor 24(%4), %%mm3   ;\n"
+       "       movq %%mm3, 24(%1)   ;\n"
+       "       movq 56(%1), %%mm7   ;\n"
+       "       movq 48(%1), %%mm6   ;\n"
+       "       pxor 40(%3), %%mm5   ;\n"
+       "       pxor 32(%4), %%mm4   ;\n"
+       "       pxor 48(%2), %%mm6   ;\n"
+       "       movq %%mm4, 32(%1)   ;\n"
+       "       pxor 56(%2), %%mm7   ;\n"
+       "       pxor 40(%4), %%mm5   ;\n"
+       "       pxor 48(%3), %%mm6   ;\n"
+       "       pxor 56(%3), %%mm7   ;\n"
+       "       movq %%mm5, 40(%1)   ;\n"
+       "       pxor 48(%4), %%mm6   ;\n"
+       "       pxor 56(%4), %%mm7   ;\n"
+       "       movq %%mm6, 48(%1)   ;\n"
+       "       movq %%mm7, 56(%1)   ;\n"
+      
+       "       addq $64, %1         ;\n"
+       "       addq $64, %2         ;\n"
+       "       addq $64, %3         ;\n"
+       "       addq $64, %4         ;\n"
+       "       decq %0              ;\n"
+       "       jnz 1b               ;\n"
+       : 
+       : "r" (lines),
+         "r" (p1), "r" (p2), "r" (p3), "r" (p4)
+       : "memory");
+
+       FPU_RESTORE;
+}
+
+static void
+xor_p5_mmx_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
+            unsigned long *p3, unsigned long *p4, unsigned long *p5)
+{
+       unsigned long lines = bytes >> 6;
+       char fpu_save[108];
+
+       FPU_SAVE;
+
+       __asm__ __volatile__ (
+       " .align 32,0x90             ;\n"
+       " 1:                         ;\n"
+       "       movq   (%1), %%mm0   ;\n"
+       "       movq  8(%1), %%mm1   ;\n"
+       "       pxor   (%2), %%mm0   ;\n"
+       "       pxor  8(%2), %%mm1   ;\n"
+       "       movq 16(%1), %%mm2   ;\n"
+       "       pxor   (%3), %%mm0   ;\n"
+       "       pxor  8(%3), %%mm1   ;\n"
+       "       pxor 16(%2), %%mm2   ;\n"
+       "       pxor   (%4), %%mm0   ;\n"
+       "       pxor  8(%4), %%mm1   ;\n"
+       "       pxor 16(%3), %%mm2   ;\n"
+       "       movq 24(%1), %%mm3   ;\n"
+       "       pxor   (%5), %%mm0   ;\n"
+       "       pxor  8(%5), %%mm1   ;\n"
+       "       movq %%mm0,   (%1)   ;\n"
+       "       pxor 16(%4), %%mm2   ;\n"
+       "       pxor 24(%2), %%mm3   ;\n"
+       "       movq %%mm1,  8(%1)   ;\n"
+       "       pxor 16(%5), %%mm2   ;\n"
+       "       pxor 24(%3), %%mm3   ;\n"
+       "       movq 32(%1), %%mm4   ;\n"
+       "       movq %%mm2, 16(%1)   ;\n"
+       "       pxor 24(%4), %%mm3   ;\n"
+       "       pxor 32(%2), %%mm4   ;\n"
+       "       movq 40(%1), %%mm5   ;\n"
+       "       pxor 24(%5), %%mm3   ;\n"
+       "       pxor 32(%3), %%mm4   ;\n"
+       "       pxor 40(%2), %%mm5   ;\n"
+       "       movq %%mm3, 24(%1)   ;\n"
+       "       pxor 32(%4), %%mm4   ;\n"
+       "       pxor 40(%3), %%mm5   ;\n"
+       "       movq 48(%1), %%mm6   ;\n"
+       "       movq 56(%1), %%mm7   ;\n"
+       "       pxor 32(%5), %%mm4   ;\n"
+       "       pxor 40(%4), %%mm5   ;\n"
+       "       pxor 48(%2), %%mm6   ;\n"
+       "       pxor 56(%2), %%mm7   ;\n"
+       "       movq %%mm4, 32(%1)   ;\n"
+       "       pxor 48(%3), %%mm6   ;\n"
+       "       pxor 56(%3), %%mm7   ;\n"
+       "       pxor 40(%5), %%mm5   ;\n"
+       "       pxor 48(%4), %%mm6   ;\n"
+       "       pxor 56(%4), %%mm7   ;\n"
+       "       movq %%mm5, 40(%1)   ;\n"
+       "       pxor 48(%5), %%mm6   ;\n"
+       "       pxor 56(%5), %%mm7   ;\n"
+       "       movq %%mm6, 48(%1)   ;\n"
+       "       movq %%mm7, 56(%1)   ;\n"
+      
+       "       addq $64, %1         ;\n"
+       "       addq $64, %2         ;\n"
+       "       addq $64, %3         ;\n"
+       "       addq $64, %4         ;\n"
+       "       addq $64, %5         ;\n"
+       "       decq %0              ;\n"
+       "       jnz 1b               ;\n"
+       : 
+       : "g" (lines),
+         "r" (p1), "r" (p2), "r" (p3), "r" (p4), "r" (p5)
+       : "memory");
+
+       FPU_RESTORE;
+}
+
+static struct xor_block_template xor_block_pII_mmx = {
+       name: "pII_mmx",
+       do_2: xor_pII_mmx_2,
+       do_3: xor_pII_mmx_3,
+       do_4: xor_pII_mmx_4,
+       do_5: xor_pII_mmx_5,
+};
+
+static struct xor_block_template xor_block_p5_mmx = {
+       name: "p5_mmx",
+       do_2: xor_p5_mmx_2,
+       do_3: xor_p5_mmx_3,
+       do_4: xor_p5_mmx_4,
+       do_5: xor_p5_mmx_5,
+};
+
+#undef FPU_SAVE
+#undef FPU_RESTORE
+
+/*
+ * Cache avoiding checksumming functions utilizing KNI instructions
+ * Copyright (C) 1999 Zach Brown (with obvious credit due Ingo)
+ */
+
+#define XMMS_SAVE                              \
+       __asm__ __volatile__ (                  \
+               "movq %%cr0,%0          ;\n\t"  \
+               "clts                   ;\n\t"  \
+               "movups %%xmm0,(%1)     ;\n\t"  \
+               "movups %%xmm1,0x10(%1) ;\n\t"  \
+               "movups %%xmm2,0x20(%1) ;\n\t"  \
+               "movups %%xmm3,0x30(%1) ;\n\t"  \
+               : "=r" (cr0)                    \
+               : "r" (xmm_save)                \
+               : "memory")
+
+#define XMMS_RESTORE                           \
+       __asm__ __volatile__ (                  \
+               "sfence                 ;\n\t"  \
+               "movups (%1),%%xmm0     ;\n\t"  \
+               "movups 0x10(%1),%%xmm1 ;\n\t"  \
+               "movups 0x20(%1),%%xmm2 ;\n\t"  \
+               "movups 0x30(%1),%%xmm3 ;\n\t"  \
+               "movq   %0,%%cr0        ;\n\t"  \
+               :                               \
+               : "r" (cr0), "r" (xmm_save)     \
+               : "memory")
+
+#define OFFS(x)                "16*("#x")"
+#define PF_OFFS(x)     "256+16*("#x")"
+#define        PF0(x)          "       prefetchnta "PF_OFFS(x)"(%1)            ;\n"
+#define LD(x,y)                "       movaps   "OFFS(x)"(%1), %%xmm"#y"       ;\n"
+#define ST(x,y)                "       movaps %%xmm"#y",   "OFFS(x)"(%1)       ;\n"
+#define PF1(x)         "       prefetchnta "PF_OFFS(x)"(%2)            ;\n"
+#define PF2(x)         "       prefetchnta "PF_OFFS(x)"(%3)            ;\n"
+#define PF3(x)         "       prefetchnta "PF_OFFS(x)"(%4)            ;\n"
+#define PF4(x)         "       prefetchnta "PF_OFFS(x)"(%5)            ;\n"
+#define PF5(x)         "       prefetchnta "PF_OFFS(x)"(%6)            ;\n"
+#define XO1(x,y)       "       xorps   "OFFS(x)"(%2), %%xmm"#y"        ;\n"
+#define XO2(x,y)       "       xorps   "OFFS(x)"(%3), %%xmm"#y"        ;\n"
+#define XO3(x,y)       "       xorps   "OFFS(x)"(%4), %%xmm"#y"        ;\n"
+#define XO4(x,y)       "       xorps   "OFFS(x)"(%5), %%xmm"#y"        ;\n"
+#define XO5(x,y)       "       xorps   "OFFS(x)"(%6), %%xmm"#y"        ;\n"
+
+
+static void
+xor_sse_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
+{
+        unsigned long lines = bytes >> 8;
+       char xmm_save[16*4];
+       unsigned long cr0;
+
+       XMMS_SAVE;
+
+        __asm__ __volatile__ (
+#undef BLOCK
+#define BLOCK(i) \
+               LD(i,0)                                 \
+                       LD(i+1,1)                       \
+               PF1(i)                                  \
+                               PF1(i+2)                \
+                               LD(i+2,2)               \
+                                       LD(i+3,3)       \
+               PF0(i+4)                                \
+                               PF0(i+6)                \
+               XO1(i,0)                                \
+                       XO1(i+1,1)                      \
+                               XO1(i+2,2)              \
+                                       XO1(i+3,3)      \
+               ST(i,0)                                 \
+                       ST(i+1,1)                       \
+                               ST(i+2,2)               \
+                                       ST(i+3,3)       \
+
+
+               PF0(0)
+                               PF0(2)
+
+       " .align 32                     ;\n"
+        " 1:                            ;\n"
+
+               BLOCK(0)
+               BLOCK(4)
+               BLOCK(8)
+               BLOCK(12)
+
+        "       addq $256, %1           ;\n"
+        "       addq $256, %2           ;\n"
+        "       decq %0                 ;\n"
+        "       jnz 1b                  ;\n"
+       :
+       : "r" (lines),
+         "r" (p1), "r" (p2)
+        : "memory");
+
+       XMMS_RESTORE;
+}
+
+static void
+xor_sse_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
+         unsigned long *p3)
+{
+        unsigned long lines = bytes >> 8;
+       char xmm_save[16*4];
+       unsigned long cr0;
+
+       XMMS_SAVE;
+
+        __asm__ __volatile__ (
+#undef BLOCK
+#define BLOCK(i) \
+               PF1(i)                                  \
+                               PF1(i+2)                \
+               LD(i,0)                                 \
+                       LD(i+1,1)                       \
+                               LD(i+2,2)               \
+                                       LD(i+3,3)       \
+               PF2(i)                                  \
+                               PF2(i+2)                \
+               PF0(i+4)                                \
+                               PF0(i+6)                \
+               XO1(i,0)                                \
+                       XO1(i+1,1)                      \
+                               XO1(i+2,2)              \
+                                       XO1(i+3,3)      \
+               XO2(i,0)                                \
+                       XO2(i+1,1)                      \
+                               XO2(i+2,2)              \
+                                       XO2(i+3,3)      \
+               ST(i,0)                                 \
+                       ST(i+1,1)                       \
+                               ST(i+2,2)               \
+                                       ST(i+3,3)       \
+
+
+               PF0(0)
+                               PF0(2)
+
+       " .align 32                     ;\n"
+        " 1:                            ;\n"
+
+               BLOCK(0)
+               BLOCK(4)
+               BLOCK(8)
+               BLOCK(12)
+
+        "       addq $256, %1           ;\n"
+        "       addq $256, %2           ;\n"
+        "       addq $256, %3           ;\n"
+        "       decq %0                 ;\n"
+        "       jnz 1b                  ;\n"
+       :
+       : "r" (lines),
+         "r" (p1), "r"(p2), "r"(p3)
+        : "memory" );
+
+       XMMS_RESTORE;
+}
+
+static void
+xor_sse_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
+         unsigned long *p3, unsigned long *p4)
+{
+        unsigned long lines = bytes >> 8;
+       char xmm_save[16*4];
+       unsigned long cr0;
+
+       XMMS_SAVE;
+
+        __asm__ __volatile__ (
+#undef BLOCK
+#define BLOCK(i) \
+               PF1(i)                                  \
+                               PF1(i+2)                \
+               LD(i,0)                                 \
+                       LD(i+1,1)                       \
+                               LD(i+2,2)               \
+                                       LD(i+3,3)       \
+               PF2(i)                                  \
+                               PF2(i+2)                \
+               XO1(i,0)                                \
+                       XO1(i+1,1)                      \
+                               XO1(i+2,2)              \
+                                       XO1(i+3,3)      \
+               PF3(i)                                  \
+                               PF3(i+2)                \
+               PF0(i+4)                                \
+                               PF0(i+6)                \
+               XO2(i,0)                                \
+                       XO2(i+1,1)                      \
+                               XO2(i+2,2)              \
+                                       XO2(i+3,3)      \
+               XO3(i,0)                                \
+                       XO3(i+1,1)                      \
+                               XO3(i+2,2)              \
+                                       XO3(i+3,3)      \
+               ST(i,0)                                 \
+                       ST(i+1,1)                       \
+                               ST(i+2,2)               \
+                                       ST(i+3,3)       \
+
+
+               PF0(0)
+                               PF0(2)
+
+       " .align 32                     ;\n"
+        " 1:                            ;\n"
+
+               BLOCK(0)
+               BLOCK(4)
+               BLOCK(8)
+               BLOCK(12)
+
+        "       addq $256, %1           ;\n"
+        "       addq $256, %2           ;\n"
+        "       addq $256, %3           ;\n"
+        "       addq $256, %4           ;\n"
+        "       decq %0                 ;\n"
+        "       jnz 1b                  ;\n"
+       :
+       : "r" (lines),
+         "r" (p1), "r" (p2), "r" (p3), "r" (p4)
+        : "memory" );
+
+       XMMS_RESTORE;
+}
+
+static void
+xor_sse_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
+         unsigned long *p3, unsigned long *p4, unsigned long *p5)
+{
+        unsigned long lines = bytes >> 8;
+       char xmm_save[16*4];
+       unsigned long cr0;
+
+       XMMS_SAVE;
+
+        __asm__ __volatile__ (
+#undef BLOCK
+#define BLOCK(i) \
+               PF1(i)                                  \
+                               PF1(i+2)                \
+               LD(i,0)                                 \
+                       LD(i+1,1)                       \
+                               LD(i+2,2)               \
+                                       LD(i+3,3)       \
+               PF2(i)                                  \
+                               PF2(i+2)                \
+               XO1(i,0)                                \
+                       XO1(i+1,1)                      \
+                               XO1(i+2,2)              \
+                                       XO1(i+3,3)      \
+               PF3(i)                                  \
+                               PF3(i+2)                \
+               XO2(i,0)                                \
+                       XO2(i+1,1)                      \
+                               XO2(i+2,2)              \
+                                       XO2(i+3,3)      \
+               PF4(i)                                  \
+                               PF4(i+2)                \
+               PF0(i+4)                                \
+                               PF0(i+6)                \
+               XO3(i,0)                                \
+                       XO3(i+1,1)                      \
+                               XO3(i+2,2)              \
+                                       XO3(i+3,3)      \
+               XO4(i,0)                                \
+                       XO4(i+1,1)                      \
+                               XO4(i+2,2)              \
+                                       XO4(i+3,3)      \
+               ST(i,0)                                 \
+                       ST(i+1,1)                       \
+                               ST(i+2,2)               \
+                                       ST(i+3,3)       \
+
+
+               PF0(0)
+                               PF0(2)
+
+       " .align 32                     ;\n"
+        " 1:                            ;\n"
+
+               BLOCK(0)
+               BLOCK(4)
+               BLOCK(8)
+               BLOCK(12)
+
+        "       addq $256, %1           ;\n"
+        "       addq $256, %2           ;\n"
+        "       addq $256, %3           ;\n"
+        "       addq $256, %4           ;\n"
+        "       addq $256, %5           ;\n"
+        "       decq %0                 ;\n"
+        "       jnz 1b                  ;\n"
+       :
+       : "r" (lines),
+         "r" (p1), "r" (p2), "r" (p3), "r" (p4), "r" (p5)
+       : "memory");
+
+       XMMS_RESTORE;
+}
+
+static struct xor_block_template xor_block_pIII_sse = {
+        name: "pIII_sse",
+        do_2: xor_sse_2,
+        do_3: xor_sse_3,
+        do_4: xor_sse_4,
+        do_5: xor_sse_5,
+};
+
+/* Also try the generic routines.  */
+#include <asm-generic/xor.h>
+
+#undef XOR_TRY_TEMPLATES
+#define XOR_TRY_TEMPLATES                              \
+       do {                                            \
+               xor_speed(&xor_block_8regs);            \
+               xor_speed(&xor_block_32regs);           \
+               if (cpu_has_xmm)                        \
+                       xor_speed(&xor_block_pIII_sse); \
+               if (md_cpu_has_mmx()) {                 \
+                       xor_speed(&xor_block_pII_mmx);  \
+                       xor_speed(&xor_block_p5_mmx);   \
+               }                                       \
+       } while (0)
+
+/* We force the use of the SSE xor block because it can write around the L2
+   cache.  We may also be able to load into the L1 cache only, depending on
+   how the CPU deals with a load to a line that is being prefetched.  */
+#define XOR_SELECT_TEMPLATE(FASTEST) \
+       (cpu_has_xmm ? &xor_block_pIII_sse : FASTEST)
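To make the intent of the unrolled assembly above explicit, here is a hedged plain-C reference (not part of the patch) of what the two-source routines such as xor_sse_2() and xor_pII_mmx_2() compute. The assembly versions perform the same XOR accumulation, but walk the buffers in 64-, 128- or 256-byte chunks using MMX or SSE registers and, in the SSE case, prefetchnta hints to keep the streams out of the caches.

/* Plain-C reference of the two-source XOR used for RAID-5 parity
   (illustrative only; the templates above are the optimized versions). */
static void xor_ref_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
{
        unsigned long words = bytes / sizeof(unsigned long);
        unsigned long i;

        for (i = 0; i < words; i++)
                p1[i] ^= p2[i];         /* p1 accumulates the parity in place */
}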