]> git.neil.brown.name Git - history.git/commitdiff
Import 2.1.8 2.1.8
authorLinus Torvalds <torvalds@linuxfoundation.org>
Fri, 23 Nov 2007 20:12:38 +0000 (15:12 -0500)
committerLinus Torvalds <torvalds@linuxfoundation.org>
Fri, 23 Nov 2007 20:12:38 +0000 (15:12 -0500)
159 files changed:
CREDITS
Documentation/Configure.help
Documentation/ide.txt
Makefile
arch/alpha/lib/Makefile
arch/alpha/lib/strlen_user.S [new file with mode: 0644]
arch/i386/defconfig
arch/i386/kernel/entry.S
arch/i386/kernel/irq.c
arch/i386/kernel/setup.c
arch/i386/kernel/time.c
arch/i386/kernel/traps.c
arch/i386/mm/extable.c
arch/m68k/config.in
drivers/block/Config.in
drivers/block/Makefile
drivers/block/cmd640.c
drivers/block/floppy.c
drivers/block/genhd.c
drivers/block/ide-cd.c
drivers/block/ide-disk.c [new file with mode: 0644]
drivers/block/ide-floppy.c [new file with mode: 0644]
drivers/block/ide-probe.c [new file with mode: 0644]
drivers/block/ide-tape.c
drivers/block/ide-tape.h [deleted file]
drivers/block/ide.c
drivers/block/ide.h
drivers/block/opti621.c [new file with mode: 0644]
drivers/block/promise.c
drivers/block/triton.c
drivers/cdrom/mcdx.c
drivers/char/ChangeLog
drivers/char/console.c
drivers/char/keyb_m68k.c
drivers/char/keyboard.c
drivers/char/misc.c
drivers/char/n_tty.c
drivers/char/serial.c
drivers/char/tty_ioctl.c
drivers/net/3c501.c
drivers/net/3c59x.c
drivers/net/ppp.c
drivers/pci/pci.c
drivers/sbus/char/sunkbd.c
drivers/scsi/ChangeLog.ncr53c8xx
drivers/scsi/Config.in
drivers/scsi/README.ncr53c8xx
drivers/scsi/aha1542.c
drivers/scsi/dtc.c
drivers/scsi/dtc.h
drivers/scsi/fdomain.c
drivers/scsi/ncr53c8xx.c
drivers/scsi/ncr53c8xx.h
drivers/scsi/scsicam.c
fs/smbfs/inode.c
include/asm-alpha/ide.h [new file with mode: 0644]
include/asm-i386/checksum.h
include/asm-i386/ide.h [new file with mode: 0644]
include/asm-m68k/ide.h
include/asm-mips/checksum.h
include/asm-sparc/checksum.h
include/linux/hdreg.h
include/linux/icmpv6.h [new file with mode: 0644]
include/linux/if_arp.h
include/linux/in.h
include/linux/in6.h [new file with mode: 0644]
include/linux/ipv6.h [new file with mode: 0644]
include/linux/ipv6_route.h [new file with mode: 0644]
include/linux/keyboard.h
include/linux/limits.h
include/linux/locks.h
include/linux/module.h
include/linux/netdevice.h
include/linux/pci.h
include/linux/ppp_defs.h
include/linux/proc_fs.h
include/linux/route.h
include/linux/serial.h
include/linux/serial_reg.h
include/linux/skbuff.h
include/linux/socket.h
include/linux/sysctl.h
include/linux/tty.h
include/linux/tty_driver.h
include/linux/tty_ldisc.h
include/linux/un.h
include/net/addrconf.h [new file with mode: 0644]
include/net/checksum.h
include/net/if_inet6.h [new file with mode: 0644]
include/net/inet_common.h [new file with mode: 0644]
include/net/ipv6.h [new file with mode: 0644]
include/net/ipv6_route.h [new file with mode: 0644]
include/net/ndisc.h [new file with mode: 0644]
include/net/netlink.h
include/net/protocol.h
include/net/rawv6.h [new file with mode: 0644]
include/net/sit.h [new file with mode: 0644]
include/net/snmp.h
include/net/sock.h
include/net/tcp.h
include/net/transp_v6.h [new file with mode: 0644]
include/net/udp.h
init/main.c
kernel/fork.c
kernel/ksyms.c
kernel/module.c
kernel/sched.c
kernel/sysctl.c
net/Config.in
net/Makefile
net/bridge/br.c
net/core/dev.c
net/core/iovec.c
net/core/skbuff.c
net/core/sock.c
net/ethernet/eth.c
net/ipv4/Makefile
net/ipv4/af_inet.c
net/ipv4/icmp.c
net/ipv4/ip_masq.c
net/ipv4/ip_output.c
net/ipv4/packet.c
net/ipv4/proc.c
net/ipv4/protocol.c
net/ipv4/raw.c
net/ipv4/sysctl_net_ipv4.c
net/ipv4/tcp.c
net/ipv4/tcp_input.c
net/ipv4/tcp_ipv4.c [new file with mode: 0644]
net/ipv4/tcp_output.c
net/ipv4/tcp_timer.c
net/ipv4/timer.c
net/ipv4/udp.c
net/ipv6/Makefile [new file with mode: 0644]
net/ipv6/addrconf.c [new file with mode: 0644]
net/ipv6/af_inet6.c [new file with mode: 0644]
net/ipv6/datagram.c [new file with mode: 0644]
net/ipv6/exthdrs.c [new file with mode: 0644]
net/ipv6/icmp.c [new file with mode: 0644]
net/ipv6/ipv6_input.c [new file with mode: 0644]
net/ipv6/ipv6_output.c [new file with mode: 0644]
net/ipv6/ipv6_route.c [new file with mode: 0644]
net/ipv6/ipv6_sockglue.c [new file with mode: 0644]
net/ipv6/mcast.c [new file with mode: 0644]
net/ipv6/ndisc.c [new file with mode: 0644]
net/ipv6/protocol.c [new file with mode: 0644]
net/ipv6/raw.c [new file with mode: 0644]
net/ipv6/reassembly.c [new file with mode: 0644]
net/ipv6/sit.c [new file with mode: 0644]
net/ipv6/sysctl_net_ipv6.c [new file with mode: 0644]
net/ipv6/tcp_ipv6.c [new file with mode: 0644]
net/ipv6/udp.c [new file with mode: 0644]
net/netlink.c
net/netsyms.c
net/protocols.c
net/socket.c
net/sysctl_net.c
net/unix/af_unix.c
scripts/MAKEDEV.ide

diff --git a/CREDITS b/CREDITS
index 3b9dd035587fcf1936286be3e2063183baf7524f..b769ee6714fe91800585f4fa251cfa188a4addfe 100644 (file)
--- a/CREDITS
+++ b/CREDITS
@@ -707,12 +707,11 @@ S: Santa Clara, California 95050-3452
 S: USA
 
 N: Alain L. Knaff
-E: Alain.Knaff@imag.fr
+E: Alain.Knaff@poboxes.com
 D: floppy driver
-S: Appartement 310B
-S: 11, rue General Mangin
-S: 38100 Grenoble
-S: France
+S: 2a, rue de l'Acier
+S: L-4505 Differdange
+S: Luxembourg
 
 N: Harald Koenig
 E: koenig@tat.physik.uni-tuebingen.de
index 0eb664a73a73655fe2be63ff56b28ed9a1fd6439..e6cf888ce23669fc9595d3dbf6cfea55bac4f252 100644 (file)
@@ -1476,24 +1476,37 @@ CONFIG_SCSI_NCR53C8XX
   of PCI-SCSI controllers. This driver supports parity checking,
   tagged command queuing, fast scsi II transfer up to 10 MB/s with
   narrow scsi devices and 20 MB/s with wide scsi devices.
-  This driver has been tested OK with linux/i386 and is currently
-  untested under linux/Alpha. If you intend to use this driver under
-  linux/Alpha, just try it first with read-only or mounted read-only
-  devices.  Memory mapped io is currently not supported under
-  linux/Alpha. Please read drivers/scsi/README.ncr53c8xx for more
-  information. 
-
-force normal IO
+  Linux/i386 and Linux/Alpha are supported by this driver.
+  Memory mapped io is currently untested under Linux/Alpha.
+  Please read drivers/scsi/README.ncr53c8xx for more information.
+
+synchronous data transfers frequency
+CONFIG_SCSI_NCR53C8XX_SYNC
+  SCSI-2 specifications allow scsi devices to negotiate a synchronous 
+  transfer period of 25 nano-seconds or more.
+  The transfer period value is 4 times the agreed transfer period.
+  So, data can be transferred at a 10 MHz frequency, allowing 10 MB/second 
+  throughput with 8 bits scsi-2 devices and 20 MB/second with wide16 devices.
+  This frequency can be used safely with differential devices but may cause 
+  problems with singled-ended devices.
+  Specify 0 if you want to only use asynchronous data transfers.
+  Otherwise, specify a value between 5 and 10.
+  Commercial O/Ses generally use 5 Mhz frequency for synchronous transfers.
+  It is a reasonnable default value.
+  However, a flawless singled-ended scsi bus supports 10 MHz data transfers.
+  Regardless the value choosen in the Linux configuration, the synchronous 
+  period can be changed after boot-up through the /proc/scsi file system.
+  The generic command is:
+      echo "setsync #target period" >/proc/scsi/ncr53c8xx/0
+  Use a 25 ns period for 10 Mhz synchronous data transfers.
+
+use normal IO
 CONFIG_SCSI_NCR53C8XX_IOMAPPED
-  Under linux/Alpha only normal io is currently supported.
-  Under linux/i386, this option allows you to force the driver to use
-  normal IO.  Memory mapped IO has less latency than normal IO.
-  During the initialization phase, the driver first tries to use
-  memory mapped io.  If nothing seems wrong, it will use memory mapped
-  io.  If a flaw is detected, it will use normal io.  However, it's
-  possible that memory mapped does not work properly for you and the
-  driver has not detected the problem; then you would want to say Y
-  here.  The normal answer therefore is N.
+  Warning! Under linux/Alpha only normal io has been currently tested.
+  This option allows you to force the driver to use normal IO.
+  Memory mapped IO has less latency than normal IO and works for most 
+  Intel-based hardware.
+  The normal answer therefore is N.
 
 not allow targets to disconnect
 CONFIG_SCSI_NCR53C8XX_NO_DISCONNECT
@@ -1518,17 +1531,13 @@ CONFIG_SCSI_NCR53C8XX_TAGGED_QUEUE
   The safe answer therefore is N.
   The normal answer therefore is Y.
 
-force asynchronous transfer mode
-CONFIG_SCSI_NCR53C8XX_FORCE_ASYNCHRONOUS
-  This option allows you to force asynchronous transfer mode for all
-  devices at linux startup. You can enable synchronous negotiation
-  with the "setsync" control command after boot-up, for example:
-     echo "setsync 2 25" >/proc/scsi/ncr53c8xx/0
-  asks the driver to set the period to 25 ns (10MB/sec) for target 2
-  of controller 0 (please read drivers/scsi/README.ncr53c8xx for more
-  information). The safe answer therefore is Y.  The normal answer
-  therefore is N.
+maximum number of queued commands
+CONFIG_SCSI_NCR53C8XX_MAX_TAGS
+  This option allows you to specify the maximum number of commands that 
+  can be queud to a device, when tagged command queuing is possible.
+  The default value is 4. Minimum is 2, maximum is 12.
+  The normal answer therefore is the default one.
+
 force synchronous negotiation
 CONFIG_SCSI_NCR53C8XX_FORCE_SYNC_NEGO
   Some scsi-2 devices support synchronous negotiations but do not
index 277382b8c069c77e0be40b1cdddc02f75917cd7a..09a969f5cc5dbea55b1c90b1d0f4cb102409b59b 100644 (file)
@@ -1,4 +1,4 @@
-ide.txt -- Information regarding the Enhanced IDE drive in Linux 2.0.xx
+ide.txt -- Information regarding the Enhanced IDE drive in Linux 2.1.xx
 ===============================================================================
 Supported by:
        Mark Lord    <mlord@pobox.com>          -- disks, interfaces, probing
@@ -12,17 +12,18 @@ Supported by:
 
 See description later on below for handling BIG IDE drives with >1024 cyls.
 
-Major features of ide.c & ide-cd.c ("NEW!" marks changes since 1.2.13):
+Major features of the 2.1.xx IDE driver ("NEW!" marks changes since 2.0.xx):
 
-NEW!   - support for IDE ATAPI *tape* drives, courtesy of Gadi Oxman
+NEW!   - support for IDE ATAPI *floppy* drives
+       - support for IDE ATAPI *tape* drives, courtesy of Gadi Oxman
                (re-run MAKEDEV.ide to create the tape device entries in /dev/)
-NEW!   - support for up to *four* IDE interfaces on one or more IRQs
-NEW!   - support for any mix of up to *eight* disk and/or cdrom drives
+       - support for up to *four* IDE interfaces on one or more IRQs
+       - support for any mix of up to *eight* IDE drives
        - support for reading IDE ATAPI cdrom drives (NEC,MITSUMI,VERTOS,SONY)
        - support for audio functions
        - auto-detection of interfaces, drives, IRQs, and disk geometries
                - "single" drives should be jumpered as "master", not "slave"
-NEW!             (both are now probed for)
+                 (both are now probed for)
        - support for BIOSs which report "more than 16 heads" on disk drives
        - uses LBA (slightly faster) on disk drives which support it
        - support for lots of fancy (E)IDE drive functions with hdparm utility
@@ -32,44 +33,42 @@ NEW!                  (both are now probed for)
        - improved handshaking and error detection/recovery
        - can co-exist with hd.c controlling the first interface
        - run-time selectable 32bit interface support (using hdparm-2.3)
-NEW!   - support for reliable operation of buggy RZ1000 interfaces
+       - support for reliable operation of buggy RZ1000 interfaces
                - PCI support is automatic when rz1000 support is configured
-NEW!   - support for reliable operation of buggy CMD-640 interfaces
+       - support for reliable operation of buggy CMD-640 interfaces
                - PCI support is automatic when cmd640 support is configured
                - for VLB, use kernel command line option:   ide0=cmd640_vlb
                - this support also enables the secondary i/f when needed
                - interface PIO timing & prefetch parameter support
-NEW!   - experimental support for UMC 8672 interfaces
-NEW!   - support for secondary interface on the FGI/Holtek HT-6560B VLB i/f
+       - experimental support for UMC 8672 interfaces
+       - support for secondary interface on the FGI/Holtek HT-6560B VLB i/f
                - use kernel command line option:   ide0=ht6560
-NEW!   - experimental support for various IDE chipsets
+       - experimental support for various IDE chipsets
                - use appropriate kernel command line option from list below
-NEW!   - support for drives with a stuck WRERR_STAT bit
-NEW!   - support for removable devices, including door lock/unlock
-NEW!   - transparent support for DiskManager 6.0x and "Dynamic Disk Overlay"
+       - support for drives with a stuck WRERR_STAT bit
+       - support for removable devices, including door lock/unlock
+       - transparent support for DiskManager 6.0x and "Dynamic Disk Overlay"
        - works with Linux fdisk, LILO, loadlin, bootln, etc..
-NEW!   - mostly transparent support for EZ-Drive disk translation software
-NEW!           - to use LILO with EZ, install LILO on the linux partition
+       - mostly transparent support for EZ-Drive disk translation software
+               - to use LILO with EZ, install LILO on the linux partition
                  rather than on the master boot record, and then mark the
                  linux partition as "bootable" or "active" using fdisk.
                  (courtesy of Juha Laiho <jlaiho@ichaos.nullnet.fi>).
-NEW!   - auto-detect of disk translations by examining partition table
-NEW!   - ide-cd.c now compiles separate from ide.c
-NEW!   - Bus-Master DMA support for Intel PCI Triton chipset IDE interfaces
+       - auto-detect of disk translations by examining partition table
+       - ide-cd.c now compiles separate from ide.c
+       - Bus-Master DMA support for Intel PCI Triton chipset IDE interfaces
                - for details, see comments at top of triton.c
-NEW!    - ide-cd.c now supports door locking and auto-loading.
+       - ide-cd.c now supports door locking and auto-loading.
                - Also preliminary support for multisession
                  and direct reads of audio data.
-NEW!   - experimental support for Promise DC4030VL caching interface card
-NEW!           - email thanks/problems to: peterd@pnd-pc.demon.co.uk
-NEW!   - the hdparm-3.1 package can be used to set PIO modes for some chipsets.
+       - experimental support for Promise DC4030VL caching interface card
+               - email thanks/problems to: peterd@pnd-pc.demon.co.uk
+       - the hdparm-3.1 package can be used to set PIO modes for some chipsets.
+NEW!   - support for the OPTi 82C621 chipset, courtesy of Jaromir Koutek.
+NEW!   - support for loadable modules
 
-For work in progress, see the comments in ide.c, ide-cd.c, and triton.c.
-
-Note that there is now a group actively working on support for the Promise
-caching IDE cards, such as the DC4030VL, and early results are encouraging.
-Look for this support to be added to the kernel soon.
 
+For work in progress, see the comments in ide.c, ide-cd.c, triton.c, ...
 
 ***  IMPORTANT NOTICES:  BUGGY IDE CHIPSETS CAN CORRUPT DATA!!
 ***  =================
@@ -258,7 +257,7 @@ Summary of ide driver parameters for kernel "command line":
  "hdx=slow"            : insert a huge pause after each access to the data
                                port. Should be used only as a last resort.
 
- "idebus=xx"           : inform IDE driver of VESA/PCI bus speed in Mhz,
+ "idebus=xx"           : inform IDE driver of VESA/PCI bus speed in MHz,
                                where "xx" is between 20 and 66 inclusive,
                                used when tuning chipset PIO modes.
                                For PCI bus, 25 is correct for a P75 system,
@@ -311,7 +310,7 @@ decoding and tri-state buffers, although several newer localbus cards go much
 beyond the basics.  When purchasing a localbus IDE interface, avoid cards with
 an onboard BIOS and those which require special drivers.  Instead, look for a
 card which uses hardware switches/jumpers to select the interface timing speed,
-to allow much faster data transfers than the original 8Mhz ISA bus allows.
+to allow much faster data transfers than the original 8MHz ISA bus allows.
 
 ATA = AT (the old IBM 286 computer) Attachment Interface, a draft American
 National Standard for connecting hard drives to PCs.  This is the official
index e688cc234a4d7e00ddf15ce1fe30258619ba496d..03e2052abf12acb03788346dc02220cf4d308d31 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -1,6 +1,6 @@
 VERSION = 2
 PATCHLEVEL = 1
-SUBLEVEL = 7
+SUBLEVEL = 8
 
 ARCH = i386
 
@@ -313,7 +313,7 @@ clean:      archclean
        rm -f core `find . -type f -name 'core' -print`
        rm -f vmlinux System.map
        rm -f .tmp* drivers/sound/configure
-       rm -fr modules/*
+       rm -f modules/*
        rm -f submenu*
 
 mrproper: clean
index 876c09fc87848c0b0e6e357e66bbd34e667b94bc..87035315e0c6f7f48340abf0451534b0345d00ec 100644 (file)
@@ -6,7 +6,7 @@ OBJS  = __divqu.o __remqu.o __divlu.o __remlu.o memset.o memcpy.o io.o \
        checksum.o csum_partial_copy.o strlen.o \
        strcat.o strcpy.o strncat.o strncpy.o stxcpy.o stxncpy.o \
        strchr.o strrchr.o \
-       copy_user.o clear_user.o strncpy_from_user.o
+       copy_user.o clear_user.o strncpy_from_user.o strlen_user.o
 
 lib.a: $(OBJS)
        $(AR) rcs lib.a $(OBJS)
diff --git a/arch/alpha/lib/strlen_user.S b/arch/alpha/lib/strlen_user.S
new file mode 100644 (file)
index 0000000..e702893
--- /dev/null
@@ -0,0 +1,65 @@
+/*
+ * arch/alpha/lib/strlen_user.S
+ *
+ * Just like strlen except returns -EFAULT if an exception occurs
+ * before the terminator is found.
+ */
+
+#include <alpha/regdef.h>
+
+
+/* Allow an exception for an insn; exit if we get one.  */
+#define EX(x,y...)                     \
+       99: x,##y;                      \
+       .section __ex_table,"a";        \
+       .gprel32 99b;                   \
+       lda zero, $exception-99b(v0);   \
+       .text
+
+
+       .set noreorder
+       .set noat
+       .text
+
+       .globl strlen_user
+       .ent strlen_user
+       .frame sp, 0, ra
+
+       .align 3
+strlen_user:
+       .prologue 0
+
+       EX( ldq_u t0, 0(a0) )   # load first quadword (a0 may be misaligned)
+       lda     t1, -1(zero)
+       insqh   t1, a0, t1
+       andnot  a0, 7, v0
+       or      t1, t0, t0
+       nop                     # dual issue the next two on ev5
+       cmpbge  zero, t0, t1    # t1 <- bitmask: bit i == 1 <==> i-th byte == 0
+       bne     t1, $found
+
+$loop: EX( ldq t0, 8(v0) )
+       addq    v0, 8, v0       # addr += 8
+       cmpbge  zero, t0, t1
+       beq     t1, $loop
+
+$found:        negq    t1, t2          # clear all but least set bit
+       and     t1, t2, t1
+
+       and     t1, 0xf0, t2    # binary search for that set bit
+       and     t1, 0xcc, t3
+       and     t1, 0xaa, t4
+       cmovne  t2, 4, t2
+       cmovne  t3, 2, t3
+       cmovne  t4, 1, t4
+       addq    t2, t3, t2
+       addq    v0, t4, v0
+       addq    v0, t2, v0
+       nop                     # dual issue next two on ev4 and ev5
+
+       subq    v0, a0, v0
+
+$exception:
+       ret
+
+       .end strlen_user
index 01c3aeae652c05bacd6bbf2643a9fdd842b8a4bf..07f6819a880f40661fb486853a28aee819554940 100644 (file)
@@ -39,8 +39,10 @@ CONFIG_BLK_DEV_IDE=y
 # Please see Documentation/ide.txt for help/info on IDE drives
 #
 # CONFIG_BLK_DEV_HD_IDE is not set
+CONFIG_BLK_DEV_IDEDISK=y
 CONFIG_BLK_DEV_IDECD=y
 # CONFIG_BLK_DEV_IDETAPE is not set
+# CONFIG_BLK_DEV_IDEFLOPPY is not set
 # CONFIG_BLK_DEV_IDE_PCMCIA is not set
 CONFIG_BLK_DEV_CMD640=y
 # CONFIG_BLK_DEV_CMD640_ENHANCED is not set
index 19b91d82148f874441dc45152e9f41bee9b84a44..753d5bbb63b08a47e686fa2c57f57b799db3f6d5 100644 (file)
@@ -271,15 +271,11 @@ ENTRY(system_call)
 #ifdef __SMP__
        ENTER_KERNEL
 #endif
-       movl $-ENOSYS,EAX(%esp)
        cmpl $(NR_syscalls),%eax
-       jae ret_from_sys_call
-       movl SYMBOL_NAME(sys_call_table)(,%eax,4),%eax
-       testl %eax,%eax
-       je ret_from_sys_call
+       jae badsys
        testb $0x20,flags(%ebx)         # PF_TRACESYS
        jne tracesys
-       call *%eax
+       call SYMBOL_NAME(sys_call_table)(,%eax,4)
        movl %eax,EAX(%esp)             # save the return value
        ALIGN
        .globl ret_from_sys_call
@@ -327,6 +323,9 @@ tracesys:
        movl %eax,EAX(%esp)             # save the return value
        call SYMBOL_NAME(syscall_trace)
        jmp ret_from_sys_call
+badsys:
+       movl $-ENOSYS,EAX(%esp)
+       jmp ret_from_sys_call
 
 
 ENTRY(divide_error)
@@ -452,6 +451,11 @@ ENTRY(page_fault)
        pushl $ SYMBOL_NAME(do_page_fault)
        jmp error_code
 
+ENTRY(spurious_interrupt_bug)
+       pushl $0
+       pushl $ SYMBOL_NAME(do_spurious_interrupt_bug)
+       jmp error_code
+
 .data
 ENTRY(sys_call_table)
        .long SYMBOL_NAME(sys_setup)            /* 0 */
@@ -591,7 +595,7 @@ ENTRY(sys_call_table)
        .long SYMBOL_NAME(sys_bdflush)
        .long SYMBOL_NAME(sys_sysfs)            /* 135 */
        .long SYMBOL_NAME(sys_personality)
-       .long 0                                 /* for afs_syscall */
+       .long SYMBOL_NAME(sys_ni_syscall)       /* for afs_syscall */
        .long SYMBOL_NAME(sys_setfsuid)
        .long SYMBOL_NAME(sys_setfsgid)
        .long SYMBOL_NAME(sys_llseek)           /* 140 */
@@ -620,4 +624,6 @@ ENTRY(sys_call_table)
        .long SYMBOL_NAME(sys_mremap)
        .long SYMBOL_NAME(sys_setresuid)
        .long SYMBOL_NAME(sys_getresuid)
-       .space (NR_syscalls-165)*4
+       .rept NR_syscalls-165
+               .long SYMBOL_NAME(sys_ni_syscall)
+       .endr
index 0903c624ecc82026a42eb04867e856d66b14db92..e131e5a45af683bb08995a6b2958149f94baa00b 100644 (file)
@@ -522,7 +522,7 @@ int probe_irq_off (unsigned long irqs)
 
        irqmask = (((unsigned int)cache_A1)<<8) | (unsigned int)cache_21;
 #ifdef DEBUG
-       printk("probe_irq_off: irqs=0x%04x irqmask=0x%04x\n", irqs, irqmask);
+       printk("probe_irq_off: irqs=0x%04lx irqmask=0x%04x\n", irqs, irqmask);
 #endif
        irqs &= irqmask;
        if (!irqs)
index cde6354ee7362d23d8a0975904856df20acf9efe..3dae50628c4227e937aabb614499f65ee76e3cc1 100644 (file)
@@ -185,7 +185,7 @@ void setup_arch(char **cmdline_p,
 
 #ifdef CONFIG_BLK_DEV_INITRD
        if (LOADER_TYPE) {
-               initrd_start = INITRD_START + PAGE_OFFSET;
+               initrd_start = INITRD_START ? INITRD_START + PAGE_OFFSET : 0;
                initrd_end = initrd_start+INITRD_SIZE;
                if (initrd_end > memory_end) {
                        printk("initrd extends beyond end of memory "
index d2960d6e5f7cb2e95c7b5116abfc7f33483b7537..2016a90f107aa29fa240b6901d5240361326d0c8 100644 (file)
 #include <linux/string.h>
 #include <linux/mm.h>
 #include <linux/interrupt.h>
+#include <linux/time.h>
+#include <linux/delay.h>
 
 #include <asm/uaccess.h>
 #include <asm/io.h>
 #include <asm/irq.h>
+#include <asm/delay.h>
 
 #include <linux/mc146818rtc.h>
 #include <linux/timex.h>
@@ -462,6 +465,21 @@ void time_init(void)
                                    needs more debugging. */
        if (x86_capability & 16) {
                do_gettimeoffset = do_fast_gettimeoffset;
+
+               if( strcmp( x86_vendor_id, "AuthenticAMD" ) == 0 ) {
+                       if( x86 == 5 ) {
+                               if( x86_model == 0 ) {
+                                       /* turn on cycle counters during power down */
+                                       __asm__ __volatile__ (" movl $0x83, %%ecx \n \
+                                                               rdmsr \n \
+                                                               orl $1,%%eax \n \
+                                                               wrmsr \n " 
+                                                                : : : "ax", "cx", "dx" );
+                                       udelay(500);
+                               }
+                       }
+               }
+
                /* read Pentium cycle counter */
                __asm__(".byte 0x0f,0x31"
                        :"=a" (init_timer_cc.low),
index b285161b5b5f7d3ba1baf31826be46418b6abd6b..8dfab81c503f6fd686a203dcbc69077f14ec35ed 100644 (file)
@@ -81,6 +81,7 @@ asmlinkage void page_fault(void);
 asmlinkage void coprocessor_error(void);
 asmlinkage void reserved(void);
 asmlinkage void alignment_check(void);
+asmlinkage void spurious_interrupt_bug(void);
 
 int kstack_depth_to_print = 24;
 
@@ -174,8 +175,8 @@ DO_ERROR( 9, SIGFPE,  "coprocessor segment overrun", coprocessor_segment_overrun
 DO_ERROR(10, SIGSEGV, "invalid TSS", invalid_TSS, current)
 DO_ERROR(11, SIGBUS,  "segment not present", segment_not_present, current)
 DO_ERROR(12, SIGBUS,  "stack segment", stack_segment, current)
-DO_ERROR(15, SIGSEGV, "reserved", reserved, current)
 DO_ERROR(17, SIGSEGV, "alignment check", alignment_check, current)
+DO_ERROR(18, SIGSEGV, "reserved", reserved, current)
 
 asmlinkage void do_general_protection(struct pt_regs * regs, long error_code)
 {
@@ -259,6 +260,12 @@ asmlinkage void do_coprocessor_error(struct pt_regs * regs, long error_code)
        math_error();
 }
 
+asmlinkage void do_spurious_interrupt_bug(struct pt_regs * regs,
+                                         long error_code)
+{
+       printk("Ignoring P6 Local APIC Spurious Interrupt Bug...\n");
+}
+
 /*
  *  'math_state_restore()' saves the current math information in the
  * old math state array, and gets the new ones from the current task
@@ -344,7 +351,7 @@ void trap_init(void)
        set_trap_gate(12,&stack_segment);
        set_trap_gate(13,&general_protection);
        set_trap_gate(14,&page_fault);
-       set_trap_gate(15,&reserved);
+       set_trap_gate(15,&spurious_interrupt_bug);
        set_trap_gate(16,&coprocessor_error);
        set_trap_gate(17,&alignment_check);
        for (i=18;i<48;i++)
index b5b0367fbc8c35483425ef733b85367dde725c5b..c43a8d43d5a7651b7e163dcfbe3b94d9df13b5e3 100644 (file)
@@ -2,6 +2,8 @@
  * linux/arch/i386/mm/extable.c
  */
 
+#include <linux/config.h>
+#include <linux/module.h>
 #include <asm/uaccess.h>
 
 extern const struct exception_table_entry __start___ex_table[];
@@ -37,6 +39,9 @@ unsigned long
 search_exception_table(unsigned long addr)
 {
        unsigned long ret;
+#ifdef CONFIG_MODULES
+       struct module *mp;
+#endif
 
        /* Search the kernel's table first.  */
        ret = search_one_table(__start___ex_table,
@@ -44,7 +49,15 @@ search_exception_table(unsigned long addr)
        if (ret)
                return ret;
 
-       /* FIXME -- search the module's tables here */
-
+#ifdef CONFIG_MODULES
+       for (mp = module_list; mp != NULL; mp = mp->next) {
+               if (mp->exceptinfo.start != NULL) {
+                       ret = search_one_table(mp->exceptinfo.start,
+                               mp->exceptinfo.stop-1, addr);
+                       if (ret)
+                               return ret;
+               }
+       }
+#endif
        return 0;
 }
index 8a82be452d23ce183b3bfa35330b8f59e26bb7a6..dd45bea571f07a89b4535659038ef102920e1bae 100644 (file)
@@ -71,9 +71,12 @@ mainmenu_option next_comment
 comment 'Floppy, IDE, and other block devices'
 
 tristate 'Normal floppy disk support' CONFIG_BLK_DEV_FD
-bool 'IDE harddisk support' CONFIG_BLK_DEV_IDE
-if [ "$CONFIG_BLK_DEV_IDE" = "y" ]; then
-  bool '   Include IDE/ATAPI CDROM support' CONFIG_BLK_DEV_IDECD
+tristate 'Enhanced IDE/MFM/RLL disk/cdrom/tape/floppy support' CONFIG_BLK_DEV_IDE
+if [ "$CONFIG_BLK_DEV_IDE" != "n" ]; then
+  dep_tristate '   Include IDE/ATA-2 DISK support' CONFIG_BLK_DEV_IDEDISK $CONFIG_BLK_DEV_IDE
+  dep_tristate '   Include IDE/ATAPI CDROM support' CONFIG_BLK_DEV_IDECD $CONFIG_BLK_DEV_IDE
+  dep_tristate '   Include IDE/ATAPI TAPE support' CONFIG_BLK_DEV_IDETAPE $CONFIG_BLK_DEV_IDE
+  dep_tristate '   Include IDE/ATAPI FLOPPY support' CONFIG_BLK_DEV_IDEFLOPPY $CONFIG_BLK_DEV_IDE
 fi
 if [ "$CONFIG_AMIGA" = "y" ]; then
 tristate 'Amiga Zorro II ramdisk support' CONFIG_AMIGA_Z2RAM
index 050570a07214fa51f15bd328fb07470655c608ad..99a5773f3e0d5803c2e429134e6de752935b8127 100644 (file)
@@ -5,34 +5,40 @@ mainmenu_option next_comment
 comment 'Floppy, IDE, and other block devices'
 
 tristate 'Normal floppy disk support' CONFIG_BLK_DEV_FD
-bool 'Enhanced IDE/MFM/RLL disk/cdrom/tape support' CONFIG_BLK_DEV_IDE
+tristate 'Enhanced IDE/MFM/RLL disk/cdrom/tape/floppy support' CONFIG_BLK_DEV_IDE
 comment 'Please see Documentation/ide.txt for help/info on IDE drives'
 if [ "$CONFIG_BLK_DEV_IDE" = "n" ]; then
   bool 'Old harddisk (MFM/RLL/IDE) driver' CONFIG_BLK_DEV_HD_ONLY
 else
   bool '   Use old disk-only driver on primary interface' CONFIG_BLK_DEV_HD_IDE
-  bool '   Include IDE/ATAPI CDROM support' CONFIG_BLK_DEV_IDECD
-  bool '   Include IDE/ATAPI TAPE support' CONFIG_BLK_DEV_IDETAPE
-  bool '   Support removable IDE interfaces (PCMCIA)' CONFIG_BLK_DEV_IDE_PCMCIA
-  bool '   CMD640 chipset bugfix/support' CONFIG_BLK_DEV_CMD640
-  if [ "$CONFIG_BLK_DEV_CMD640" = "y" ]; then
-    bool '     CMD640 enhanced support' CONFIG_BLK_DEV_CMD640_ENHANCED
-  fi
-  if [ "$CONFIG_PCI" = "y" ]; then
-    bool '   RZ1000 chipset bugfix/support' CONFIG_BLK_DEV_RZ1000
-    bool '   Intel 82371 PIIX (Triton I/II) DMA support' CONFIG_BLK_DEV_TRITON
-  fi
-  bool '   Other IDE chipset support' CONFIG_IDE_CHIPSETS
-  if [ "$CONFIG_IDE_CHIPSETS" = "y" ]; then
-    comment 'Note: most of these also require special kernel boot parameters'
-    bool '      ALI M14xx       support' CONFIG_BLK_DEV_ALI14XX
-    bool '      DTC-2278        support' CONFIG_BLK_DEV_DTC2278
-    bool '      Holtek HT6560B  support' CONFIG_BLK_DEV_HT6560B
-    if [ "$CONFIG_EXPERIMENTAL" = "y" ]; then
-      bool '      PROMISE DC4030  support (EXPERIMENTAL)' CONFIG_BLK_DEV_PROMISE
+  dep_tristate '   Include IDE/ATA-2 DISK support' CONFIG_BLK_DEV_IDEDISK $CONFIG_BLK_DEV_IDE
+  dep_tristate '   Include IDE/ATAPI CDROM support' CONFIG_BLK_DEV_IDECD $CONFIG_BLK_DEV_IDE
+  dep_tristate '   Include IDE/ATAPI TAPE support' CONFIG_BLK_DEV_IDETAPE $CONFIG_BLK_DEV_IDE
+  dep_tristate '   Include IDE/ATAPI FLOPPY support' CONFIG_BLK_DEV_IDEFLOPPY $CONFIG_BLK_DEV_IDE
+  if [ "$CONFIG_BLK_DEV_IDE" = "y" ]; then
+    bool '   CMD640 chipset bugfix/support' CONFIG_BLK_DEV_CMD640
+    if [ "$CONFIG_BLK_DEV_CMD640" = "y" ]; then
+      bool '     CMD640 enhanced support' CONFIG_BLK_DEV_CMD640_ENHANCED
+    fi
+    if [ "$CONFIG_PCI" = "y" ]; then
+      bool '   RZ1000 chipset bugfix/support' CONFIG_BLK_DEV_RZ1000
+      bool '   Intel 82371 PIIX (Triton I/II) DMA support' CONFIG_BLK_DEV_TRITON
+    fi
+    bool '   Other IDE chipset support' CONFIG_IDE_CHIPSETS
+    if [ "$CONFIG_IDE_CHIPSETS" = "y" ]; then
+      comment 'Note: most of these also require special kernel boot parameters'
+      bool '      ALI M14xx       support' CONFIG_BLK_DEV_ALI14XX
+      bool '      DTC-2278        support' CONFIG_BLK_DEV_DTC2278
+      bool '      Holtek HT6560B  support' CONFIG_BLK_DEV_HT6560B
+      if [ "$CONFIG_EXPERIMENTAL" = "y" ]; then
+        bool '      PROMISE DC4030  support (EXPERIMENTAL)' CONFIG_BLK_DEV_PROMISE
+        if [ "$CONFIG_PCI" = "y" ]; then
+          bool '      OPTi 82C621     support (EXPERIMENTAL)' CONFIG_BLK_DEV_OPTI621
+        fi
+      fi
+      bool '      QDI QD6580      support' CONFIG_BLK_DEV_QD6580
+      bool '      UMC 8672        support' CONFIG_BLK_DEV_UMC8672
     fi
-    bool '      QDI QD6580      support' CONFIG_BLK_DEV_QD6580
-    bool '      UMC 8672        support' CONFIG_BLK_DEV_UMC8672
   fi
 fi
 
index 2ebff1c0fc3000c93f47ace2a805e707a36857f1..28d3bb4983730a0465a9d2139aaf4c081dfeef47 100644 (file)
@@ -50,7 +50,11 @@ L_OBJS += hd.o
 endif
 
 ifeq ($(CONFIG_BLK_DEV_IDE),y)
-L_OBJS += ide.o
+L_OBJS += ide.o ide-probe.o
+else
+  ifeq ($(CONFIG_BLK_DEV_IDE),m)
+  M_OBJS += ide.o ide-probe.o
+  endif
 endif
 
 ifeq ($(CONFIG_BLK_DEV_RZ1000),y)
@@ -89,12 +93,40 @@ ifeq ($(CONFIG_BLK_DEV_PROMISE),y)
 L_OBJS += promise.o
 endif
 
+ifeq ($(CONFIG_BLK_DEV_OPTI621),y)
+L_OBJS += opti621.o
+endif
+
+ifeq ($(CONFIG_BLK_DEV_IDEDISK),y)
+L_OBJS += ide-disk.o
+else
+  ifeq ($(CONFIG_BLK_DEV_IDEDISK),m)
+  M_OBJS += ide-disk.o
+  endif
+endif
+
 ifeq ($(CONFIG_BLK_DEV_IDECD),y)
 L_OBJS += ide-cd.o
+else
+  ifeq ($(CONFIG_BLK_DEV_IDECD),m)
+  M_OBJS += ide-cd.o
+  endif
 endif
 
 ifeq ($(CONFIG_BLK_DEV_IDETAPE),y)
 L_OBJS += ide-tape.o
+else
+  ifeq ($(CONFIG_BLK_DEV_IDETAPE),m)
+  M_OBJS += ide-tape.o
+  endif
+endif
+
+ifeq ($(CONFIG_BLK_DEV_IDEFLOPPY),y)
+L_OBJS += ide-floppy.o
+else
+  ifeq ($(CONFIG_BLK_DEV_IDEFLOPPY),m)
+  M_OBJS += ide-floppy.o
+  endif
 endif
 
 ifeq ($(CONFIG_BLK_DEV_XD),y)
index b8132dcd27c15b418cf308b3015aebb9865beee4..1110512e55f996645e392e94dbd2491fafcd1076 100644 (file)
@@ -431,9 +431,9 @@ static void setup_device_ptrs (void)
        for (i = 0; i < MAX_HWIFS; i++) {
                ide_hwif_t *hwif = &ide_hwifs[i];
                if (hwif->chipset == ide_unknown || hwif->chipset == ide_generic) {
-                       if (hwif->io_base == 0x1f0)
+                       if (hwif->io_ports[IDE_DATA_OFFSET] == 0x1f0)
                                cmd_hwif0 = hwif;
-                       else if (hwif->io_base == 0x170)
+                       else if (hwif->io_ports[IDE_DATA_OFFSET] == 0x170)
                                cmd_hwif1 = hwif;
                }
        }
@@ -678,11 +678,10 @@ static void cmd640_tune_drive (ide_drive_t *drive, byte mode_wanted)
        (void) ide_get_best_pio_mode (drive, mode_wanted, 5, &d);
        cmd640_set_mode (index, d.pio_mode, d.cycle_time);
 
-       printk ("%s: selected cmd640 PIO mode%d (%dns) %s/IORDY%s",
+       printk ("%s: selected cmd640 PIO mode%d (%dns)%s",
                drive->name,
                d.pio_mode,
                d.cycle_time,
-               d.use_iordy ? "w" : "wo",
                d.overridden ? " (overriding vendor mode)" : "");
        display_clocks(index);
 }
index 9ae5eab73cf766133f869d47a96833d97f4e3f97..e32082e2a8472cd3c3e154be721f919396bd2c1e 100644 (file)
 
 /* do print messages for unexpected interrupts */
 static int print_unex=1;
-#include <linux/utsname.h>
 #include <linux/module.h>
 
 /* the following is the mask of allowed drives. By default units 2 and
@@ -132,21 +131,6 @@ static int allowed_drive_mask = 0x33;
 #include <linux/fd.h>
 #include <linux/hdreg.h>
 
-#define OLDFDRAWCMD 0x020d /* send a raw command to the FDC */
-
-struct old_floppy_raw_cmd {
-  void *data;
-  long length;
-
-  unsigned char rate;
-  unsigned char flags;
-  unsigned char cmd_count;
-  unsigned char cmd[9];
-  unsigned char reply_count;
-  unsigned char reply[7];
-  int track;
-};
-
 #include <linux/errno.h>
 #include <linux/malloc.h>
 #include <linux/mm.h>
@@ -2419,6 +2403,17 @@ static void copy_buffer(int ssize, int max_sector, int max_sector_2)
 #endif
 }
 
+static inline int check_dma_crossing(char *start, 
+                                    unsigned long length, char *message)
+{
+       if (CROSS_64KB(start, length)) {
+               printk("DMA xfer crosses 64KB boundary in %s %p-%p\n", 
+                      message, start, start+length);
+               return 1;
+       } else
+               return 0;
+}
+
 /*
  * Formulate a read/write request.
  * this routine decides where to load the data (directly to buffer, or to
@@ -2570,6 +2565,9 @@ static int make_raw_rw_request(void)
                                        indirect, direct, sector_t);
                                return 0;
                        }
+                       check_dma_crossing(raw_cmd->kernel_data, 
+                                          raw_cmd->length, 
+                                          "end of make_raw_request [1]");
                        return 2;
                }
        }
@@ -2615,6 +2613,8 @@ static int make_raw_rw_request(void)
        raw_cmd->length = ((raw_cmd->length -1)|(ssize-1))+1;
        raw_cmd->length <<= 9;
 #ifdef FLOPPY_SANITY_CHECK
+       check_dma_crossing(raw_cmd->kernel_data, raw_cmd->length, 
+                          "end of make_raw_request");
        if ((raw_cmd->length < current_count_sectors << 9) ||
            (raw_cmd->kernel_data != CURRENT->buffer &&
             CT(COMMAND) == FD_WRITE &&
@@ -2850,13 +2850,11 @@ static inline int fd_copyin(void *param, void *address, int size)
        return copy_from_user(address, param, size) ? -EFAULT : 0;
 }
 
-static inline int write_user_long(unsigned long useraddr, unsigned long value)
-{
-       return put_user(value, (unsigned long *)useraddr) ? -EFAULT : 0;
-}
+#define _COPYOUT(x) (copy_to_user((void *)param, &(x), sizeof(x)) ? -EFAULT : 0)
+#define _COPYIN(x) (copy_from_user(&(x), (void *)param, sizeof(x)) ? -EFAULT : 0)
 
-#define COPYOUT(x) ECALL(fd_copyout((void *)param, &(x), sizeof(x)))
-#define COPYIN(x) ECALL(fd_copyin((void *)param, &(x), sizeof(x)))
+#define COPYOUT(x) ECALL(_COPYOUT(x))
+#define COPYIN(x) ECALL(_COPYIN(x))
 
 static inline const char *drive_name(int type, int drive)
 {
@@ -2927,24 +2925,11 @@ static struct cont_t raw_cmd_cont={
 static inline int raw_cmd_copyout(int cmd, char *param,
                                  struct floppy_raw_cmd *ptr)
 {
-       struct old_floppy_raw_cmd old_raw_cmd;
        int ret;
 
        while(ptr) {
-               if (cmd == OLDFDRAWCMD) {
-                       old_raw_cmd.flags = ptr->flags;
-                       old_raw_cmd.data = ptr->data;
-                       old_raw_cmd.length = ptr->length;
-                       old_raw_cmd.rate = ptr->rate;
-                       old_raw_cmd.reply_count = ptr->reply_count;
-                       memcpy(old_raw_cmd.reply, ptr->reply, 7);
-                       COPYOUT(old_raw_cmd);
-                       param += sizeof(old_raw_cmd);
-               } else {
-                       COPYOUT(*ptr);
-                       param += sizeof(struct floppy_raw_cmd);
-               }
-
+               COPYOUT(*ptr);
+               param += sizeof(struct floppy_raw_cmd);
                if ((ptr->flags & FD_RAW_READ) && ptr->buffer_length){
                        if (ptr->length>=0 && ptr->length<=ptr->buffer_length)
                                ECALL(fd_copyout(ptr->data, 
@@ -2981,7 +2966,6 @@ static inline int raw_cmd_copyin(int cmd, char *param,
                                 struct floppy_raw_cmd **rcmd)
 {
        struct floppy_raw_cmd *ptr;
-       struct old_floppy_raw_cmd old_raw_cmd;
        int ret;
        int i;
        
@@ -2992,37 +2976,20 @@ static inline int raw_cmd_copyin(int cmd, char *param,
                if (!ptr)
                        return -ENOMEM;
                *rcmd = ptr;
-               if (cmd == OLDFDRAWCMD){
-                       COPYIN(old_raw_cmd);
-                       ptr->flags = old_raw_cmd.flags;
-                       ptr->data = old_raw_cmd.data;
-                       ptr->length = old_raw_cmd.length;
-                       ptr->rate = old_raw_cmd.rate;
-                       ptr->cmd_count = old_raw_cmd.cmd_count;
-                       ptr->track = old_raw_cmd.track;
-                       ptr->phys_length = 0;
-                       ptr->next = 0;
-                       ptr->buffer_length = 0;
-                       memcpy(ptr->cmd, old_raw_cmd.cmd, 9);
-                       param += sizeof(struct old_floppy_raw_cmd);
-                       if (ptr->cmd_count > 9)
-                               return -EINVAL;
-               } else {
-                       COPYIN(*ptr);
-                       ptr->next = 0;
-                       ptr->buffer_length = 0;
-                       param += sizeof(struct floppy_raw_cmd);
-                       if (ptr->cmd_count > 33)
-                               /* the command may now also take up the space
-                                * initially intended for the reply & the
-                                * reply count. Needed for long 82078 commands
-                                * such as RESTORE, which takes ... 17 command
-                                * bytes. Murphy's law #137: When you reserve
-                                * 16 bytes for a structure, you'll one day
-                                * discover that you really need 17...
-                                */
-                               return -EINVAL;
-               }
+               COPYIN(*ptr);
+               ptr->next = 0;
+               ptr->buffer_length = 0;
+               param += sizeof(struct floppy_raw_cmd);
+               if (ptr->cmd_count > 33)
+                       /* the command may now also take up the space
+                        * initially intended for the reply & the
+                        * reply count. Needed for long 82078 commands
+                        * such as RESTORE, which takes ... 17 command
+                        * bytes. Murphy's law #137: When you reserve
+                        * 16 bytes for a structure, you'll one day
+                        * discover that you really need 17...
+                        */
+                       return -EINVAL;
 
                for (i=0; i< 16; i++)
                        ptr->reply[i] = 0;
@@ -3037,9 +3004,6 @@ static inline int raw_cmd_copyin(int cmd, char *param,
                                return -ENOMEM;
                        ptr->buffer_length = ptr->length;
                }
-               if ( ptr->flags & FD_RAW_READ )
-                   ECALL( verify_area( VERIFY_WRITE, ptr->data, 
-                                       ptr->length ));
                if (ptr->flags & FD_RAW_WRITE)
                        ECALL(fd_copyin(ptr->data, ptr->kernel_data, 
                                        ptr->length));
@@ -3181,47 +3145,42 @@ static inline int set_geometry(unsigned int cmd, struct floppy_struct *g,
 }
 
 /* handle obsolete ioctl's */
-static struct translation_entry {
-    int newcmd;
-    int oldcmd;
-    int oldsize; /* size of 0x00xx-style ioctl. Reflects old structures, thus
-                 * use numeric values. NO SIZEOFS */
-} translation_table[]= {
-    {FDCLRPRM,          0,  0},
-    {FDSETPRM,          1, 28},
-    {FDDEFPRM,          2, 28},
-    {FDGETPRM,          3, 28},
-    {FDMSGON,           4,  0},
-    {FDMSGOFF,          5,  0},
-    {FDFMTBEG,          6,  0},
-    {FDFMTTRK,          7, 12},
-    {FDFMTEND,          8,  0},
-    {FDSETEMSGTRESH,   10,  0},
-    {FDFLUSH,          11,  0},
-    {FDSETMAXERRS,     12, 20},
-    {OLDFDRAWCMD,              30,  0},
-    {FDGETMAXERRS,     14, 20},
-    {FDGETDRVTYP,      16, 16},
-    {FDSETDRVPRM,      20, 88},
-    {FDGETDRVPRM,      21, 88},
-    {FDGETDRVSTAT,     22, 52},
-    {FDPOLLDRVSTAT,    23, 52},
-    {FDRESET,          24,  0},
-    {FDGETFDCSTAT,     25, 40},
-    {FDWERRORCLR,      27,  0},
-    {FDWERRORGET,      28, 24},
-    {FDRAWCMD,          0,  0},
-    {FDEJECT,           0,  0},
-    {FDTWADDLE,                40,  0} };
-
-static inline int normalize_0x02xx_ioctl(int *cmd, int *size)
+int ioctl_table[]= {
+       FDCLRPRM,
+       FDSETPRM,
+       FDDEFPRM,
+       FDGETPRM,
+       FDMSGON,
+       FDMSGOFF,
+       FDFMTBEG,
+       FDFMTTRK,
+       FDFMTEND,
+       FDSETEMSGTRESH,
+       FDFLUSH,
+       FDSETMAXERRS,
+       FDGETMAXERRS,
+       FDGETDRVTYP,
+       FDSETDRVPRM,
+       FDGETDRVPRM,
+       FDGETDRVSTAT,
+       FDPOLLDRVSTAT,
+       FDRESET,
+       FDGETFDCSTAT,
+       FDWERRORCLR,
+       FDWERRORGET,
+       FDRAWCMD,
+       FDEJECT,
+       FDTWADDLE
+};
+
+static inline int normalize_ioctl(int *cmd, int *size)
 {
        int i;
 
-       for (i=0; i < ARRAY_SIZE(translation_table); i++) {
-               if ((*cmd & 0xffff) == (translation_table[i].newcmd & 0xffff)){
+       for (i=0; i < ARRAY_SIZE(ioctl_table); i++) {
+               if ((*cmd & 0xffff) == (ioctl_table[i] & 0xffff)){
                        *size = _IOC_SIZE(*cmd);
-                       *cmd = translation_table[i].newcmd;
+                       *cmd = ioctl_table[i];
                        if (*size > _IOC_SIZE(*cmd)) {
                                printk("ioctl not yet supported\n");
                                return -EFAULT;
@@ -3232,31 +3191,6 @@ static inline int normalize_0x02xx_ioctl(int *cmd, int *size)
        return -EINVAL;
 }
 
-static inline int xlate_0x00xx_ioctl(int *cmd, int *size)
-{
-       int i;
-       /* old ioctls' for kernels <= 1.3.33 */
-       /* When the next even release will come around, we'll start
-        * warning against these.
-        * When the next odd release will come around, we'll fail with
-        * -EINVAL */
-       if(strcmp(system_utsname.version, "1.4.0") >= 0)
-               printk("obsolete floppy ioctl %x\n", *cmd);
-       if((system_utsname.version[0] == '1' &&
-           strcmp(system_utsname.version, "1.5.0") >= 0) ||
-          (system_utsname.version[0] >= '2' &&
-           strcmp(system_utsname.version, "2.1.0") >= 0))
-               return -EINVAL;
-       for (i=0; i < ARRAY_SIZE(translation_table); i++) {
-               if (*cmd == translation_table[i].oldcmd) {
-                       *size = translation_table[i].oldsize;
-                       *cmd = translation_table[i].newcmd;
-                       return 0;
-               }
-       }
-       return -EINVAL;
-}
-
 static int get_floppy_geometry(int drive, int type, struct floppy_struct **g)
 {
        if (type)
@@ -3315,25 +3249,24 @@ static int fd_ioctl(struct inode *inode, struct file *filp, unsigned int cmd,
                /* the following have been inspired by the corresponding
                 * code for other block devices. */
                struct floppy_struct *g;
-               struct hd_geometry *loc;
-
                case HDIO_GETGEO:
-                       loc = (struct hd_geometry *) param;
+               {
+                       struct hd_geometry loc;
                        ECALL(get_floppy_geometry(drive, type, &g));
-                       ECALL(verify_area(VERIFY_WRITE, loc, sizeof(*loc)));
-                       put_user(g->head, &loc->heads);
-                       put_user(g->sect, &loc->sectors);
-                       put_user(g->track, &loc->cylinders);
-                       put_user(0,&loc->start);
-                       return 0;
+                       loc.heads = g->head;
+                       loc.sectors = g->sect;
+                       loc.cylinders = g->track;
+                       loc.start = 0;
+                       return _COPYOUT(loc);
+               }
                case BLKRASET:
                        if(!suser()) return -EACCES;
                        if(param > 0xff) return -EINVAL;
                        read_ahead[MAJOR(inode->i_rdev)] = param;
                        return 0;
                 case BLKRAGET:
-                       return write_user_long(param, 
-                                              read_ahead[MAJOR(inode->i_rdev)]);
+                       return put_user(read_ahead[MAJOR(inode->i_rdev)],
+                                       (int *) param);
                case BLKFLSBUF:
                        if(!suser()) return -EACCES;
                        fsync_dev(inode->i_rdev);
@@ -3342,16 +3275,14 @@ static int fd_ioctl(struct inode *inode, struct file *filp, unsigned int cmd,
 
                case BLKGETSIZE:
                        ECALL(get_floppy_geometry(drive, type, &g));
-                       return write_user_long(param, g->size);
+                       return put_user(g->size, (int *) param);
                /* BLKRRPART is not defined as floppies don't have
                 * partition tables */
        }
 
        /* convert the old style command into a new style command */
        if ((cmd & 0xff00) == 0x0200) {
-               ECALL(normalize_0x02xx_ioctl(&cmd, &size));
-       } else if ((cmd & 0xff00) == 0x0000) {
-               ECALL(xlate_0x00xx_ioctl(&cmd, &size));
+               ECALL(normalize_ioctl(&cmd, &size));
        } else
                return -EINVAL;
 
@@ -3360,10 +3291,6 @@ static int fd_ioctl(struct inode *inode, struct file *filp, unsigned int cmd,
             ((cmd & 0x40) && !IOCTL_ALLOWED))
                return -EPERM;
 
-       /* verify writability of result, and fail early */
-       if (_IOC_DIR(cmd) & _IOC_READ)
-               ECALL(verify_area(VERIFY_WRITE,(void *) param, size));
-               
        /* copyin */
        CLEARSTRUCT(&inparam);
        if (_IOC_DIR(cmd) & _IOC_WRITE)
@@ -3458,7 +3385,6 @@ static int fd_ioctl(struct inode *inode, struct file *filp, unsigned int cmd,
                        return 0;
                OUT(FDWERRORGET,UDRWE);
 
-               case OLDFDRAWCMD:
                case FDRAWCMD:
                        if (type)
                                return -EINVAL;
index 666058cdf5b4caa0ce801d7db53cd728c9831161..48aaf3fac6922a64ad4bac11a5f47f37fbc9ad9a 100644 (file)
@@ -66,7 +66,6 @@ char *disk_name (struct gendisk *hd, int minor, char *buf)
        const char *maj = hd->major_name;
        char unit = (minor >> hd->minor_shift) + 'a';
 
-#ifdef CONFIG_BLK_DEV_IDE
        /*
         * IDE devices use multiple major numbers, but the drives
         * are named as:  {hda,hdb}, {hdc,hdd}, {hde,hdf}, {hdg,hdh}..
@@ -82,7 +81,6 @@ char *disk_name (struct gendisk *hd, int minor, char *buf)
                case IDE0_MAJOR:
                        maj = "hd";
        }
-#endif
        part = minor & ((1 << hd->minor_shift) - 1);
        if (part)
                sprintf(buf, "%s%c%d", maj, unit, part);
index 136f972c329cc782010c7654713b6791da64cfc0..7e7ebcb48f6830996a2df19d66939f734a3c3c97 100644 (file)
  * 3.16  Jul 28, 1996 -- Fix from Gadi to reduce kernel stack usage for ioctl.
  * 3.17  Sep 17, 1996 -- Tweak audio reads for some drives.
  *                       Start changing CDROMLOADFROMSLOT to CDROM_SELECT_DISC.
+ * 3.17a Oct 31, 1996 -- Added module and DMA support.
  *
  * NOTE: Direct audio reads will only work on some types of drive.
  * So far, i've received reports of success for Sony and Toshiba drives.
 
 /***************************************************************************/
 
+#include <linux/module.h>
 #include <linux/types.h>
 #include <linux/kernel.h>
 #include <linux/delay.h>
@@ -255,41 +257,127 @@ struct ide_cd_state_flags {
 #define CDROM_STATE_FLAGS(drive)  ((struct ide_cd_state_flags *)&((drive)->bios_head))
 
 
-#define SECTOR_BUFFER_SIZE CD_FRAMESIZE
+struct atapi_request_sense {
+       unsigned char error_code : 7;
+       unsigned char valid      : 1;
+       byte reserved1;
+       unsigned char sense_key  : 4;
+       unsigned char reserved2  : 1;
+       unsigned char ili        : 1;
+       unsigned char reserved3  : 2;
+       byte info[4];
+       byte sense_len;
+       byte command_info[4];
+       byte asc;
+       byte ascq;
+       byte fru;
+       byte sense_key_specific[3];
+};
 
+struct packet_command {
+       char *buffer;
+       int buflen;
+       int stat;
+       struct atapi_request_sense *sense_data;
+       unsigned char c[12];
+};
 
-\f
-/****************************************************************************
- * Routines to read and write data from/to the drive, using
- * the routines input_ide_data() and output_ide_data() from ide.c.
- *
- * These routines will round up any request for an odd number of bytes,
- * so if an odd bytecount is specified, be sure that there's at least one
- * extra byte allocated for the buffer.
- */
 
+/* Structure of a MSF cdrom address. */
+struct atapi_msf {
+       byte reserved;
+       byte minute;
+       byte second;
+       byte frame;
+};
 
-static inline
-void cdrom_in_bytes (ide_drive_t *drive, void *buffer, uint bytecount)
-{
-       ++bytecount;
-       ide_input_data (drive, buffer, bytecount / 4);
-       if ((bytecount & 0x03) >= 2) {
-               insw (IDE_DATA_REG, ((byte *)buffer) + (bytecount & ~0x03), 1);
-       }
-}
 
+/* Space to hold the disk TOC. */
 
-static inline
-void cdrom_out_bytes (ide_drive_t *drive, void *buffer, uint bytecount)
+#define MAX_TRACKS 99
+struct atapi_toc_header {
+       unsigned short toc_length;
+       byte first_track;
+       byte last_track;
+};
+
+struct atapi_toc_entry {
+       byte reserved1;
+       unsigned control : 4;
+       unsigned adr     : 4;
+       byte track;
+       byte reserved2;
+       union {
+               unsigned lba;
+               struct atapi_msf msf;
+       } addr;
+};
+
+struct atapi_toc {
+       int    last_session_lba;
+       int    xa_flag;
+       unsigned capacity;
+       struct atapi_toc_header hdr;
+       struct atapi_toc_entry  ent[MAX_TRACKS+1];
+         /* One extra for the leadout. */
+};
+
+
+/* This structure is annoyingly close to, but not identical with,
+   the cdrom_subchnl structure from cdrom.h. */
+struct atapi_cdrom_subchnl 
 {
-       ++bytecount;
-       ide_output_data (drive, buffer, bytecount / 4);
-       if ((bytecount & 0x03) >= 2) {
-               outsw (IDE_DATA_REG,
-                      ((byte *)buffer) + (bytecount & ~0x03), 1);
-       }
-}
+       u_char  acdsc_reserved;
+       u_char  acdsc_audiostatus;
+       u_short acdsc_length;
+       u_char  acdsc_format;
+
+       u_char  acdsc_adr:      4;
+       u_char  acdsc_ctrl:     4;
+       u_char  acdsc_trk;
+       u_char  acdsc_ind;
+       union {
+               struct atapi_msf msf;
+               int     lba;
+       } acdsc_absaddr;
+       union {
+               struct atapi_msf msf;
+               int     lba;
+       } acdsc_reladdr;
+};
+
+
+/* Extra per-device info for cdrom drives. */
+struct cdrom_info {
+
+       /* Buffer for table of contents.  NULL if we haven't allocated
+          a TOC buffer for this device yet. */
+
+       struct atapi_toc *toc;
+
+       /* Sector buffer.  If a read request wants only the first part
+          of a cdrom block, we cache the rest of the block here,
+          in the expectation that that data is going to be wanted soon.
+          SECTOR_BUFFERED is the number of the first buffered sector,
+          and NSECTORS_BUFFERED is the number of sectors in the buffer.
+          Before the buffer is allocated, we should have
+          SECTOR_BUFFER == NULL and NSECTORS_BUFFERED == 0. */
+
+       unsigned long sector_buffered;
+       unsigned long nsectors_buffered;
+       char *sector_buffer;
+
+       /* The result of the last successful request sense command
+          on this device. */
+       struct atapi_request_sense sense_data;
+
+       struct request request_sense_request;
+       struct packet_command request_sense_pc;
+       int dma;
+};
+
+
+#define SECTOR_BUFFER_SIZE CD_FRAMESIZE
 
 
 \f
@@ -573,6 +661,7 @@ static void cdrom_queue_request_sense (ide_drive_t *drive,
                                       struct atapi_request_sense *reqbuf,
                                       struct packet_command *failed_command)
 {
+       struct cdrom_info *info = drive->driver_data;
        struct request *rq;
        struct packet_command *pc;
        int len;
@@ -580,11 +669,11 @@ static void cdrom_queue_request_sense (ide_drive_t *drive,
        /* If the request didn't explicitly specify where
           to put the sense data, use the statically allocated structure. */
        if (reqbuf == NULL)
-               reqbuf = &drive->cdrom_info.sense_data;
+               reqbuf = &info->sense_data;
 
        /* Make up a new request to retrieve sense information. */
 
-       pc = &HWIF(drive)->request_sense_pc;
+       pc = &info->request_sense_pc;
        memset (pc, 0, sizeof (*pc));
 
        /* The request_sense structure has an odd number of (16-bit) words,
@@ -602,7 +691,7 @@ static void cdrom_queue_request_sense (ide_drive_t *drive,
 
        /* stuff the sense request in front of our current request */
 
-       rq = &HWIF(drive)->request_sense_request;
+       rq = &info->request_sense_request;
        ide_init_drive_cmd (rq);
        rq->cmd = REQUEST_SENSE_COMMAND;
        rq->buffer = (char *)pc;
@@ -641,9 +730,11 @@ static void cdrom_end_request (int uptodate, ide_drive_t *drive)
    buffers. */
 static void cdrom_saw_media_change (ide_drive_t *drive)
 {
+       struct cdrom_info *info = drive->driver_data;
+       
        CDROM_STATE_FLAGS (drive)->media_changed = 1;
        CDROM_STATE_FLAGS (drive)->toc_valid = 0;
-       drive->cdrom_info.nsectors_buffered = 0;
+       info->nsectors_buffered = 0;
 }
 
 
@@ -792,11 +883,16 @@ static int cdrom_decode_status (ide_drive_t *drive, int good_stat,
 static int cdrom_start_packet_command (ide_drive_t *drive, int xferlen,
                                       ide_handler_t *handler)
 {
+       struct cdrom_info *info = drive->driver_data;
+
        /* Wait for the controller to be idle. */
        if (ide_wait_stat (drive, 0, BUSY_STAT, WAIT_READY)) return 1;
 
+       if (info->dma)
+               info->dma = !HWIF(drive)->dmaproc(ide_dma_read, drive);
+
        /* Set up the controller registers. */
-       OUT_BYTE (0, IDE_FEATURE_REG);
+       OUT_BYTE (info->dma, IDE_FEATURE_REG);
        OUT_BYTE (0, IDE_NSECTOR_REG);
        OUT_BYTE (0, IDE_SECTOR_REG);
 
@@ -804,6 +900,9 @@ static int cdrom_start_packet_command (ide_drive_t *drive, int xferlen,
        OUT_BYTE (xferlen >> 8  , IDE_HCYL_REG);
        OUT_BYTE (drive->ctl, IDE_CONTROL_REG);
 
+       if (info->dma)
+               (void) (HWIF(drive)->dmaproc(ide_dma_begin, drive));
+
        if (CDROM_CONFIG_FLAGS (drive)->drq_interrupt) {
                ide_set_handler (drive, handler, WAIT_CMD);
                OUT_BYTE (WIN_PACKETCMD, IDE_COMMAND_REG); /* packet command */
@@ -842,7 +941,7 @@ static int cdrom_transfer_packet_command (ide_drive_t *drive,
        ide_set_handler (drive, handler, WAIT_CMD);
 
        /* Send the command to the device. */
-       cdrom_out_bytes (drive, cmd_buf, cmd_len);
+       atapi_output_bytes (drive, cmd_buf, cmd_len);
 
        return 0;
 }
@@ -863,7 +962,7 @@ static int cdrom_transfer_packet_command (ide_drive_t *drive,
 static void cdrom_buffer_sectors (ide_drive_t *drive, unsigned long sector,
                                   int sectors_to_transfer)
 {
-       struct cdrom_info *info = &drive->cdrom_info;
+       struct cdrom_info *info = drive->driver_data;
 
        /* Number of sectors to read into the buffer. */
        int sectors_to_buffer = MIN (sectors_to_transfer,
@@ -892,7 +991,7 @@ static void cdrom_buffer_sectors (ide_drive_t *drive, unsigned long sector,
        /* Read the data into the buffer. */
        dest = info->sector_buffer + info->nsectors_buffered * SECTOR_SIZE;
        while (sectors_to_buffer > 0) {
-               cdrom_in_bytes (drive, dest, SECTOR_SIZE);
+               atapi_input_bytes (drive, dest, SECTOR_SIZE);
                --sectors_to_buffer;
                --sectors_to_transfer;
                ++info->nsectors_buffered;
@@ -902,7 +1001,7 @@ static void cdrom_buffer_sectors (ide_drive_t *drive, unsigned long sector,
        /* Throw away any remaining data. */
        while (sectors_to_transfer > 0) {
                char dum[SECTOR_SIZE];
-               cdrom_in_bytes (drive, dum, sizeof (dum));
+               atapi_input_bytes (drive, dum, sizeof (dum));
                --sectors_to_transfer;
        }
 }
@@ -929,7 +1028,7 @@ int cdrom_read_check_ireason (ide_drive_t *drive, int len, int ireason)
                   and quit this request. */
                while (len > 0) {
                        int dum = 0;
-                       cdrom_out_bytes (drive, &dum, sizeof (dum));
+                       atapi_output_bytes (drive, &dum, sizeof (dum));
                        len -= sizeof (dum);
                }
        } else {
@@ -950,12 +1049,34 @@ static void cdrom_read_intr (ide_drive_t *drive)
 {
        int stat;
        int ireason, len, sectors_to_transfer, nskip;
+       struct cdrom_info *info = drive->driver_data;
+       int i, dma = info->dma, dma_error = 0;
 
        struct request *rq = HWGROUP(drive)->rq;
 
        /* Check for errors. */
+       if (dma) {
+               info->dma = 0;
+               if ((dma_error = HWIF(drive)->dmaproc(ide_dma_status_bad, drive))) {
+                       printk ("%s: disabled DMA\n", drive->name);
+                       drive->using_dma = 0;
+               }
+               (void) (HWIF(drive)->dmaproc(ide_dma_abort, drive));
+       }
+
        if (cdrom_decode_status (drive, 0, &stat)) return;
 
+       if (dma) {
+               if (!dma_error) {
+                       for (i = rq->nr_sectors; i > 0;) {
+                               i -= rq->current_nr_sectors;
+                               ide_end_request(1, HWGROUP(drive));
+                       }
+               } else
+                       ide_error (drive, "dma error", stat);
+               return;
+       }
+
        /* Read the interrupt reason and the transfer length. */
        ireason = IN_BYTE (IDE_NSECTOR_REG);
        len = IN_BYTE (IDE_LCYL_REG) + 256 * IN_BYTE (IDE_HCYL_REG);
@@ -1000,7 +1121,7 @@ static void cdrom_read_intr (ide_drive_t *drive)
        while (nskip > 0) {
                /* We need to throw away a sector. */
                char dum[SECTOR_SIZE];
-               cdrom_in_bytes (drive, dum, sizeof (dum));
+               atapi_input_bytes (drive, dum, sizeof (dum));
 
                --rq->current_nr_sectors;
                --nskip;
@@ -1033,8 +1154,8 @@ static void cdrom_read_intr (ide_drive_t *drive)
                        /* Read this_transfer sectors
                           into the current buffer. */
                        while (this_transfer > 0) {
-                               cdrom_in_bytes (drive
-                                               , rq->buffer, SECTOR_SIZE);
+                               atapi_input_bytes (drive,
+                                                  rq->buffer, SECTOR_SIZE);
                                rq->buffer += SECTOR_SIZE;
                                --rq->nr_sectors;
                                --rq->current_nr_sectors;
@@ -1057,7 +1178,7 @@ static void cdrom_read_intr (ide_drive_t *drive)
  */
 static int cdrom_read_from_buffer (ide_drive_t *drive)
 {
-       struct cdrom_info *info = &drive->cdrom_info;
+       struct cdrom_info *info = drive->driver_data;
        struct request *rq = HWGROUP(drive)->rq;
 
        /* Can't do anything if there's no buffer. */
@@ -1177,6 +1298,7 @@ static void cdrom_start_read_continuation (ide_drive_t *drive)
  */
 static void cdrom_start_read (ide_drive_t *drive, unsigned int block)
 {
+       struct cdrom_info *info = drive->driver_data;
        struct request *rq = HWGROUP(drive)->rq;
        int minor = MINOR (rq->rq_dev);
 
@@ -1197,7 +1319,12 @@ static void cdrom_start_read (ide_drive_t *drive, unsigned int block)
                return;
 
        /* Clear the local sector buffer. */
-       drive->cdrom_info.nsectors_buffered = 0;
+       info->nsectors_buffered = 0;
+
+       if (drive->using_dma && (rq->sector % SECTORS_PER_FRAME == 0) && (rq->nr_sectors % SECTORS_PER_FRAME == 0))
+               info->dma = 1;
+       else
+               info->dma = 0;
 
        /* Start sending the read request to the drive. */
        cdrom_start_packet_command (drive, 32768,
@@ -1274,13 +1401,13 @@ static void cdrom_pc_intr (ide_drive_t *drive)
                }
 
                /* Transfer the data. */
-               cdrom_out_bytes (drive, pc->buffer, thislen);
+               atapi_output_bytes (drive, pc->buffer, thislen);
 
                /* If we haven't moved enough data to satisfy the drive,
                   add some padding. */
                while (len > thislen) {
                        int dum = 0;
-                       cdrom_out_bytes (drive, &dum, sizeof (dum));
+                       atapi_output_bytes (drive, &dum, sizeof (dum));
                        len -= sizeof (dum);
                }
 
@@ -1301,13 +1428,13 @@ static void cdrom_pc_intr (ide_drive_t *drive)
                }
 
                /* Transfer the data. */
-               cdrom_in_bytes (drive, pc->buffer, thislen);
+               atapi_input_bytes (drive, pc->buffer, thislen);
 
                /* If we haven't moved enough data to satisfy the drive,
                   add some padding. */
                while (len > thislen) {
                        int dum = 0;
-                       cdrom_in_bytes (drive, &dum, sizeof (dum));
+                       atapi_input_bytes (drive, &dum, sizeof (dum));
                        len -= sizeof (dum);
                }
 
@@ -1342,6 +1469,9 @@ static void cdrom_do_packet_command (ide_drive_t *drive)
        int len;
        struct request *rq = HWGROUP(drive)->rq;
        struct packet_command *pc = (struct packet_command *)rq->buffer;
+       struct cdrom_info *info = drive->driver_data;
+
+       info->dma = 0;
 
        len = pc->buflen;
        if (len < 0) len = -len;
@@ -1432,10 +1562,8 @@ int cdrom_queue_packet_command (ide_drive_t *drive, struct packet_command *pc)
  * cdrom driver request routine.
  */
 
-void ide_do_rw_cdrom (ide_drive_t *drive, unsigned long block)
+void ide_do_rw_cdrom (ide_drive_t *drive, struct request *rq, unsigned long block)
 {
-       struct request *rq = HWGROUP(drive)->rq;
-
        if (rq -> cmd == PACKET_COMMAND || rq -> cmd == REQUEST_SENSE_COMMAND)
                cdrom_do_packet_command (drive);
        else if (rq -> cmd == RESET_DRIVE_COMMAND) {
@@ -1670,7 +1798,8 @@ cdrom_read_toc (ide_drive_t *drive,
                struct atapi_request_sense *reqbuf)
 {
        int stat, ntracks, i;
-       struct atapi_toc *toc = drive->cdrom_info.toc;
+       struct cdrom_info *info = drive->driver_data;
+       struct atapi_toc *toc = info->toc;
        struct {
                struct atapi_toc_header hdr;
                struct atapi_toc_entry  ent;
@@ -1680,7 +1809,7 @@ cdrom_read_toc (ide_drive_t *drive,
                /* Try to allocate space. */
                toc = (struct atapi_toc *) kmalloc (sizeof (struct atapi_toc),
                                                    GFP_KERNEL);
-               drive->cdrom_info.toc = toc;
+               info->toc = toc;
        }
 
        if (toc == NULL) {
@@ -1904,6 +2033,7 @@ int cdrom_get_toc_entry (ide_drive_t *drive, int track,
                          struct atapi_toc_entry **ent,
                         struct atapi_request_sense *reqbuf)
 {
+       struct cdrom_info *info = drive->driver_data;
        int stat, ntracks;
        struct atapi_toc *toc;
 
@@ -1911,7 +2041,7 @@ int cdrom_get_toc_entry (ide_drive_t *drive, int track,
        stat = cdrom_read_toc (drive, reqbuf);
        if (stat) return stat;
 
-       toc = drive->cdrom_info.toc;
+       toc = info->toc;
 
        /* Check validity of requested track number. */
        ntracks = toc->hdr.last_track - toc->hdr.first_track + 1;
@@ -2026,6 +2156,8 @@ cdrom_load_unload (ide_drive_t *drive, int slot,
 int ide_cdrom_ioctl (ide_drive_t *drive, struct inode *inode,
                     struct file *file, unsigned int cmd, unsigned long arg)
 {
+       struct cdrom_info *info = drive->driver_data;
+       
        switch (cmd) {
        case CDROMEJECT: {
                int stat;
@@ -2133,7 +2265,7 @@ int ide_cdrom_ioctl (ide_drive_t *drive, struct inode *inode,
                stat = cdrom_read_toc (drive, NULL);
                if (stat) return stat;
 
-               toc = drive->cdrom_info.toc;
+               toc = info->toc;
                tochdr.cdth_trk0 = toc->hdr.first_track;
                tochdr.cdth_trk1 = toc->hdr.last_track;
 
@@ -2300,7 +2432,7 @@ int ide_cdrom_ioctl (ide_drive_t *drive, struct inode *inode,
                stat = cdrom_read_toc (drive, NULL);
                if (stat) return stat;
 
-               toc = drive->cdrom_info.toc;
+               toc = info->toc;
 
                if (ms_info.addr_format == CDROM_MSF)
                        lba_to_msf (toc->last_session_lba,
@@ -2330,7 +2462,7 @@ int ide_cdrom_ioctl (ide_drive_t *drive, struct inode *inode,
                stat = cdrom_read_toc (drive, NULL);
                if (stat) return stat;
 
-               toc = drive->cdrom_info.toc;
+               toc = info->toc;
 
                stat = verify_area (VERIFY_READ, (char *)arg, sizeof (ra));
                if (stat) return stat;
@@ -2411,7 +2543,7 @@ int ide_cdrom_ioctl (ide_drive_t *drive, struct inode *inode,
                stat = cdrom_read_toc (drive, NULL);
                if (stat) return stat;
 
-               toc = drive->cdrom_info.toc;
+               toc = info->toc;
 
                if (lba < 0 || lba >= toc->capacity)
                        return -EINVAL;
@@ -2569,6 +2701,8 @@ int ide_cdrom_open (struct inode *ip, struct file *fp, ide_drive_t *drive)
                return -EROFS;
        }
 
+       MOD_INC_USE_COUNT;
+
        /* If this is the first open, check the drive status. */
        if (drive->usage == 1) {
                int stat;
@@ -2613,6 +2747,7 @@ void ide_cdrom_release (struct inode *inode, struct file *file,
                if (CDROM_STATE_FLAGS (drive)->eject_on_close)
                        (void) cdrom_eject (drive, 0, NULL);
        }
+       MOD_DEC_USE_COUNT;
 }
 
 
@@ -2623,6 +2758,8 @@ void ide_cdrom_release (struct inode *inode, struct file *file,
 
 void ide_cdrom_setup (ide_drive_t *drive)
 {
+       struct cdrom_info *info = drive->driver_data;
+               
        blksize_size[HWIF(drive)->major][drive->select.b.unit << PARTN_BITS] =
                CD_FRAMESIZE;
 
@@ -2705,12 +2842,97 @@ void ide_cdrom_setup (ide_drive_t *drive)
        }
 #endif /* not STANDARD_ATAPI */
 
-       drive->cdrom_info.toc               = NULL;
-       drive->cdrom_info.sector_buffer     = NULL;
-       drive->cdrom_info.sector_buffered   = 0;
-       drive->cdrom_info.nsectors_buffered = 0;
+       info->toc               = NULL;
+       info->sector_buffer     = NULL;
+       info->sector_buffered   = 0;
+       info->nsectors_buffered = 0;
+}
+
+int ide_cdrom_cleanup(ide_drive_t *drive)
+{
+       struct cdrom_info *info = drive->driver_data;
+
+       if (ide_unregister_subdriver (drive))
+               return 1;
+       if (info->sector_buffer != NULL)
+               kfree (info->sector_buffer);
+       if (info->toc != NULL)
+               kfree (info->toc);
+       kfree (info);
+       drive->driver_data = NULL;
+       return 0;
+}
+
+int ide_cdrom_init (void);
+static ide_module_t ide_cdrom_module = {
+       IDE_DRIVER_MODULE,
+       ide_cdrom_init,
+       NULL
+};
+
+static ide_driver_t ide_cdrom_driver = {
+       ide_cdrom,                      /* media */
+       0,                              /* busy */
+       1,                              /* supports_dma */
+       ide_cdrom_cleanup,              /* cleanup */
+       ide_do_rw_cdrom,                /* do_request */
+       NULL,                           /* ??? or perhaps cdrom_end_request? */
+       ide_cdrom_ioctl,                /* ioctl */
+       ide_cdrom_open,                 /* open */
+       ide_cdrom_release,              /* release */
+       ide_cdrom_check_media_change,   /* media_change */
+       NULL,                           /* pre_reset */
+       NULL,                           /* capacity */
+       NULL                            /* special */
+};
+
+int ide_cdrom_init (void)
+{
+       ide_drive_t *drive;
+       struct cdrom_info *info;
+       int failed = 0;
+
+       MOD_INC_USE_COUNT;
+       while ((drive = ide_scan_devices (ide_cdrom, NULL, failed++)) != NULL) {
+               info = (struct cdrom_info *) kmalloc (sizeof (struct cdrom_info), GFP_KERNEL);
+               if (info == NULL) {
+                       printk ("%s: Can't allocate a cdrom structure\n", drive->name);
+                       continue;
+               }
+               if (ide_register_subdriver (drive, &ide_cdrom_driver, IDE_SUBDRIVER_VERSION)) {
+                       printk ("%s: Failed to register the driver with ide.c\n", drive->name);
+                       kfree (info);
+                       continue;
+               }
+               failed--;
+               memset (info, 0, sizeof (struct cdrom_info));
+               drive->driver_data = info;
+               ide_cdrom_setup (drive);
+       }
+       ide_register_module(&ide_cdrom_module);
+       MOD_DEC_USE_COUNT;
+       return 0;
+}
+
+#ifdef MODULE
+int init_module (void)
+{
+       return ide_cdrom_init();
 }
 
+void cleanup_module(void)
+{
+       ide_drive_t *drive;
+       int failed = 0;
+
+       while ((drive = ide_scan_devices (ide_cdrom, &ide_cdrom_driver, failed)) != NULL)
+               if (ide_cdrom_cleanup (drive)) {
+                       printk ("%s: cleanup_module() called while still busy\n", drive->name);
+                       failed++;
+               }
+       ide_unregister_module (&ide_cdrom_module);
+}
+#endif /* MODULE */
 
 
 /*
@@ -2720,7 +2942,6 @@ void ide_cdrom_setup (ide_drive_t *drive)
  *  Query the drive to find what features are available
  *   before trying to use them.
  *  Integrate spindown time adjustment patch.
- *  Modularize.
  *  CDROMRESET ioctl.
  *  Better support for changers.
  */
diff --git a/drivers/block/ide-disk.c b/drivers/block/ide-disk.c
new file mode 100644 (file)
index 0000000..97eef35
--- /dev/null
@@ -0,0 +1,660 @@
+/*
+ *  linux/drivers/block/ide-disk.c     Version 1.0  Oct   6, 1996
+ *
+ *  Copyright (C) 1994-1996  Linus Torvalds & authors (see below)
+ */
+
+/*
+ *  Maintained by Mark Lord  <mlord@pobox.com>
+ *            and Gadi Oxman <gadio@netvision.net.il>
+ *
+ * This is the IDE/ATA disk driver, as evolved from hd.c and ide.c.
+ *
+ *  From hd.c:
+ *  |
+ *  | It traverses the request-list, using interrupts to jump between functions.
+ *  | As nearly all functions can be called within interrupts, we may not sleep.
+ *  | Special care is recommended.  Have Fun!
+ *  |
+ *  | modified by Drew Eckhardt to check nr of hd's from the CMOS.
+ *  |
+ *  | Thanks to Branko Lankester, lankeste@fwi.uva.nl, who found a bug
+ *  | in the early extended-partition checks and added DM partitions.
+ *  |
+ *  | Early work on error handling by Mika Liljeberg (liljeber@cs.Helsinki.FI).
+ *  |
+ *  | IRQ-unmask, drive-id, multiple-mode, support for ">16 heads",
+ *  | and general streamlining by Mark Lord (mlord@pobox.com).
+ *
+ *  October, 1994 -- Complete line-by-line overhaul for linux 1.1.x, by:
+ *
+ *     Mark Lord       (mlord@pobox.com)               (IDE Perf.Pkg)
+ *     Delman Lee      (delman@mipg.upenn.edu)         ("Mr. atdisk2")
+ *     Scott Snyder    (snyder@fnald0.fnal.gov)        (ATAPI IDE cd-rom)
+ *
+ *  This was a rewrite of just about everything from hd.c, though some original
+ *  code is still sprinkled about.  Think of it as a major evolution, with
+ *  inspiration from lots of linux users, esp.  hamish@zot.apana.org.au
+ *
+ * Version 1.0         move disk only code from ide.c to ide-disk.c
+ *                     support optional byte-swapping of all data
+ */
+
+#undef REALLY_SLOW_IO          /* most systems can safely undef this */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/string.h>
+#include <linux/kernel.h>
+#include <linux/delay.h>
+#include <linux/timer.h>
+#include <linux/mm.h>
+#include <linux/ioport.h>
+#include <linux/interrupt.h>
+#include <linux/major.h>
+#include <linux/blkdev.h>
+#include <linux/errno.h>
+#include <linux/hdreg.h>
+#include <linux/genhd.h>
+#include <linux/malloc.h>
+
+#include <asm/byteorder.h>
+#include <asm/irq.h>
+#include <asm/uaccess.h>
+#include <asm/io.h>
+
+#include "ide.h"
+
+static void idedisk_bswap_data (void *buffer, int wcount)
+{
+       u16 *p = buffer;
+
+       while (wcount--) {
+               *p++ = *p << 8 | *p >> 8;
+               *p++ = *p << 8 | *p >> 8;
+       }
+}
+
+static inline void idedisk_input_data (ide_drive_t *drive, void *buffer, unsigned int wcount)
+{
+       ide_input_data(drive, buffer, wcount);
+       if (drive->bswap)
+               idedisk_bswap_data(buffer, wcount);
+}
+
+static inline void idedisk_output_data (ide_drive_t *drive, void *buffer, unsigned int wcount)
+{
+       ide_output_data(drive, buffer, wcount);
+       if (drive->bswap)
+               idedisk_bswap_data(buffer, wcount);
+}
+
+/*
+ * lba_capacity_is_ok() performs a sanity check on the claimed "lba_capacity"
+ * value for this drive (from its reported identification information).
+ *
+ * Returns:    1 if lba_capacity looks sensible
+ *             0 otherwise
+ */
+static int lba_capacity_is_ok (struct hd_driveid *id)
+{
+       unsigned long lba_sects   = id->lba_capacity;
+       unsigned long chs_sects   = id->cyls * id->heads * id->sectors;
+       unsigned long _10_percent = chs_sects / 10;
+
+       /* perform a rough sanity check on lba_sects:  within 10% is "okay" */
+       if ((lba_sects - chs_sects) < _10_percent)
+               return 1;       /* lba_capacity is good */
+
+       /* some drives have the word order reversed */
+       lba_sects = (lba_sects << 16) | (lba_sects >> 16);
+       if ((lba_sects - chs_sects) < _10_percent) {
+               id->lba_capacity = lba_sects;   /* fix it */
+               return 1;       /* lba_capacity is (now) good */
+       }
+       return 0;       /* lba_capacity value is bad */
+}
+
+/*
+ * read_intr() is the handler for disk read/multread interrupts
+ */
+static void read_intr (ide_drive_t *drive)
+{
+       byte stat;
+       int i;
+       unsigned int msect, nsect;
+       struct request *rq;
+
+       if (!OK_STAT(stat=GET_STAT(),DATA_READY,BAD_R_STAT)) {
+               ide_error(drive, "read_intr", stat);
+               return;
+       }
+       msect = drive->mult_count;
+read_next:
+       rq = HWGROUP(drive)->rq;
+       if (msect) {
+               if ((nsect = rq->current_nr_sectors) > msect)
+                       nsect = msect;
+               msect -= nsect;
+       } else
+               nsect = 1;
+       idedisk_input_data(drive, rq->buffer, nsect * SECTOR_WORDS);
+#ifdef DEBUG
+       printk("%s:  read: sectors(%ld-%ld), buffer=0x%08lx, remaining=%ld\n",
+               drive->name, rq->sector, rq->sector+nsect-1,
+               (unsigned long) rq->buffer+(nsect<<9), rq->nr_sectors-nsect);
+#endif
+       rq->sector += nsect;
+       rq->buffer += nsect<<9;
+       rq->errors = 0;
+       i = (rq->nr_sectors -= nsect);
+       if ((rq->current_nr_sectors -= nsect) <= 0)
+               ide_end_request(1, HWGROUP(drive));
+       if (i > 0) {
+               if (msect)
+                       goto read_next;
+               ide_set_handler (drive, &read_intr, WAIT_CMD);
+       }
+}
+
+/*
+ * write_intr() is the handler for disk write interrupts
+ */
+static void write_intr (ide_drive_t *drive)
+{
+       byte stat;
+       int i;
+       ide_hwgroup_t *hwgroup = HWGROUP(drive);
+       struct request *rq = hwgroup->rq;
+
+       if (OK_STAT(stat=GET_STAT(),DRIVE_READY,drive->bad_wstat)) {
+#ifdef DEBUG
+               printk("%s: write: sector %ld, buffer=0x%08lx, remaining=%ld\n",
+                       drive->name, rq->sector, (unsigned long) rq->buffer,
+                       rq->nr_sectors-1);
+#endif
+               if ((rq->nr_sectors == 1) ^ ((stat & DRQ_STAT) != 0)) {
+                       rq->sector++;
+                       rq->buffer += 512;
+                       rq->errors = 0;
+                       i = --rq->nr_sectors;
+                       --rq->current_nr_sectors;
+                       if (rq->current_nr_sectors <= 0)
+                               ide_end_request(1, hwgroup);
+                       if (i > 0) {
+                               idedisk_output_data (drive, rq->buffer, SECTOR_WORDS);
+                               ide_set_handler (drive, &write_intr, WAIT_CMD);
+                       }
+                       return;
+               }
+       }
+       ide_error(drive, "write_intr", stat);
+}
+
+/*
+ * ide_multwrite() transfers a block of up to mcount sectors of data
+ * to a drive as part of a disk multiple-sector write operation.
+ */
+void ide_multwrite (ide_drive_t *drive, unsigned int mcount)
+{
+       struct request *rq = &HWGROUP(drive)->wrq;
+
+       do {
+               unsigned int nsect = rq->current_nr_sectors;
+               if (nsect > mcount)
+                       nsect = mcount;
+               mcount -= nsect;
+
+               idedisk_output_data(drive, rq->buffer, nsect<<7);
+#ifdef DEBUG
+               printk("%s: multwrite: sector %ld, buffer=0x%08lx, count=%d, remaining=%ld\n",
+                       drive->name, rq->sector, (unsigned long) rq->buffer,
+                       nsect, rq->nr_sectors - nsect);
+#endif
+               if ((rq->nr_sectors -= nsect) <= 0)
+                       break;
+               if ((rq->current_nr_sectors -= nsect) == 0) {
+                       if ((rq->bh = rq->bh->b_reqnext) != NULL) {
+                               rq->current_nr_sectors = rq->bh->b_size>>9;
+                               rq->buffer             = rq->bh->b_data;
+                       } else {
+                               panic("%s: buffer list corrupted\n", drive->name);
+                               break;
+                       }
+               } else {
+                       rq->buffer += nsect << 9;
+               }
+       } while (mcount);
+}
+
+/*
+ * multwrite_intr() is the handler for disk multwrite interrupts
+ */
+static void multwrite_intr (ide_drive_t *drive)
+{
+       byte stat;
+       int i;
+       ide_hwgroup_t *hwgroup = HWGROUP(drive);
+       struct request *rq = &hwgroup->wrq;
+
+       if (OK_STAT(stat=GET_STAT(),DRIVE_READY,drive->bad_wstat)) {
+               if (stat & DRQ_STAT) {
+                       if (rq->nr_sectors) {
+                               ide_multwrite(drive, drive->mult_count);
+                               ide_set_handler (drive, &multwrite_intr, WAIT_CMD);
+                               return;
+                       }
+               } else {
+                       if (!rq->nr_sectors) {  /* all done? */
+                               rq = hwgroup->rq;
+                               for (i = rq->nr_sectors; i > 0;){
+                                       i -= rq->current_nr_sectors;
+                                       ide_end_request(1, hwgroup);
+                               }
+                               return;
+                       }
+               }
+       }
+       ide_error(drive, "multwrite_intr", stat);
+}
+
+/*
+ * set_multmode_intr() is invoked on completion of a WIN_SETMULT cmd.
+ */
+static void set_multmode_intr (ide_drive_t *drive)
+{
+       byte stat = GET_STAT();
+
+       if (OK_STAT(stat,READY_STAT,BAD_STAT)) {
+               drive->mult_count = drive->mult_req;
+       } else {
+               drive->mult_req = drive->mult_count = 0;
+               drive->special.b.recalibrate = 1;
+               (void) ide_dump_status(drive, "set_multmode", stat);
+       }
+}
+
+/*
+ * set_geometry_intr() is invoked on completion of a WIN_SPECIFY cmd.
+ */
+static void set_geometry_intr (ide_drive_t *drive)
+{
+       byte stat = GET_STAT();
+
+       if (!OK_STAT(stat,READY_STAT,BAD_STAT))
+               ide_error(drive, "set_geometry_intr", stat);
+}
+
+/*
+ * recal_intr() is invoked on completion of a WIN_RESTORE (recalibrate) cmd.
+ */
+static void recal_intr (ide_drive_t *drive)
+{
+       byte stat = GET_STAT();
+
+       if (!OK_STAT(stat,READY_STAT,BAD_STAT))
+               ide_error(drive, "recal_intr", stat);
+}
+
+/*
+ * do_rw_disk() issues READ and WRITE commands to a disk,
+ * using LBA if supported, or CHS otherwise, to address sectors.
+ * It also takes care of issuing special DRIVE_CMDs.
+ */
+static void do_rw_disk (ide_drive_t *drive, struct request *rq, unsigned long block)
+{
+#ifdef CONFIG_BLK_DEV_PROMISE
+       ide_hwif_t *hwif = HWIF(drive);
+       int use_promise_io = 0;
+#endif /* CONFIG_BLK_DEV_PROMISE */
+
+       OUT_BYTE(drive->ctl,IDE_CONTROL_REG);
+       OUT_BYTE(rq->nr_sectors,IDE_NSECTOR_REG);
+#ifdef CONFIG_BLK_DEV_PROMISE
+       if (IS_PROMISE_DRIVE) {
+               if (hwif->is_promise2 || rq->cmd == READ) {
+                       use_promise_io = 1;
+               }
+       }
+       if (drive->select.b.lba || use_promise_io) {
+#else /* !CONFIG_BLK_DEV_PROMISE */
+       if (drive->select.b.lba) {
+#endif /* CONFIG_BLK_DEV_PROMISE */
+#ifdef DEBUG
+               printk("%s: %sing: LBAsect=%ld, sectors=%ld, buffer=0x%08lx\n",
+                       drive->name, (rq->cmd==READ)?"read":"writ",
+                       block, rq->nr_sectors, (unsigned long) rq->buffer);
+#endif
+               OUT_BYTE(block,IDE_SECTOR_REG);
+               OUT_BYTE(block>>=8,IDE_LCYL_REG);
+               OUT_BYTE(block>>=8,IDE_HCYL_REG);
+               OUT_BYTE(((block>>8)&0x0f)|drive->select.all,IDE_SELECT_REG);
+       } else {
+               unsigned int sect,head,cyl,track;
+               track = block / drive->sect;
+               sect  = block % drive->sect + 1;
+               OUT_BYTE(sect,IDE_SECTOR_REG);
+               head  = track % drive->head;
+               cyl   = track / drive->head;
+               OUT_BYTE(cyl,IDE_LCYL_REG);
+               OUT_BYTE(cyl>>8,IDE_HCYL_REG);
+               OUT_BYTE(head|drive->select.all,IDE_SELECT_REG);
+#ifdef DEBUG
+               printk("%s: %sing: CHS=%d/%d/%d, sectors=%ld, buffer=0x%08lx\n",
+                       drive->name, (rq->cmd==READ)?"read":"writ", cyl,
+                       head, sect, rq->nr_sectors, (unsigned long) rq->buffer);
+#endif
+       }
+#ifdef CONFIG_BLK_DEV_PROMISE
+       if (use_promise_io) {
+               do_promise_io (drive, rq);
+               return;
+       }
+#endif /* CONFIG_BLK_DEV_PROMISE */
+       if (rq->cmd == READ) {
+#ifdef CONFIG_BLK_DEV_TRITON
+               if (drive->using_dma && !(HWIF(drive)->dmaproc(ide_dma_read, drive)))
+                       return;
+#endif /* CONFIG_BLK_DEV_TRITON */
+               ide_set_handler(drive, &read_intr, WAIT_CMD);
+               OUT_BYTE(drive->mult_count ? WIN_MULTREAD : WIN_READ, IDE_COMMAND_REG);
+               return;
+       }
+       if (rq->cmd == WRITE) {
+#ifdef CONFIG_BLK_DEV_TRITON
+               if (drive->using_dma && !(HWIF(drive)->dmaproc(ide_dma_write, drive)))
+                       return;
+#endif /* CONFIG_BLK_DEV_TRITON */
+               OUT_BYTE(drive->mult_count ? WIN_MULTWRITE : WIN_WRITE, IDE_COMMAND_REG);
+               if (ide_wait_stat(drive, DATA_READY, drive->bad_wstat, WAIT_DRQ)) {
+                       printk(KERN_ERR "%s: no DRQ after issuing %s\n", drive->name,
+                               drive->mult_count ? "MULTWRITE" : "WRITE");
+                       return;
+               }
+               if (!drive->unmask)
+                       cli();
+               if (drive->mult_count) {
+                       HWGROUP(drive)->wrq = *rq; /* scratchpad */
+                       ide_set_handler (drive, &multwrite_intr, WAIT_CMD);
+                       ide_multwrite(drive, drive->mult_count);
+               } else {
+                       ide_set_handler (drive, &write_intr, WAIT_CMD);
+                       idedisk_output_data(drive, rq->buffer, SECTOR_WORDS);
+               }
+               return;
+       }
+       printk(KERN_ERR "%s: bad command: %d\n", drive->name, rq->cmd);
+       ide_end_request(0, HWGROUP(drive));
+}
+
+static int idedisk_open (struct inode *inode, struct file *filp, ide_drive_t *drive)
+{
+       MOD_INC_USE_COUNT;
+       if (drive->removable && drive->usage == 1) {
+               byte door_lock[] = {WIN_DOORLOCK,0,0,0};
+               struct request rq;
+               check_disk_change(inode->i_rdev);
+               ide_init_drive_cmd (&rq);
+               rq.buffer = door_lock;
+               /*
+                * Ignore the return code from door_lock,
+                * since the open() has already succeeded,
+                * and the door_lock is irrelevant at this point.
+                */
+               (void) ide_do_drive_cmd(drive, &rq, ide_wait);
+       }
+       return 0;
+}
+
+static void idedisk_release (struct inode *inode, struct file *filp, ide_drive_t *drive)
+{
+       if (drive->removable && !drive->usage) {
+               byte door_unlock[] = {WIN_DOORUNLOCK,0,0,0};
+               struct request rq;
+               invalidate_buffers(inode->i_rdev);
+               ide_init_drive_cmd (&rq);
+               rq.buffer = door_unlock;
+               (void) ide_do_drive_cmd(drive, &rq, ide_wait);
+       }
+       MOD_DEC_USE_COUNT;
+}
+
+static int idedisk_media_change (ide_drive_t *drive)
+{
+       return drive->removable;        /* if removable, always assume it was changed */
+}
+
+/*
+ * current_capacity() returns the capacity (in sectors) of a drive
+ * according to its current geometry/LBA settings.
+ */
+static unsigned long idedisk_capacity (ide_drive_t  *drive)
+{
+       struct hd_driveid *id = drive->id;
+       unsigned long capacity = drive->cyl * drive->head * drive->sect;
+
+       drive->select.b.lba = 0;
+       /* Determine capacity, and use LBA if the drive properly supports it */
+       if (id != NULL && (id->capability & 2) && lba_capacity_is_ok(id)) {
+               if (id->lba_capacity >= capacity) {
+                       capacity = id->lba_capacity;
+                       drive->select.b.lba = 1;
+               }
+       }
+       return (capacity - drive->sect0);
+}
+
+static void idedisk_special (ide_drive_t *drive)
+{
+       special_t *s = &drive->special;
+
+       if (s->b.set_geometry) {
+               s->b.set_geometry = 0;
+               OUT_BYTE(drive->sect,IDE_SECTOR_REG);
+               OUT_BYTE(drive->cyl,IDE_LCYL_REG);
+               OUT_BYTE(drive->cyl>>8,IDE_HCYL_REG);
+               OUT_BYTE(((drive->head-1)|drive->select.all)&0xBF,IDE_SELECT_REG);
+               if (!IS_PROMISE_DRIVE)
+                       ide_cmd(drive, WIN_SPECIFY, drive->sect, &set_geometry_intr);
+       } else if (s->b.recalibrate) {
+               s->b.recalibrate = 0;
+               if (!IS_PROMISE_DRIVE)
+                       ide_cmd(drive, WIN_RESTORE, drive->sect, &recal_intr);
+       } else if (s->b.set_multmode) {
+               s->b.set_multmode = 0;
+               if (drive->id && drive->mult_req > drive->id->max_multsect)
+                       drive->mult_req = drive->id->max_multsect;
+               if (!IS_PROMISE_DRIVE)
+                       ide_cmd(drive, WIN_SETMULT, drive->mult_req, &set_multmode_intr);
+       } else if (s->all) {
+               int special = s->all;
+               s->all = 0;
+               printk(KERN_ERR "%s: bad special flag: 0x%02x\n", drive->name, special);
+       }
+}
+
+static void idedisk_pre_reset (ide_drive_t *drive)
+{
+       drive->special.all = 0;
+       drive->special.b.set_geometry = 1;
+       drive->special.b.recalibrate  = 1;
+       if (OK_TO_RESET_CONTROLLER)
+               drive->mult_count = 0;
+       if (!drive->keep_settings)
+               drive->mult_req = 0;
+       if (drive->mult_req != drive->mult_count)
+               drive->special.b.set_multmode = 1;
+}
+
+int idedisk_init (void);
+static ide_module_t idedisk_module = {
+       IDE_DRIVER_MODULE,
+       idedisk_init,
+       NULL
+};
+
+/*
+ *     IDE subdriver functions, registered with ide.c
+ */
+static ide_driver_t idedisk_driver = {
+       ide_disk,               /* media */
+       0,                      /* busy */
+       1,                      /* supports_dma */
+       NULL,                   /* cleanup */
+       do_rw_disk,             /* do_request */
+       NULL,                   /* end_request */
+       NULL,                   /* ioctl */
+       idedisk_open,           /* open */
+       idedisk_release,        /* release */
+       idedisk_media_change,   /* media_change */
+       idedisk_pre_reset,      /* pre_reset */
+       idedisk_capacity,       /* capacity */
+       idedisk_special         /* special */
+};
+
+static int idedisk_cleanup (ide_drive_t *drive)
+{
+       return ide_unregister_subdriver(drive);
+}
+
+static int idedisk_identify_device (ide_drive_t *drive)
+{
+       struct hd_driveid *id = drive->id;
+       
+       if (id == NULL)
+               return 0;
+
+       /* SunDisk drives: force one unit */
+       if (id->model[0] == 'S' && id->model[1] == 'u' && (drive->select.all & (1<<4)))
+               return 1;
+
+       return 0;
+}
+
+static void idedisk_setup (ide_drive_t *drive)
+{
+       struct hd_driveid *id = drive->id;
+       unsigned long capacity, check;
+       
+       if (id == NULL)
+               return;
+
+       /* check for removable disks (eg. SYQUEST), ignore 'WD' drives */
+       if (id->config & (1<<7)) {      /* removable disk ? */
+               if (id->model[0] != 'W' || id->model[1] != 'D')
+                       drive->removable = 1;
+       }
+
+       /* SunDisk drives: treat as non-removable */
+       if (id->model[0] == 'S' && id->model[1] == 'u')
+               drive->removable = 0;
+
+       /* Extract geometry if we did not already have one for the drive */
+       if (!drive->cyl || !drive->head || !drive->sect) {
+               drive->cyl     = drive->bios_cyl  = id->cyls;
+               drive->head    = drive->bios_head = id->heads;
+               drive->sect    = drive->bios_sect = id->sectors;
+       }
+       /* Handle logical geometry translation by the drive */
+       if ((id->field_valid & 1) && id->cur_cyls && id->cur_heads
+        && (id->cur_heads <= 16) && id->cur_sectors)
+       {
+               /*
+                * Extract the physical drive geometry for our use.
+                * Note that we purposely do *not* update the bios info.
+                * This way, programs that use it (like fdisk) will
+                * still have the same logical view as the BIOS does,
+                * which keeps the partition table from being screwed.
+                *
+                * An exception to this is the cylinder count,
+                * which we reexamine later on to correct for 1024 limitations.
+                */
+               drive->cyl  = id->cur_cyls;
+               drive->head = id->cur_heads;
+               drive->sect = id->cur_sectors;
+
+               /* check for word-swapped "capacity" field in id information */
+               capacity = drive->cyl * drive->head * drive->sect;
+               check = (id->cur_capacity0 << 16) | id->cur_capacity1;
+               if (check == capacity) {        /* was it swapped? */
+                       /* yes, bring it into little-endian order: */
+                       id->cur_capacity0 = (capacity >>  0) & 0xffff;
+                       id->cur_capacity1 = (capacity >> 16) & 0xffff;
+               }
+       }
+       /* Use physical geometry if what we have still makes no sense */
+       if ((!drive->head || drive->head > 16) && id->heads && id->heads <= 16) {
+               drive->cyl  = id->cyls;
+               drive->head = id->heads;
+               drive->sect = id->sectors;
+       }
+       /* Correct the number of cyls if the bios value is too small */
+       if (drive->sect == drive->bios_sect && drive->head == drive->bios_head) {
+               if (drive->cyl > drive->bios_cyl)
+                       drive->bios_cyl = drive->cyl;
+       }
+
+       (void) idedisk_capacity (drive); /* initialize LBA selection */
+
+       printk (KERN_INFO "%s: %.40s, %ldMB w/%dkB Cache, %sCHS=%d/%d/%d%s\n",
+        drive->name, id->model, idedisk_capacity(drive)/2048L, id->buf_size/2,
+        drive->select.b.lba ? "LBA, " : "",
+        drive->bios_cyl, drive->bios_head, drive->bios_sect,
+        drive->using_dma ? ", DMA" : "");
+
+       drive->mult_count = 0;
+       if (id->max_multsect) {
+               drive->mult_req = INITIAL_MULT_COUNT;
+               if (drive->mult_req > id->max_multsect)
+                       drive->mult_req = id->max_multsect;
+               if (drive->mult_req || ((id->multsect_valid & 1) && id->multsect))
+                       drive->special.b.set_multmode = 1;
+       }
+}
+
+int idedisk_init (void)
+{
+       ide_drive_t *drive;
+       int failed = 0;
+       
+       MOD_INC_USE_COUNT;
+       while ((drive = ide_scan_devices (ide_disk, NULL, failed++)) != NULL) {
+               if (idedisk_identify_device (drive))
+                       continue;
+               if (ide_register_subdriver (drive, &idedisk_driver, IDE_SUBDRIVER_VERSION)) {
+                       printk (KERN_ERR "ide-disk: %s: Failed to register the driver with ide.c\n", drive->name);
+                       continue;
+               }
+               idedisk_setup(drive);
+               if ((!drive->head || drive->head > 16) && !drive->select.b.lba) {
+                       printk(KERN_ERR "%s: INVALID GEOMETRY: %d PHYSICAL HEADS?\n", drive->name, drive->head);
+                       (void) idedisk_cleanup(drive);
+                       continue;
+               }
+               failed--;
+       }
+       ide_register_module(&idedisk_module);
+       MOD_DEC_USE_COUNT;
+       return 0;
+}
+
+#ifdef MODULE
+int init_module (void)
+{
+       return idedisk_init();
+}
+
+void cleanup_module (void)
+{
+       ide_drive_t *drive;
+       int failed = 0;
+
+       while ((drive = ide_scan_devices (ide_disk, &idedisk_driver, failed)) != NULL)
+               if (idedisk_cleanup (drive)) {
+                       printk (KERN_ERR "%s: cleanup_module() called while still busy\n", drive->name);
+                       failed++;
+               }
+       ide_unregister_module(&idedisk_module);
+}
+#endif /* MODULE */
diff --git a/drivers/block/ide-floppy.c b/drivers/block/ide-floppy.c
new file mode 100644 (file)
index 0000000..8de2b1a
--- /dev/null
@@ -0,0 +1,1432 @@
+/*
+ * linux/drivers/block/ide-floppy.c    Version 0.2 - ALPHA     Oct  31, 1996
+ *
+ * Copyright (C) 1996 Gadi Oxman <gadio@netvision.net.il>
+ */
+
+/*
+ * IDE ATAPI floppy driver.
+ *
+ * The driver currently doesn't have any fancy features, just the bare
+ * minimum read/write support.
+ *
+ * Many thanks to Lode Leroy <Lode.Leroy@www.ibase.be>, who tested so many
+ * ALPHA patches to this driver on an EASYSTOR LS-120 ATAPI floppy drive.
+ *
+ * Ver 0.1   Oct 17 96   Initial test version, mostly based on ide-tape.c.
+ * Ver 0.2   Oct 31 96   Minor changes.
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/string.h>
+#include <linux/kernel.h>
+#include <linux/delay.h>
+#include <linux/timer.h>
+#include <linux/mm.h>
+#include <linux/ioport.h>
+#include <linux/interrupt.h>
+#include <linux/major.h>
+#include <linux/blkdev.h>
+#include <linux/errno.h>
+#include <linux/hdreg.h>
+#include <linux/genhd.h>
+#include <linux/malloc.h>
+
+#include <asm/byteorder.h>
+#include <asm/irq.h>
+#include <asm/uaccess.h>
+#include <asm/io.h>
+#include <asm/unaligned.h>
+#include <asm/bitops.h>
+
+/*
+ *     Main Linux ide driver include file
+ */
+#include "ide.h"
+
+/*
+ *     The following are used to debug the driver.
+ */
+#define IDEFLOPPY_DEBUG_LOG            0
+#define IDEFLOPPY_DEBUG_INFO           0
+#define IDEFLOPPY_DEBUG_BUGS           1
+
+/*
+ *     After each failed packet command we issue a request sense command
+ *     and retry the packet command IDEFLOPPY_MAX_PC_RETRIES times.
+ */
+#define IDEFLOPPY_MAX_PC_RETRIES       3
+
+/*
+ *     With each packet command, we allocate a buffer of
+ *     IDEFLOPPY_PC_BUFFER_SIZE bytes.
+ */
+#define IDEFLOPPY_PC_BUFFER_SIZE       256
+
+/*
+ *     In various places in the driver, we need to allocate storage
+ *     for packet commands and requests, which will remain valid while
+ *     we leave the driver to wait for an interrupt or a timeout event.
+ */
+#define IDEFLOPPY_PC_STACK             (10 + IDEFLOPPY_MAX_PC_RETRIES)
+
+/*
+ *     Our view of a packet command.
+ */
+typedef struct idefloppy_packet_command_s {
+       u8 c[12];                               /* Actual packet bytes */
+       int retries;                            /* On each retry, we increment retries */
+       int error;                              /* Error code */
+       int request_transfer;                   /* Bytes to transfer */
+       int actually_transferred;               /* Bytes actually transferred */
+       int buffer_size;                        /* Size of our data buffer */
+       char *b_data;                           /* Pointer which runs on the buffers */
+       int b_count;                            /* Missing/Available data on the current buffer */
+       struct request *rq;                     /* The corresponding request */
+       byte *buffer;                           /* Data buffer */
+       byte *current_position;                 /* Pointer into the above buffer */
+       void (*callback) (ide_drive_t *);       /* Called when this packet command is completed */
+       byte pc_buffer[IDEFLOPPY_PC_BUFFER_SIZE];       /* Temporary buffer */
+       unsigned int flags;                     /* Status/Action bit flags */
+} idefloppy_pc_t;
+
+/*
+ *     Packet command flag bits.
+ */
+#define        PC_ABORT                        0       /* Set when an error is considered normal - We won't retry */
+#define PC_DMA_RECOMMENDED             2       /* 1 when we prefer to use DMA if possible */
+#define        PC_DMA_IN_PROGRESS              3       /* 1 while DMA in progress */
+#define        PC_DMA_ERROR                    4       /* 1 when encountered problem during DMA */
+#define        PC_WRITING                      5       /* Data direction */
+
+/*
+ *     Removable Block Access Capabilities Page
+ */
+typedef struct {
+       unsigned        page_code       :6;     /* Page code - Should be 0x1b */
+       unsigned        reserved1_6     :1;     /* Reserved */
+       unsigned        ps              :1;     /* Should be 0 */
+       u8              page_length;            /* Page Length - Should be 0xa */
+       unsigned        reserved2       :6;
+       unsigned        srfp            :1;     /* Supports reporting progress of format */
+       unsigned        sflp            :1;     /* System floppy type device */
+       unsigned        tlun            :3;     /* Total logical units supported by the device */
+       unsigned        reserved3       :3;
+       unsigned        sml             :1;     /* Single / Multiple lun supported */
+       unsigned        ncd             :1;     /* Non cd optical device */
+       u8              reserved[8];
+} idefloppy_capabilities_page_t;
+
+/*
+ *     Flexible disk page.
+ */
+typedef struct {
+       unsigned        page_code       :6;     /* Page code - Should be 0x5 */
+       unsigned        reserved1_6     :1;     /* Reserved */
+       unsigned        ps              :1;     /* The device is capable of saving the page */
+       u8              page_length;            /* Page Length - Should be 0x1e */
+       u16             transfer_rate;          /* In kilobits per second */
+       u8              heads, sectors;         /* Number of heads, Number of sectors per track */
+       u16             sector_size;            /* Byes per sector */
+       u16             cyls;                   /* Number of cylinders */
+       u8              reserved10[10];
+       u8              motor_delay;            /* Motor off delay */
+       u8              reserved21[7];
+       u16             rpm;                    /* Rotations per minute */
+       u8              reserved30[2];
+} idefloppy_flexible_disk_page_t;
+/*
+ *     Format capacity
+ */
+typedef struct {
+       u8              reserved[3];
+       u8              length;                 /* Length of the following descriptors in bytes */
+} idefloppy_capacity_header_t;
+
+typedef struct {
+       u32             blocks;                 /* Number of blocks */
+       unsigned        dc              :2;     /* Descriptor Code */
+       unsigned        reserved        :6;
+       u8              length_msb;             /* Block Length (MSB)*/
+       u16             length;                 /* Block Length */
+} idefloppy_capacity_descriptor_t;
+
+#define CAPACITY_INVALID       0x00
+#define CAPACITY_UNFORMATTED   0x01
+#define CAPACITY_CURRENT       0x02
+#define CAPACITY_NO_CARTRIDGE  0x03
+
+/*
+ *     Most of our global data which we need to save even as we leave the
+ *     driver due to an interrupt or a timer event is stored in a variable
+ *     of type idefloppy_floppy_t, defined below.
+ */
+typedef struct {
+       ide_drive_t *drive;
+
+       idefloppy_pc_t *pc;                     /* Current packet command */
+       idefloppy_pc_t *failed_pc;              /* Last failed packet command */
+       idefloppy_pc_t pc_stack[IDEFLOPPY_PC_STACK];/* Packet command stack */
+       int pc_stack_index;                     /* Next free packet command storage space */
+       struct request rq_stack[IDEFLOPPY_PC_STACK];
+       int rq_stack_index;                     /* We implement a circular array */
+
+       /*
+        *      Last error information
+        */
+       byte sense_key, asc, ascq;
+
+       /*
+        *      Device information
+        */
+       int blocks, block_size, bs_factor;                      /* Current format */
+       idefloppy_capacity_descriptor_t capacity;               /* Last format capacity */
+       idefloppy_flexible_disk_page_t flexible_disk_page;      /* Copy of the flexible disk page */
+
+       unsigned int flags;                     /* Status/Action flags */
+} idefloppy_floppy_t;
+
+/*
+ *     Floppy flag bits values.
+ */
+#define IDEFLOPPY_DRQ_INTERRUPT                0       /* DRQ interrupt device */
+#define IDEFLOPPY_MEDIA_CHANGED                1       /* Media may have changed */
+#define IDEFLOPPY_USE_READ12           2       /* Use READ12/WRITE12 or READ10/WRITE10 */
+
+/*
+ *     ATAPI floppy drive packet commands
+ */
+#define IDEFLOPPY_FORMAT_UNIT_CMD      0x04
+#define IDEFLOPPY_INQUIRY_CMD          0x12
+#define IDEFLOPPY_MODE_SELECT_CMD      0x55
+#define IDEFLOPPY_MODE_SENSE_CMD       0x5a
+#define IDEFLOPPY_READ10_CMD           0x28
+#define IDEFLOPPY_READ12_CMD           0xa8
+#define IDEFLOPPY_READ_CAPACITY_CMD    0x23
+#define IDEFLOPPY_REQUEST_SENSE_CMD    0x03
+#define IDEFLOPPY_PREVENT_REMOVAL_CMD  0x1e
+#define IDEFLOPPY_SEEK_CMD             0x2b
+#define IDEFLOPPY_START_STOP_CMD       0x1b
+#define IDEFLOPPY_TEST_UNIT_READY_CMD  0x00
+#define IDEFLOPPY_VERIFY_CMD           0x2f
+#define IDEFLOPPY_WRITE10_CMD          0x2a
+#define IDEFLOPPY_WRITE12_CMD          0xaa
+#define IDEFLOPPY_WRITE_VERIFY_CMD     0x2e
+
+/*
+ *     Defines for the mode sense command
+ */
+#define MODE_SENSE_CURRENT             0x00
+#define MODE_SENSE_CHANGEABLE          0x01
+#define MODE_SENSE_DEFAULT             0x02 
+#define MODE_SENSE_SAVED               0x03
+
+/*
+ *     Special requests for our block device strategy routine.
+ */
+#define        IDEFLOPPY_FIRST_RQ              90
+
+/*
+ *     IDEFLOPPY_PC_RQ is used to queue a packet command in the request queue.
+ */
+#define        IDEFLOPPY_PC_RQ                 90
+
+#define IDEFLOPPY_LAST_RQ              90
+
+/*
+ *     A macro which can be used to check if a given request command
+ *     originated in the driver or in the buffer cache layer.
+ */
+#define IDEFLOPPY_RQ_CMD(cmd)          ((cmd >= IDEFLOPPY_FIRST_RQ) && (cmd <= IDEFLOPPY_LAST_RQ))
+
+/*
+ *     Error codes which are returned in rq->errors to the higher part
+ *     of the driver.
+ */
+#define        IDEFLOPPY_ERROR_GENERAL         101
+
+/*
+ *     The ATAPI Status Register.
+ */
+typedef union {
+       unsigned all                    :8;
+       struct {
+               unsigned check          :1;     /* Error occurred */
+               unsigned idx            :1;     /* Reserved */
+               unsigned corr           :1;     /* Correctable error occurred */
+               unsigned drq            :1;     /* Data is request by the device */
+               unsigned dsc            :1;     /* Media access command finished */
+               unsigned reserved5      :1;     /* Reserved */
+               unsigned drdy           :1;     /* Ignored for ATAPI commands (ready to accept ATA command) */
+               unsigned bsy            :1;     /* The device has access to the command block */
+       } b;
+} idefloppy_status_reg_t;
+
+/*
+ *     The ATAPI error register.
+ */
+typedef union {
+       unsigned all                    :8;
+       struct {
+               unsigned ili            :1;     /* Illegal Length Indication */
+               unsigned eom            :1;     /* End Of Media Detected */
+               unsigned abrt           :1;     /* Aborted command - As defined by ATA */
+               unsigned mcr            :1;     /* Media Change Requested - As defined by ATA */
+               unsigned sense_key      :4;     /* Sense key of the last failed packet command */
+       } b;
+} idefloppy_error_reg_t;
+
+/*
+ *     ATAPI Feature Register
+ */
+typedef union {
+       unsigned all                    :8;
+       struct {
+               unsigned dma            :1;     /* Using DMA or PIO */
+               unsigned reserved321    :3;     /* Reserved */
+               unsigned reserved654    :3;     /* Reserved (Tag Type) */
+               unsigned reserved7      :1;     /* Reserved */
+       } b;
+} idefloppy_feature_reg_t;
+
+/*
+ *     ATAPI Byte Count Register.
+ */
+typedef union {
+       unsigned all                    :16;
+       struct {
+               unsigned low            :8;     /* LSB */
+               unsigned high           :8;     /* MSB */
+       } b;
+} idefloppy_bcount_reg_t;
+
+/*
+ *     ATAPI Interrupt Reason Register.
+ */
+typedef union {
+       unsigned all                    :8;
+       struct {
+               unsigned cod            :1;     /* Information transferred is command (1) or data (0) */
+               unsigned io             :1;     /* The device requests us to read (1) or write (0) */
+               unsigned reserved       :6;     /* Reserved */
+       } b;
+} idefloppy_ireason_reg_t;
+
+/*
+ *     ATAPI floppy Drive Select Register
+ */
+typedef union {        
+       unsigned all                    :8;
+       struct {
+               unsigned sam_lun        :3;     /* Logical unit number */
+               unsigned reserved3      :1;     /* Reserved */
+               unsigned drv            :1;     /* The responding drive will be drive 0 (0) or drive 1 (1) */
+               unsigned one5           :1;     /* Should be set to 1 */
+               unsigned reserved6      :1;     /* Reserved */
+               unsigned one7           :1;     /* Should be set to 1 */
+       } b;
+} idefloppy_drivesel_reg_t;
+
+/*
+ *     ATAPI Device Control Register
+ */
+typedef union {                        
+       unsigned all                    :8;
+       struct {
+               unsigned zero0          :1;     /* Should be set to zero */
+               unsigned nien           :1;     /* Device interrupt is disabled (1) or enabled (0) */
+               unsigned srst           :1;     /* ATA software reset. ATAPI devices should use the new ATAPI srst. */
+               unsigned one3           :1;     /* Should be set to 1 */
+               unsigned reserved4567   :4;     /* Reserved */
+       } b;
+} idefloppy_control_reg_t;
+
+/*
+ *     The following is used to format the general configuration word of
+ *     the ATAPI IDENTIFY DEVICE command.
+ */
+struct idefloppy_id_gcw {      
+       unsigned packet_size            :2;     /* Packet Size */
+       unsigned reserved234            :3;     /* Reserved */
+       unsigned drq_type               :2;     /* Command packet DRQ type */
+       unsigned removable              :1;     /* Removable media */
+       unsigned device_type            :5;     /* Device type */
+       unsigned reserved13             :1;     /* Reserved */
+       unsigned protocol               :2;     /* Protocol type */
+};
+
+/*
+ *     INQUIRY packet command - Data Format
+ */
+typedef struct {
+       unsigned        device_type     :5;     /* Peripheral Device Type */
+       unsigned        reserved0_765   :3;     /* Peripheral Qualifier - Reserved */
+       unsigned        reserved1_6t0   :7;     /* Reserved */
+       unsigned        rmb             :1;     /* Removable Medium Bit */
+       unsigned        ansi_version    :3;     /* ANSI Version */
+       unsigned        ecma_version    :3;     /* ECMA Version */
+       unsigned        iso_version     :2;     /* ISO Version */
+       unsigned        response_format :4;     /* Response Data Format */
+       unsigned        reserved3_45    :2;     /* Reserved */
+       unsigned        reserved3_6     :1;     /* TrmIOP - Reserved */
+       unsigned        reserved3_7     :1;     /* AENC - Reserved */
+       u8              additional_length;      /* Additional Length (total_length-4) */
+       u8              rsv5, rsv6, rsv7;       /* Reserved */
+       u8              vendor_id[8];           /* Vendor Identification */
+       u8              product_id[16];         /* Product Identification */
+       u8              revision_level[4];      /* Revision Level */
+       u8              vendor_specific[20];    /* Vendor Specific - Optional */
+       u8              reserved56t95[40];      /* Reserved - Optional */
+                                               /* Additional information may be returned */
+} idefloppy_inquiry_result_t;
+
+/*
+ *     REQUEST SENSE packet command result - Data Format.
+ */
+typedef struct {
+       unsigned        error_code      :7;     /* Current error (0x70) */
+       unsigned        valid           :1;     /* The information field conforms to SFF-8070i */
+       u8              reserved1       :8;     /* Reserved */
+       unsigned        sense_key       :4;     /* Sense Key */
+       unsigned        reserved2_4     :1;     /* Reserved */
+       unsigned        ili             :1;     /* Incorrect Length Indicator */
+       unsigned        reserved2_67    :2;
+       u32             information __attribute__ ((packed));
+       u8              asl;                    /* Additional sense length (n-7) */
+       u32             command_specific;       /* Additional command specific information */
+       u8              asc;                    /* Additional Sense Code */
+       u8              ascq;                   /* Additional Sense Code Qualifier */
+       u8              replaceable_unit_code;  /* Field Replaceable Unit Code */
+       u8              reserved[3];
+       u8              pad[2];                 /* Padding to 20 bytes */
+} idefloppy_request_sense_result_t;
+
+/*
+ *     Pages of the SELECT SENSE / MODE SENSE packet commands.
+ */
+#define        IDEFLOPPY_CAPABILITIES_PAGE     0x1b
+#define IDEFLOPPY_FLEXIBLE_DISK_PAGE   0x05
+
+/*
+ *     Mode Parameter Header for the MODE SENSE packet command
+ */
+typedef struct {
+       u16             mode_data_length;       /* Length of the following data transfer */
+       u8              medium_type;            /* Medium Type */
+       unsigned        reserved3       :7;
+       unsigned        wp              :1;     /* Write protect */
+       u8              reserved[4];
+} idefloppy_mode_parameter_header_t;
+
+#define IDEFLOPPY_MIN(a,b)     ((a)<(b) ? (a):(b))
+#define        IDEFLOPPY_MAX(a,b)      ((a)>(b) ? (a):(b))
+
+/*
+ *     Too bad. The drive wants to send us data which we are not ready to accept.
+ *     Just throw it away.
+ */
+static void idefloppy_discard_data (ide_drive_t *drive, unsigned int bcount)
+{
+       while (bcount--)
+               IN_BYTE (IDE_DATA_REG);
+}
+
+#if IDEFLOPPY_DEBUG_BUGS
+static void idefloppy_write_zeros (ide_drive_t *drive, unsigned int bcount)
+{
+       while (bcount--)
+               OUT_BYTE (0, IDE_DATA_REG);
+}
+#endif /* IDEFLOPPY_DEBUG_BUGS */
+
+/*
+ *     idefloppy_end_request is used to finish servicing a request.
+ *
+ *     For read/write requests, we will call ide_end_request to pass to the
+ *     next buffer.
+ */
+static void idefloppy_end_request (byte uptodate, ide_hwgroup_t *hwgroup)
+{
+       ide_drive_t *drive = hwgroup->drive;
+       struct request *rq = hwgroup->rq;
+
+#if IDEFLOPPY_DEBUG_LOG
+       printk (KERN_INFO "Reached idefloppy_end_request\n");
+#endif /* IDEFLOPPY_DEBUG_LOG */
+
+       if (!IDEFLOPPY_RQ_CMD (rq->cmd)) {
+               ide_end_request (uptodate, hwgroup);
+               return;
+       }
+       switch (uptodate) {
+               case 0: rq->errors = IDEFLOPPY_ERROR_GENERAL; break;
+               case 1: rq->errors = 0; break;
+               default: rq->errors = uptodate;
+       }
+       ide_end_drive_cmd (drive, 0, 0);
+}
+
+static void idefloppy_input_buffers (ide_drive_t *drive, idefloppy_pc_t *pc, unsigned int bcount)
+{
+       struct request *rq = pc->rq;
+       struct buffer_head *bh = rq->bh;
+       int count;
+       
+       while (bcount) {
+#if IDEFLOPPY_DEBUG_BUGS
+               if (bh == NULL) {
+                       printk (KERN_ERR "%s: bh == NULL in idefloppy_input_buffers, bcount == %d\n", drive->name, bcount);
+                       idefloppy_discard_data (drive, bcount);
+                       return;
+               }
+#endif /* IDEFLOPPY_DEBUG_BUGS */
+               count = IDEFLOPPY_MIN (bh->b_size - pc->b_count, bcount);
+               atapi_input_bytes (drive, bh->b_data + pc->b_count, count);
+               bcount -= count; pc->b_count += count;
+               if (pc->b_count == bh->b_size) {
+                       rq->sector += rq->current_nr_sectors;
+                       rq->nr_sectors -= rq->current_nr_sectors;
+                       idefloppy_end_request (1, HWGROUP(drive));
+                       if ((bh = rq->bh) != NULL)
+                               pc->b_count = 0;
+               }
+       }
+}
+
+static void idefloppy_output_buffers (ide_drive_t *drive, idefloppy_pc_t *pc, unsigned int bcount)
+{
+       struct request *rq = pc->rq;
+       struct buffer_head *bh = rq->bh;
+       int count;
+       
+       while (bcount) {
+#if IDEFLOPPY_DEBUG_BUGS
+               if (bh == NULL) {
+                       printk (KERN_ERR "%s: bh == NULL in idefloppy_output_buffers, bcount == %d\n", drive->name, bcount);
+                       idefloppy_write_zeros (drive, bcount);
+                       return;
+               }
+#endif /* IDEFLOPPY_DEBUG_BUGS */
+               count = IDEFLOPPY_MIN (pc->b_count, bcount);
+               atapi_output_bytes (drive, pc->b_data, count);
+               bcount -= count; pc->b_data += count; pc->b_count -= count;
+               if (!pc->b_count) {
+                       rq->sector += rq->current_nr_sectors;
+                       rq->nr_sectors -= rq->current_nr_sectors;
+                       idefloppy_end_request (1, HWGROUP(drive));
+                       if ((bh = rq->bh) != NULL) {
+                               pc->b_data = bh->b_data;
+                               pc->b_count = bh->b_size;
+                       }
+               }
+       }
+}
+
+#ifdef CONFIG_BLK_DEV_TRITON
+static void idefloppy_update_buffers (ide_drive_t *drive, idefloppy_pc_t *pc)
+{
+       struct request *rq = pc->rq;
+       struct buffer_head *bh = rq->bh;
+
+       while ((bh = rq->bh) != NULL)
+               idefloppy_end_request (1, HWGROUP(drive));
+}
+#endif /* CONFIG_BLK_DEV_TRITON */
+
+/*
+ *     idefloppy_queue_pc_head generates a new packet command request in front
+ *     of the request queue, before the current request, so that it will be
+ *     processed immediately, on the next pass through the driver.
+ */
+static void idefloppy_queue_pc_head (ide_drive_t *drive,idefloppy_pc_t *pc,struct request *rq)
+{
+       ide_init_drive_cmd (rq);
+       rq->buffer = (char *) pc;
+       rq->cmd = IDEFLOPPY_PC_RQ;
+       (void) ide_do_drive_cmd (drive, rq, ide_preempt);
+}
+
+static idefloppy_pc_t *idefloppy_next_pc_storage (ide_drive_t *drive)
+{
+       idefloppy_floppy_t *floppy = drive->driver_data;
+
+       if (floppy->pc_stack_index==IDEFLOPPY_PC_STACK)
+               floppy->pc_stack_index=0;
+       return (&floppy->pc_stack[floppy->pc_stack_index++]);
+}
+
+static struct request *idefloppy_next_rq_storage (ide_drive_t *drive)
+{
+       idefloppy_floppy_t *floppy = drive->driver_data;
+
+       if (floppy->rq_stack_index==IDEFLOPPY_PC_STACK)
+               floppy->rq_stack_index=0;
+       return (&floppy->rq_stack[floppy->rq_stack_index++]);
+}
+
+/*
+ *     idefloppy_analyze_error is called on each failed packet command retry
+ *     to analyze the request sense.
+ */
+static void idefloppy_analyze_error (ide_drive_t *drive,idefloppy_request_sense_result_t *result)
+{
+       idefloppy_floppy_t *floppy = drive->driver_data;
+
+       floppy->sense_key = result->sense_key; floppy->asc = result->asc; floppy->ascq = result->ascq;
+#if IDEFLOPPY_DEBUG_LOG
+       printk (KERN_INFO "ide-floppy: pc = %x, sense key = %x, asc = %x, ascq = %x\n",floppy->failed_pc->c[0],result->sense_key,result->asc,result->ascq);
+#endif /* IDEFLOPPY_DEBUG_LOG */
+}
+
+static void idefloppy_request_sense_callback (ide_drive_t *drive)
+{
+       idefloppy_floppy_t *floppy = drive->driver_data;
+
+#if IDEFLOPPY_DEBUG_LOG
+       printk (KERN_INFO "ide-floppy: Reached idefloppy_request_sense_callback\n");
+#endif /* IDEFLOPPY_DEBUG_LOG */
+       if (!floppy->pc->error) {
+               idefloppy_analyze_error (drive,(idefloppy_request_sense_result_t *) floppy->pc->buffer);
+               idefloppy_end_request (1,HWGROUP (drive));
+       } else {
+               printk (KERN_ERR "Error in REQUEST SENSE itself - Aborting request!\n");
+               idefloppy_end_request (0,HWGROUP (drive));
+       }
+}
+
+/*
+ *     General packet command callback function.
+ */
+static void idefloppy_pc_callback (ide_drive_t *drive)
+{
+       idefloppy_floppy_t *floppy = drive->driver_data;
+       
+#if IDEFLOPPY_DEBUG_LOG
+       printk (KERN_INFO "ide-floppy: Reached idefloppy_pc_callback\n");
+#endif /* IDEFLOPPY_DEBUG_LOG */
+
+       idefloppy_end_request (floppy->pc->error ? 0:1, HWGROUP(drive));
+}
+
+/*
+ *     idefloppy_init_pc initializes a packet command.
+ */
+static void idefloppy_init_pc (idefloppy_pc_t *pc)
+{
+       memset (pc->c, 0, 12);
+       pc->retries = 0;
+       pc->flags = 0;
+       pc->request_transfer = 0;
+       pc->buffer = pc->pc_buffer;
+       pc->buffer_size = IDEFLOPPY_PC_BUFFER_SIZE;
+       pc->b_data = NULL;
+       pc->callback = &idefloppy_pc_callback;
+}
+
+static void idefloppy_create_request_sense_cmd (idefloppy_pc_t *pc)
+{
+       idefloppy_init_pc (pc); 
+       pc->c[0] = IDEFLOPPY_REQUEST_SENSE_CMD;
+       pc->c[4] = 255;
+       pc->request_transfer = 18;
+       pc->callback = &idefloppy_request_sense_callback;
+}
+
+/*
+ *     idefloppy_retry_pc is called when an error was detected during the
+ *     last packet command. We queue a request sense packet command in
+ *     the head of the request list.
+ */
+static void idefloppy_retry_pc (ide_drive_t *drive)
+{
+       idefloppy_pc_t *pc;
+       struct request *rq;
+       idefloppy_error_reg_t error;
+
+       error.all = IN_BYTE (IDE_ERROR_REG);
+       pc = idefloppy_next_pc_storage (drive);
+       rq = idefloppy_next_rq_storage (drive);
+       idefloppy_create_request_sense_cmd (pc);
+       idefloppy_queue_pc_head (drive, pc, rq);
+}
+
+/*
+ *     idefloppy_pc_intr is the usual interrupt handler which will be called
+ *     during a packet command.
+ */
+static void idefloppy_pc_intr (ide_drive_t *drive)
+{
+       idefloppy_floppy_t *floppy = drive->driver_data;
+       idefloppy_status_reg_t status;
+       idefloppy_bcount_reg_t bcount;
+       idefloppy_ireason_reg_t ireason;
+       idefloppy_pc_t *pc=floppy->pc;
+       struct request *rq = pc->rq;
+       unsigned int temp;
+
+#if IDEFLOPPY_DEBUG_LOG
+       printk (KERN_INFO "ide-floppy: Reached idefloppy_pc_intr interrupt handler\n");
+#endif /* IDEFLOPPY_DEBUG_LOG */       
+
+#ifdef CONFIG_BLK_DEV_TRITON
+       if (test_bit (PC_DMA_IN_PROGRESS, &pc->flags)) {
+               if (HWIF(drive)->dmaproc(ide_dma_status_bad, drive)) {
+                       set_bit (PC_DMA_ERROR, &pc->flags);
+               } else {
+                       pc->actually_transferred=pc->request_transfer;
+                       idefloppy_update_buffers (drive, pc);
+               }
+               (void) (HWIF(drive)->dmaproc(ide_dma_abort, drive));    /* End DMA */
+#if IDEFLOPPY_DEBUG_LOG
+               printk (KERN_INFO "ide-floppy: DMA finished\n");
+#endif /* IDEFLOPPY_DEBUG_LOG */
+       }
+#endif /* CONFIG_BLK_DEV_TRITON */
+
+       status.all = GET_STAT();                                        /* Clear the interrupt */
+
+       if (!status.b.drq) {                                            /* No more interrupts */
+#if IDEFLOPPY_DEBUG_LOG
+               printk (KERN_INFO "Packet command completed, %d bytes transferred\n", pc->actually_transferred);
+#endif /* IDEFLOPPY_DEBUG_LOG */
+               clear_bit (PC_DMA_IN_PROGRESS, &pc->flags);
+
+               ide_sti();
+
+               if (status.b.check || test_bit (PC_DMA_ERROR, &pc->flags)) {    /* Error detected */
+#if IDEFLOPPY_DEBUG_LOG
+                       printk (KERN_INFO "ide-floppy: %s: I/O error, ",drive->name);
+#endif /* IDEFLOPPY_DEBUG_LOG */
+                       rq->errors++;
+                       if (pc->c[0] == IDEFLOPPY_REQUEST_SENSE_CMD) {
+                               printk (KERN_ERR "ide-floppy: I/O error in request sense command\n");
+                               ide_do_reset (drive);
+                               return;
+                       }
+                       idefloppy_retry_pc (drive);                             /* Retry operation */
+                       return;
+               }
+               pc->error = 0;
+               if (floppy->failed_pc == pc)
+                       floppy->failed_pc=NULL;
+               pc->callback(drive);                    /* Command finished - Call the callback function */
+               return;
+       }
+#ifdef CONFIG_BLK_DEV_TRITON
+       if (clear_bit (PC_DMA_IN_PROGRESS, &pc->flags)) {
+               printk (KERN_ERR "ide-floppy: The floppy wants to issue more interrupts in DMA mode\n");
+               printk (KERN_ERR "ide-floppy: DMA disabled, reverting to PIO\n");
+               drive->using_dma=0;
+               ide_do_reset (drive);
+               return;
+       }
+#endif /* CONFIG_BLK_DEV_TRITON */
+       bcount.b.high=IN_BYTE (IDE_BCOUNTH_REG);                        /* Get the number of bytes to transfer */
+       bcount.b.low=IN_BYTE (IDE_BCOUNTL_REG);                 /* on this interrupt */
+       ireason.all=IN_BYTE (IDE_IREASON_REG);
+
+       if (ireason.b.cod) {
+               printk (KERN_ERR "ide-floppy: CoD != 0 in idefloppy_pc_intr\n");
+               ide_do_reset (drive);
+               return;
+       }
+       if (ireason.b.io == test_bit (PC_WRITING, &pc->flags)) {        /* Hopefully, we will never get here */
+               printk (KERN_ERR "ide-floppy: We wanted to %s, ", ireason.b.io ? "Write":"Read");
+               printk (KERN_ERR "but the floppy wants us to %s !\n",ireason.b.io ? "Read":"Write");
+               ide_do_reset (drive);
+               return;
+       }
+       if (!test_bit (PC_WRITING, &pc->flags)) {                       /* Reading - Check that we have enough space */
+               temp = pc->actually_transferred + bcount.all;
+               if ( temp > pc->request_transfer) {
+                       if (temp > pc->buffer_size) {
+                               printk (KERN_ERR "ide-floppy: The floppy wants to send us more data than expected - discarding data\n");
+                               idefloppy_discard_data (drive,bcount.all);
+                               ide_set_handler (drive,&idefloppy_pc_intr,WAIT_CMD);
+                               return;
+                       }
+#if IDEFLOPPY_DEBUG_LOG
+                       printk (KERN_NOTICE "ide-floppy: The floppy wants to send us more data than expected - allowing transfer\n");
+#endif /* IDEFLOPPY_DEBUG_LOG */
+               }
+       }
+       if (test_bit (PC_WRITING, &pc->flags)) {
+               if (pc->buffer != NULL)
+                       atapi_output_bytes (drive,pc->current_position,bcount.all);     /* Write the current buffer */
+               else
+                       idefloppy_output_buffers (drive, pc, bcount.all);
+       } else {
+               if (pc->buffer != NULL)
+                       atapi_input_bytes (drive,pc->current_position,bcount.all);      /* Read the current buffer */
+               else
+                       idefloppy_input_buffers (drive, pc, bcount.all);
+       }
+       pc->actually_transferred+=bcount.all;                           /* Update the current position */
+       pc->current_position+=bcount.all;
+
+       ide_set_handler (drive,&idefloppy_pc_intr,WAIT_CMD);            /* And set the interrupt handler again */
+}
+
/*
 *	idefloppy_transfer_pc sends the 12 command bytes of the current
 *	packet command once the drive is ready to accept them.  It expects
 *	DRQ to be (or become) asserted after WIN_PACKETCMD was issued, and
 *	checks the interrupt-reason register before transferring the packet.
 */
static void idefloppy_transfer_pc (ide_drive_t *drive)
{
	idefloppy_floppy_t *floppy = drive->driver_data;
	idefloppy_ireason_reg_t ireason;

	/* Wait for DRQ set / BUSY clear; bail out if the drive never asks
	   for the command packet. */
	if (ide_wait_stat (drive,DRQ_STAT,BUSY_STAT,WAIT_READY)) {
		printk (KERN_ERR "ide-floppy: Strange, packet command initiated yet DRQ isn't asserted\n");
		return;
	}
	ireason.all=IN_BYTE (IDE_IREASON_REG);
	/* The drive must indicate "command, transfer to device"
	   (CoD=1, IO=0) at this stage; anything else means the drive is
	   confused, so reset it. */
	if (!ireason.b.cod || ireason.b.io) {
		printk (KERN_ERR "ide-floppy: (IO,CoD) != (0,1) while issuing a packet command\n");
		ide_do_reset (drive);
		return;
	}
	/* The interrupt handler must be installed before the packet goes
	   out, since the completion interrupt may fire immediately. */
	ide_set_handler (drive, &idefloppy_pc_intr, WAIT_CMD);	/* Set the interrupt routine */
	atapi_output_bytes (drive, floppy->pc->c, 12);		/* Send the actual packet */
}
+
/*
 *	Issue a packet command
 *
 *	Programs the byte-count and feature (PIO/DMA) task-file registers,
 *	optionally starts DMA, then writes WIN_PACKETCMD.  The 12 command
 *	bytes themselves are sent by idefloppy_transfer_pc, either from the
 *	DRQ interrupt or by polling, depending on the drive's DRQ type.
 *	Commands that exhausted their retries (or were flagged PC_ABORT)
 *	are failed immediately via the command's callback.
 */
static void idefloppy_issue_pc (ide_drive_t *drive, idefloppy_pc_t *pc)
{
	idefloppy_floppy_t *floppy = drive->driver_data;
	idefloppy_bcount_reg_t bcount;
	int dma_ok = 0;

#if IDEFLOPPY_DEBUG_BUGS
	if (floppy->pc->c[0] == IDEFLOPPY_REQUEST_SENSE_CMD && pc->c[0] == IDEFLOPPY_REQUEST_SENSE_CMD) {
		printk (KERN_ERR "ide-floppy: possible ide-floppy.c bug - Two request sense in serial were issued\n");
	}
#endif /* IDEFLOPPY_DEBUG_BUGS */

	/* Remember the first non-request-sense command, so that a later
	   request sense can attribute the error to it. */
	if (floppy->failed_pc == NULL && pc->c[0] != IDEFLOPPY_REQUEST_SENSE_CMD)
		floppy->failed_pc=pc;
	floppy->pc=pc;							/* Set the current packet command */

	if (pc->retries > IDEFLOPPY_MAX_PC_RETRIES || test_bit (PC_ABORT, &pc->flags)) {
		/*
		 *	We will "abort" retrying a packet command in case
		 *	a legitimate error code was received.
		 */
		if (!test_bit (PC_ABORT, &pc->flags)) {
			printk (KERN_ERR "ide-floppy: %s: I/O error, pc = %2x, key = %2x, asc = %2x, ascq = %2x\n",
				drive->name, pc->c[0], floppy->sense_key, floppy->asc, floppy->ascq);
			pc->error = IDEFLOPPY_ERROR_GENERAL;		/* Giving up */
		}
		floppy->failed_pc=NULL;
		pc->callback(drive);
		return;
	}
#if IDEFLOPPY_DEBUG_LOG
	printk (KERN_INFO "Retry number - %d\n",pc->retries);
#endif /* IDEFLOPPY_DEBUG_LOG */

	pc->retries++;
	pc->actually_transferred=0;					/* We haven't transferred any data yet */
	pc->current_position=pc->buffer;
	bcount.all=pc->request_transfer;				/* Request to transfer the entire buffer at once */

#ifdef CONFIG_BLK_DEV_TRITON
	/* NOTE(review): this relies on clear_bit() returning the previous
	   bit value, as the bitops of this kernel branch do. */
	if (clear_bit (PC_DMA_ERROR, &pc->flags)) {
		printk (KERN_WARNING "ide-floppy: DMA disabled, reverting to PIO\n");
		drive->using_dma=0;
	}
	if (test_bit (PC_DMA_RECOMMENDED, &pc->flags) && drive->using_dma)
		dma_ok=!HWIF(drive)->dmaproc(test_bit (PC_WRITING, &pc->flags) ? ide_dma_write : ide_dma_read, drive);
#endif /* CONFIG_BLK_DEV_TRITON */

	/* Program the task file; register write order matters here. */
	OUT_BYTE (drive->ctl,IDE_CONTROL_REG);
	OUT_BYTE (dma_ok ? 1:0,IDE_FEATURE_REG);			/* Use PIO/DMA */
	OUT_BYTE (bcount.b.high,IDE_BCOUNTH_REG);
	OUT_BYTE (bcount.b.low,IDE_BCOUNTL_REG);
	OUT_BYTE (drive->select.all,IDE_SELECT_REG);

#ifdef CONFIG_BLK_DEV_TRITON
	if (dma_ok) {							/* Begin DMA, if necessary */
		set_bit (PC_DMA_IN_PROGRESS, &pc->flags);
		(void) (HWIF(drive)->dmaproc(ide_dma_begin, drive));
	}
#endif /* CONFIG_BLK_DEV_TRITON */

	if (test_bit (IDEFLOPPY_DRQ_INTERRUPT, &floppy->flags)) {
		/* Drive interrupts when it is ready for the packet bytes. */
		ide_set_handler (drive, &idefloppy_transfer_pc, WAIT_CMD);
		OUT_BYTE (WIN_PACKETCMD, IDE_COMMAND_REG);		/* Issue the packet command */
	} else {
		/* Drive asserts DRQ without interrupting -- poll for it. */
		OUT_BYTE (WIN_PACKETCMD, IDE_COMMAND_REG);
		idefloppy_transfer_pc (drive);
	}
}
+
+static void idefloppy_rw_callback (ide_drive_t *drive)
+{
+#if IDEFLOPPY_DEBUG_LOG        
+       printk (KERN_INFO "ide-floppy: Reached idefloppy_rw_callback\n");
+#endif /* IDEFLOPPY_DEBUG_LOG */
+
+       return;
+}
+
+static void idefloppy_create_prevent_cmd (idefloppy_pc_t *pc, int prevent)
+{
+#if IDEFLOPPY_DEBUG_LOG
+       printk (KERN_INFO "ide-floppy: creating prevent removal command, prevent = %d\n", prevent);
+#endif /* IDEFLOPPY_DEBUG_LOG */
+
+       idefloppy_init_pc (pc);
+       pc->c[0] = IDEFLOPPY_PREVENT_REMOVAL_CMD;
+       pc->c[4] = prevent;
+}
+
+static void idefloppy_create_read_capacity_cmd (idefloppy_pc_t *pc)
+{
+       idefloppy_init_pc (pc);
+       pc->c[0] = IDEFLOPPY_READ_CAPACITY_CMD;
+       pc->c[7] = 255;
+       pc->c[8] = 255;
+}
+
+/*
+ *     A mode sense command is used to "sense" floppy parameters.
+ */
+static void idefloppy_create_mode_sense_cmd (idefloppy_pc_t *pc, byte page_code, byte type)
+{
+       unsigned short length = sizeof (idefloppy_mode_parameter_header_t);
+       
+       idefloppy_init_pc (pc);
+       pc->c[0] = IDEFLOPPY_MODE_SENSE_CMD;
+       pc->c[1] = 0;
+       pc->c[2] = page_code + (type << 6);
+
+       switch (page_code) {
+               case IDEFLOPPY_CAPABILITIES_PAGE:
+                       length += 12;
+                       break;
+               case IDEFLOPPY_FLEXIBLE_DISK_PAGE:
+                       length += 32;
+                       break;
+               default:
+                       printk (KERN_ERR "ide-floppy: unsupported page code in create_mode_sense_cmd\n");
+       }
+       put_unaligned (htons (length), (unsigned short *) &pc->c[7]);
+       pc->request_transfer = length;
+}
+
+static void idefloppy_create_start_stop_cmd (idefloppy_pc_t *pc, int start)
+{
+       idefloppy_init_pc (pc);
+       pc->c[0] = IDEFLOPPY_START_STOP_CMD;
+       pc->c[4] = start;
+}
+
/*
 *	idefloppy_create_rw_cmd builds a READ/WRITE (10 or 12) packet
 *	command for the block-layer request rq.  Sector counts are scaled
 *	by bs_factor, since the device block size may be a multiple of the
 *	kernel's 512-byte sectors.  LBA and block count are stored
 *	big-endian in the packet via put_unaligned/hton*.
 */
static void idefloppy_create_rw_cmd (idefloppy_floppy_t *floppy, idefloppy_pc_t *pc, struct request *rq)
{
	int block = rq->sector / floppy->bs_factor;
	int blocks = rq->nr_sectors / floppy->bs_factor;
	
#if IDEFLOPPY_DEBUG_LOG
	printk ("create_rw1%d_cmd: block == %d, blocks == %d\n",
		2 * test_bit (IDEFLOPPY_USE_READ12, &floppy->flags), block, blocks);
#endif /* IDEFLOPPY_DEBUG_LOG */

	idefloppy_init_pc (pc);
	if (test_bit (IDEFLOPPY_USE_READ12, &floppy->flags)) {
		/* 12-byte form: 32-bit transfer length at bytes 6-9 */
		pc->c[0] = rq->cmd == READ ? IDEFLOPPY_READ12_CMD : IDEFLOPPY_WRITE12_CMD;
		put_unaligned (htonl (blocks), (unsigned int *) &pc->c[6]);
	} else {
		/* 10-byte form: 16-bit transfer length at bytes 7-8 */
		pc->c[0] = rq->cmd == READ ? IDEFLOPPY_READ10_CMD : IDEFLOPPY_WRITE10_CMD;
		put_unaligned (htons (blocks), (unsigned short *) &pc->c[7]);
	}
	put_unaligned (htonl (block), (unsigned int *) &pc->c[2]);
	pc->callback = &idefloppy_rw_callback;
	pc->rq = rq;
	pc->b_data = rq->buffer;
	/* For writes the first buffer-head is already available; for reads
	   nothing has been received yet. */
	pc->b_count = rq->cmd == READ ? 0 : rq->bh->b_size;
	if (rq->cmd == WRITE)
		set_bit (PC_WRITING, &pc->flags);
	/* buffer == NULL tells the data-transfer path to walk the
	   request's buffer-head list instead of a flat buffer. */
	pc->buffer = NULL;
	pc->request_transfer = pc->buffer_size = blocks * floppy->block_size;
	set_bit (PC_DMA_RECOMMENDED, &pc->flags);
}
+
/*
 *	idefloppy_do_request is our request handling function.  
 *
 *	Called by ide.c for each request on our queue.  READ/WRITE
 *	requests are converted into ATAPI packet commands; IDEFLOPPY_PC_RQ
 *	requests carry a ready-made packet command in rq->buffer (queued by
 *	idefloppy_queue_pc_tail).  Requests that have accumulated too many
 *	errors are failed here.
 */
static void idefloppy_do_request (ide_drive_t *drive, struct request *rq, unsigned long block)
{
	idefloppy_floppy_t *floppy = drive->driver_data;
	idefloppy_pc_t *pc;

#if IDEFLOPPY_DEBUG_LOG
	printk (KERN_INFO "rq_status: %d, rq_dev: %u, cmd: %d, errors: %d\n",rq->rq_status,(unsigned int) rq->rq_dev,rq->cmd,rq->errors);
	printk (KERN_INFO "sector: %ld, nr_sectors: %ld, current_nr_sectors: %ld\n",rq->sector,rq->nr_sectors,rq->current_nr_sectors);
#endif /* IDEFLOPPY_DEBUG_LOG */

	if (rq->errors >= ERROR_MAX) {
		/* Retries exhausted -- report the sense data of the command
		   that originally failed and abort the request. */
		printk (KERN_ERR "ide-floppy: %s: I/O error, pc = %2x, key = %2x, asc = %2x, ascq = %2x\n",
			drive->name, floppy->failed_pc->c[0], floppy->sense_key, floppy->asc, floppy->ascq);
		idefloppy_end_request (0, HWGROUP(drive));
		return;
	}
	switch (rq->cmd) {
		case READ:
		case WRITE:
			/* Transfers must be aligned to the device block size. */
			if (rq->sector % floppy->bs_factor || rq->nr_sectors % floppy->bs_factor) {
				printk ("%s: unsupported r/w request size\n", drive->name);
				idefloppy_end_request (0, HWGROUP(drive));
				return;
			}
			pc = idefloppy_next_pc_storage (drive);
			idefloppy_create_rw_cmd (floppy, pc, rq);
			break;
		case IDEFLOPPY_PC_RQ:
			/* Pre-built packet command passed through the queue. */
			pc = (idefloppy_pc_t *) rq->buffer;
			break;
		default:
			printk (KERN_ERR "ide-floppy: unsupported command %x in request queue\n", rq->cmd);
			idefloppy_end_request (0,HWGROUP (drive));
			return;
	}
	pc->rq = rq;
	idefloppy_issue_pc (drive, pc);
}
+
+/*
+ *     idefloppy_queue_pc_tail adds a special packet command request to the
+ *     tail of the request queue, and waits for it to be serviced.
+ */
+static int idefloppy_queue_pc_tail (ide_drive_t *drive,idefloppy_pc_t *pc)
+{
+       struct request rq;
+
+       ide_init_drive_cmd (&rq);
+       rq.buffer = (char *) pc;
+       rq.cmd = IDEFLOPPY_PC_RQ;
+       return ide_do_drive_cmd (drive, &rq, ide_wait);
+}
+
/*
 *	Look at the flexible disk page parameters. We will ignore the CHS
 *	capacity parameters and use the LBA parameters instead.
 *
 *	Returns 0 on success, 1 if the mode sense command failed.  The
 *	multi-byte page fields arrive big-endian and are converted in
 *	place with ntohs before use.
 */
static int idefloppy_get_flexible_disk_page (ide_drive_t *drive)
{
	idefloppy_floppy_t *floppy = drive->driver_data;
	idefloppy_pc_t pc;
	idefloppy_mode_parameter_header_t *header;
	idefloppy_flexible_disk_page_t *page;
	int capacity;

	idefloppy_create_mode_sense_cmd (&pc, IDEFLOPPY_FLEXIBLE_DISK_PAGE, MODE_SENSE_CURRENT);
	if (idefloppy_queue_pc_tail (drive,&pc)) {
		printk (KERN_ERR "ide-floppy: Can't get flexible disk page parameters\n");
		return 1;
	}
	/* The page data immediately follows the mode parameter header. */
	header = (idefloppy_mode_parameter_header_t *) pc.buffer;
	page = (idefloppy_flexible_disk_page_t *) (header + 1);

	page->transfer_rate = ntohs (page->transfer_rate);
	page->sector_size = ntohs (page->sector_size);
	page->cyls = ntohs (page->cyls);
	page->rpm = ntohs (page->rpm);
	capacity = page->cyls * page->heads * page->sectors * page->sector_size;
	/* Only log when the geometry actually changed since last time. */
	if (memcmp (page, &floppy->flexible_disk_page, sizeof (idefloppy_flexible_disk_page_t))) {
		printk (KERN_INFO "%s: %dkB, %d/%d/%d CHS, %d kBps, %d sector size, %d rpm\n",
			drive->name, capacity / 1024, page->cyls, page->heads, page->sectors,
			page->transfer_rate / 8, page->sector_size, page->rpm);
		floppy->flexible_disk_page = *page;
		/* Cross-check the CHS capacity against the LBA capacity
		   obtained earlier; a mismatch is merely reported. */
		if (capacity != floppy->blocks * floppy->block_size)
			printk (KERN_NOTICE "%s: The drive reports both %d and %d bytes as its capacity\n",
				drive->name, capacity, floppy->blocks * floppy->block_size);
	}
	return 0;
}
+
/*
 *	Determine if a media is present in the floppy drive, and if so,
 *	its LBA capacity.
 *
 *	Issues a read-capacity packet command and walks the returned
 *	capacity descriptor list.  Only the first descriptor, and only
 *	when it reports CAPACITY_CURRENT, updates the driver's notion of
 *	the medium size.  Returns 0 when a usable capacity was found,
 *	1 otherwise.
 */
static int idefloppy_get_capacity (ide_drive_t *drive)
{
	idefloppy_floppy_t *floppy = drive->driver_data;
	idefloppy_pc_t pc;
	idefloppy_capacity_header_t *header;
	idefloppy_capacity_descriptor_t *descriptor;
	int i, descriptors, rc = 1, blocks, length;
	
	idefloppy_create_read_capacity_cmd (&pc);
	if (idefloppy_queue_pc_tail (drive, &pc)) {
		printk (KERN_ERR "ide-floppy: Can't get floppy parameters\n");
		return 1;
	}
	header = (idefloppy_capacity_header_t *) pc.buffer;
	descriptors = header->length / sizeof (idefloppy_capacity_descriptor_t);
	descriptor = (idefloppy_capacity_descriptor_t *) (header + 1);
	for (i = 0; i < descriptors; i++, descriptor++) {
		/* Descriptor fields are big-endian; convert in place. */
		blocks = descriptor->blocks = ntohl (descriptor->blocks);
		length = descriptor->length = ntohs (descriptor->length);
		if (!i && descriptor->dc == CAPACITY_CURRENT) {
			if (memcmp (descriptor, &floppy->capacity, sizeof (idefloppy_capacity_descriptor_t))) {
				printk (KERN_INFO "%s: %dkB, %d blocks, %d sector size\n", drive->name, blocks * length / 1024, blocks, length);
				floppy->capacity = *descriptor;
			}
			/* Only multiples of the 512-byte kernel sector are
			   supported as device block sizes. */
			if (!length || length % 512)
				printk (KERN_ERR "%s: %d bytes block size not supported\n", drive->name, length);
			else {
				floppy->blocks = blocks;
				floppy->block_size = length;
				if ((floppy->bs_factor = length / 512) != 1)
					printk (KERN_NOTICE "%s: warning: non 512 bytes block size not fully supported\n", drive->name);
				drive->part[0].nr_sects = blocks * floppy->bs_factor;
				if (length > BLOCK_SIZE)
					blksize_size[HWIF(drive)->major][drive->select.b.unit << PARTN_BITS] = length;
				rc = 0;
			}
		}
#if IDEFLOPPY_DEBUG_INFO
		if (!i) printk (KERN_INFO "Descriptor 0 Code: %d\n", descriptor->dc);
		printk (KERN_INFO "Descriptor %d: %dkB, %d blocks, %d sector size\n", i, blocks * length / 1024, blocks, length);
#endif /* IDEFLOPPY_DEBUG_INFO */
	}
	/* Best effort -- a failure here does not affect our return code. */
	(void) idefloppy_get_flexible_disk_page (drive);
	return rc;
}
+
+/*
+ *     Our special ide-floppy ioctl's.
+ *
+ *     Currently there aren't any ioctl's.
+ */
+static int idefloppy_ioctl (ide_drive_t *drive, struct inode *inode, struct file *file,
+                                unsigned int cmd, unsigned long arg)
+{
+       return -EIO;
+}
+
/*
 *	Our open/release functions
 *
 *	On the first open of the device we spin the medium up, read its
 *	capacity (failing the open with -EIO if there is no usable
 *	medium), lock the door, and let the block layer revalidate the
 *	device.
 */
static int idefloppy_open (struct inode *inode, struct file *filp, ide_drive_t *drive)
{
	idefloppy_floppy_t *floppy = drive->driver_data;
	idefloppy_pc_t pc;
	
#if IDEFLOPPY_DEBUG_LOG
	printk (KERN_INFO "Reached idefloppy_open\n");
#endif /* IDEFLOPPY_DEBUG_LOG */

	MOD_INC_USE_COUNT;
	if (drive->usage == 1) {
		/* First opener: start the medium (best effort). */
		idefloppy_create_start_stop_cmd (&pc, 1);
		(void) idefloppy_queue_pc_tail (drive, &pc);
		if (idefloppy_get_capacity (drive)) {
			/* No medium / unusable medium -- undo the open. */
			drive->usage--;
			MOD_DEC_USE_COUNT;
			return -EIO;
		}
		set_bit (IDEFLOPPY_MEDIA_CHANGED, &floppy->flags);
		/* Lock the door while the device is open. */
		idefloppy_create_prevent_cmd (&pc, 1);
		(void) idefloppy_queue_pc_tail (drive, &pc);
		check_disk_change(inode->i_rdev);
	}
	return 0;
}
+
+static void idefloppy_release (struct inode *inode, struct file *filp, ide_drive_t *drive)
+{
+       idefloppy_pc_t pc;
+       
+#if IDEFLOPPY_DEBUG_LOG
+       printk (KERN_INFO "Reached idefloppy_release\n");
+#endif /* IDEFLOPPY_DEBUG_LOG */
+
+       if (!drive->usage) {
+               invalidate_buffers (inode->i_rdev);
+               idefloppy_create_prevent_cmd (&pc, 0);
+               (void) idefloppy_queue_pc_tail (drive, &pc);
+       }
+       MOD_DEC_USE_COUNT;
+}
+
+/*
+ *     Check media change. Use a simple algorithm for now.
+ */
+static int idefloppy_media_change (ide_drive_t *drive)
+{
+       idefloppy_floppy_t *floppy = drive->driver_data;
+       
+       return clear_bit (IDEFLOPPY_MEDIA_CHANGED, &floppy->flags);
+}
+
+/*
+ *     Return the current floppy capacity to ide.c.
+ */
+static unsigned long idefloppy_capacity (ide_drive_t *drive)
+{
+       idefloppy_floppy_t *floppy = drive->driver_data;
+       unsigned long capacity = floppy->blocks * floppy->bs_factor;
+
+       return capacity ? capacity : 0x7fffffff;
+}
+
+/*
+ *     idefloppy_identify_device checks if we can support a drive,
+ *     based on the ATAPI IDENTIFY command results.
+ */
+static int idefloppy_identify_device (ide_drive_t *drive,struct hd_driveid *id)
+{
+       struct idefloppy_id_gcw gcw;
+#if IDEFLOPPY_DEBUG_INFO
+       unsigned short mask,i;
+       char buffer[80];
+#endif /* IDEFLOPPY_DEBUG_INFO */
+
+       *((unsigned short *) &gcw) = id->config;
+
+#if IDEFLOPPY_DEBUG_INFO
+       printk (KERN_INFO "Dumping ATAPI Identify Device floppy parameters\n");
+       switch (gcw.protocol) {
+               case 0: case 1: sprintf (buffer, "ATA");break;
+               case 2: sprintf (buffer, "ATAPI");break;
+               case 3: sprintf (buffer, "Reserved (Unknown to ide-floppy)");break;
+       }
+       printk (KERN_INFO "Protocol Type: %s\n", buffer);
+       switch (gcw.device_type) {
+               case 0: sprintf (buffer, "Direct-access Device");break;
+               case 1: sprintf (buffer, "Streaming Tape Device");break;
+               case 2: case 3: case 4: sprintf (buffer, "Reserved");break;
+               case 5: sprintf (buffer, "CD-ROM Device");break;
+               case 6: sprintf (buffer, "Reserved");
+               case 7: sprintf (buffer, "Optical memory Device");break;
+               case 0x1f: sprintf (buffer, "Unknown or no Device type");break;
+               default: sprintf (buffer, "Reserved");
+       }
+       printk (KERN_INFO "Device Type: %x - %s\n", gcw.device_type, buffer);
+       printk (KERN_INFO "Removable: %s\n",gcw.removable ? "Yes":"No");        
+       switch (gcw.drq_type) {
+               case 0: sprintf (buffer, "Microprocessor DRQ");break;
+               case 1: sprintf (buffer, "Interrupt DRQ");break;
+               case 2: sprintf (buffer, "Accelerated DRQ");break;
+               case 3: sprintf (buffer, "Reserved");break;
+       }
+       printk (KERN_INFO "Command Packet DRQ Type: %s\n", buffer);
+       switch (gcw.packet_size) {
+               case 0: sprintf (buffer, "12 bytes");break;
+               case 1: sprintf (buffer, "16 bytes");break;
+               default: sprintf (buffer, "Reserved");break;
+       }
+       printk (KERN_INFO "Command Packet Size: %s\n", buffer);
+       printk (KERN_INFO "Model: %s\n",id->model);
+       printk (KERN_INFO "Firmware Revision: %s\n",id->fw_rev);
+       printk (KERN_INFO "Serial Number: %s\n",id->serial_no);
+       printk (KERN_INFO "Write buffer size(?): %d bytes\n",id->buf_size*512);
+       printk (KERN_INFO "DMA: %s",id->capability & 0x01 ? "Yes\n":"No\n");
+       printk (KERN_INFO "LBA: %s",id->capability & 0x02 ? "Yes\n":"No\n");
+       printk (KERN_INFO "IORDY can be disabled: %s",id->capability & 0x04 ? "Yes\n":"No\n");
+       printk (KERN_INFO "IORDY supported: %s",id->capability & 0x08 ? "Yes\n":"Unknown\n");
+       printk (KERN_INFO "ATAPI overlap supported: %s",id->capability & 0x20 ? "Yes\n":"No\n");
+       printk (KERN_INFO "PIO Cycle Timing Category: %d\n",id->tPIO);
+       printk (KERN_INFO "DMA Cycle Timing Category: %d\n",id->tDMA);
+       printk (KERN_INFO "Single Word DMA supported modes:\n");
+       for (i=0,mask=1;i<8;i++,mask=mask << 1) {
+               if (id->dma_1word & mask)
+                       printk (KERN_INFO "   Mode %d%s\n", i, (id->dma_1word & (mask << 8)) ? " (active)" : "");
+       }
+       printk (KERN_INFO "Multi Word DMA supported modes:\n");
+       for (i=0,mask=1;i<8;i++,mask=mask << 1) {
+               if (id->dma_mword & mask)
+                       printk (KERN_INFO "   Mode %d%s\n", i, (id->dma_mword & (mask << 8)) ? " (active)" : "");
+       }
+       if (id->field_valid & 0x0002) {
+               printk (KERN_INFO "Enhanced PIO Modes: %s\n",id->eide_pio_modes & 1 ? "Mode 3":"None");
+               if (id->eide_dma_min == 0)
+                       sprintf (buffer, "Not supported");
+               else
+                       sprintf (buffer, "%d ns",id->eide_dma_min);
+               printk (KERN_INFO "Minimum Multi-word DMA cycle per word: %s\n", buffer);
+               if (id->eide_dma_time == 0)
+                       sprintf (buffer, "Not supported");
+               else
+                       sprintf (buffer, "%d ns",id->eide_dma_time);
+               printk (KERN_INFO "Manufacturer\'s Recommended Multi-word cycle: %s\n", buffer);
+               if (id->eide_pio == 0)
+                       sprintf (buffer, "Not supported");
+               else
+                       sprintf (buffer, "%d ns",id->eide_pio);
+               printk (KERN_INFO "Minimum PIO cycle without IORDY: %s\n", buffer);
+               if (id->eide_pio_iordy == 0)
+                       sprintf (buffer, "Not supported");
+               else
+                       sprintf (buffer, "%d ns",id->eide_pio_iordy);
+               printk (KERN_INFO "Minimum PIO cycle with IORDY: %s\n", buffer);
+       } else
+               printk (KERN_INFO "According to the device, fields 64-70 are not valid.\n");
+#endif /* IDEFLOPPY_DEBUG_INFO */
+
+       if (gcw.protocol != 2)
+               printk (KERN_ERR "ide-floppy: Protocol is not ATAPI\n");
+       else if (gcw.device_type != 0)
+               printk (KERN_ERR "ide-floppy: Device type is not set to floppy\n");
+       else if (!gcw.removable)
+               printk (KERN_ERR "ide-floppy: The removable flag is not set\n");
+       else if (gcw.drq_type == 3) {
+               printk (KERN_ERR "ide-floppy: Sorry, DRQ type %d not supported\n", gcw.drq_type);
+       } else if (gcw.packet_size != 0) {
+               printk (KERN_ERR "ide-floppy: Packet size is not 12 bytes long\n");
+       } else
+               return 1;
+       return 0;
+}
+
/*
 *	idefloppy_get_capabilities asks the floppy about its various
 *	parameters.
 *
 *	Issues a MODE SENSE for the capabilities page; the result is only
 *	used for diagnostics (a warning when the "system floppy" bit is
 *	clear, plus an optional debug dump).
 */
static void idefloppy_get_capabilities (ide_drive_t *drive)
{
	idefloppy_pc_t pc;
	idefloppy_mode_parameter_header_t *header;
	idefloppy_capabilities_page_t *capabilities;
	
	idefloppy_create_mode_sense_cmd (&pc, IDEFLOPPY_CAPABILITIES_PAGE, MODE_SENSE_CURRENT);
	if (idefloppy_queue_pc_tail (drive,&pc)) {
		printk (KERN_ERR "ide-floppy: Can't get drive capabilities\n");
		return;
	}
	/* The capabilities page follows the mode parameter header. */
	header = (idefloppy_mode_parameter_header_t *) pc.buffer;
	capabilities = (idefloppy_capabilities_page_t *) (header + 1);

	if (!capabilities->sflp)
		printk (KERN_INFO "%s: Warning - system floppy device bit is not set\n", drive->name);

#if IDEFLOPPY_DEBUG_INFO
	printk (KERN_INFO "Dumping the results of the MODE SENSE packet command\n");
	printk (KERN_INFO "Mode Parameter Header:\n");
	printk (KERN_INFO "Mode Data Length - %d\n",header->mode_data_length);
	printk (KERN_INFO "Medium Type - %d\n",header->medium_type);
	printk (KERN_INFO "WP - %d\n",header->wp);

	printk (KERN_INFO "Capabilities Page:\n");
	printk (KERN_INFO "Page code - %d\n",capabilities->page_code);
	printk (KERN_INFO "Page length - %d\n",capabilities->page_length);
	printk (KERN_INFO "PS - %d\n",capabilities->ps);
	printk (KERN_INFO "System Floppy Type device - %s\n",capabilities->sflp ? "Yes":"No");
	printk (KERN_INFO "Supports Reporting progress of Format - %s\n",capabilities->srfp ? "Yes":"No");
	printk (KERN_INFO "Non CD Optical device - %s\n",capabilities->ncd ? "Yes":"No");
	printk (KERN_INFO "Multiple LUN support - %s\n",capabilities->sml ? "Yes":"No");
	printk (KERN_INFO "Total LUN supported - %s\n",capabilities->tlun ? "Yes":"No");
#endif /* IDEFLOPPY_DEBUG_INFO */
}
+
/*
 *	Driver initialization.
 *
 *	Attaches the per-drive floppy state to the ide drive, notes
 *	whether the drive interrupts before accepting the command packet
 *	(DRQ type 1), and probes the drive's capabilities and current
 *	medium capacity.
 */
static void idefloppy_setup (ide_drive_t *drive, idefloppy_floppy_t *floppy)
{
	struct idefloppy_id_gcw gcw;

	*((unsigned short *) &gcw) = drive->id->config;
	drive->driver_data = floppy;
	drive->ready_stat = 0;
	memset (floppy, 0, sizeof (idefloppy_floppy_t));
	floppy->drive = drive;
	floppy->pc = floppy->pc_stack;
	if (gcw.drq_type == 1)
		set_bit (IDEFLOPPY_DRQ_INTERRUPT, &floppy->flags);

	idefloppy_get_capabilities (drive);
	/* A missing medium at setup time is not fatal. */
	(void) idefloppy_get_capacity (drive);
}
+
/*
 *	Detach the driver from a drive and free its state.
 *	Returns 1 if ide.c refuses to unregister us (drive busy),
 *	0 on success.
 */
static int idefloppy_cleanup (ide_drive_t *drive)
{
	idefloppy_floppy_t *floppy = drive->driver_data;

	if (ide_unregister_subdriver (drive))
		return 1;
	drive->driver_data = NULL;
	kfree (floppy);
	return 0;
}
+
int idefloppy_init (void);

/*
 *	Module descriptor registered with ide.c so the core can (re)run
 *	our probe when new interfaces appear.
 */
static ide_module_t idefloppy_module = {
	IDE_DRIVER_MODULE,	/* module type */
	idefloppy_init,		/* probe/init entry point */
	NULL			/* filled in by ide.c */
};
+
/*
 *	IDE subdriver functions, registered with ide.c
 *
 *	This table is the driver's entire interface towards the IDE core;
 *	entries left NULL are handled by ide.c defaults.
 */
static ide_driver_t idefloppy_driver = {
	ide_floppy,		/* media */
	0,			/* busy */
	1,			/* supports_dma */
	idefloppy_cleanup,	/* cleanup */
	idefloppy_do_request,	/* do_request */
	idefloppy_end_request,	/* end_request */
	idefloppy_ioctl,	/* ioctl */
	idefloppy_open,		/* open */
	idefloppy_release,	/* release */
	idefloppy_media_change,	/* media_change */
	NULL,			/* pre_reset */
	idefloppy_capacity,	/* capacity */
	NULL			/* special */
};
+
/*
 *	idefloppy_init will register the driver for each floppy.
 *
 *	Scans all ide_floppy drives not yet claimed by a driver; each
 *	supported drive gets a freshly allocated idefloppy_floppy_t and is
 *	registered as a subdriver.  "failed" doubles as the scan cursor:
 *	it is pre-incremented per probe and decremented again on success,
 *	so unclaimed drives are skipped on the next ide_scan_devices call.
 *	Always returns 0.
 */
int idefloppy_init (void)
{
	ide_drive_t *drive;
	idefloppy_floppy_t *floppy;
	int failed = 0;

	/* Hold the module while probing, even when built-in. */
	MOD_INC_USE_COUNT;
	while ((drive = ide_scan_devices (ide_floppy, NULL, failed++)) != NULL) {
		if (!idefloppy_identify_device (drive, drive->id)) {
			printk (KERN_ERR "ide-floppy: %s: not supported by this version of ide-floppy\n", drive->name);
			continue;
		}
		if ((floppy = (idefloppy_floppy_t *) kmalloc (sizeof (idefloppy_floppy_t), GFP_KERNEL)) == NULL) {
			printk (KERN_ERR "ide-floppy: %s: Can't allocate a floppy structure\n", drive->name);
			continue;
		}
		if (ide_register_subdriver (drive, &idefloppy_driver, IDE_SUBDRIVER_VERSION)) {
			printk (KERN_ERR "ide-floppy: %s: Failed to register the driver with ide.c\n", drive->name);
			kfree (floppy);
			continue;
		}
		idefloppy_setup (drive, floppy);
		failed--;
	}
	ide_register_module(&idefloppy_module);
	MOD_DEC_USE_COUNT;
	return 0;
}
+
+#ifdef MODULE
+int init_module (void)
+{
+       return idefloppy_init ();
+}
+
/*
 *	Module exit: detach from every drive we claimed, then drop the
 *	module registration.  Drives that are still busy are reported and
 *	left in place ("failed" advances the scan cursor past them).
 */
void cleanup_module (void)
{
	ide_drive_t *drive;
	int failed = 0;

	while ((drive = ide_scan_devices (ide_floppy, &idefloppy_driver, failed)) != NULL)
		if (idefloppy_cleanup (drive)) {
			printk ("%s: cleanup_module() called while still busy\n", drive->name);
			failed++;
		}
	ide_unregister_module(&idefloppy_module);
}
#endif /* MODULE */
diff --git a/drivers/block/ide-probe.c b/drivers/block/ide-probe.c
new file mode 100644 (file)
index 0000000..bda7e8f
--- /dev/null
@@ -0,0 +1,724 @@
+/*
+ *  linux/drivers/block/ide-probe.c    Version 1.0  Oct  31, 1996
+ *
+ *  Copyright (C) 1994-1996  Linus Torvalds & authors (see below)
+ */
+
+/*
+ *  Maintained by Mark Lord  <mlord@pobox.com>
+ *            and Gadi Oxman <gadio@netvision.net.il>
+ *
+ * This is the IDE probe module, as evolved from hd.c and ide.c.
+ *
+ *  From hd.c:
+ *  |
+ *  | It traverses the request-list, using interrupts to jump between functions.
+ *  | As nearly all functions can be called within interrupts, we may not sleep.
+ *  | Special care is recommended.  Have Fun!
+ *  |
+ *  | modified by Drew Eckhardt to check nr of hd's from the CMOS.
+ *  |
+ *  | Thanks to Branko Lankester, lankeste@fwi.uva.nl, who found a bug
+ *  | in the early extended-partition checks and added DM partitions.
+ *  |
+ *  | Early work on error handling by Mika Liljeberg (liljeber@cs.Helsinki.FI).
+ *  |
+ *  | IRQ-unmask, drive-id, multiple-mode, support for ">16 heads",
+ *  | and general streamlining by Mark Lord (mlord@pobox.com).
+ *
+ *  October, 1994 -- Complete line-by-line overhaul for linux 1.1.x, by:
+ *
+ *     Mark Lord       (mlord@pobox.com)               (IDE Perf.Pkg)
+ *     Delman Lee      (delman@mipg.upenn.edu)         ("Mr. atdisk2")
+ *     Scott Snyder    (snyder@fnald0.fnal.gov)        (ATAPI IDE cd-rom)
+ *
+ *  This was a rewrite of just about everything from hd.c, though some original
+ *  code is still sprinkled about.  Think of it as a major evolution, with
+ *  inspiration from lots of linux users, esp.  hamish@zot.apana.org.au
+ *
+ * Version 1.0         move drive probing code from ide.c to ide-probe.c
+ */
+
+#undef REALLY_SLOW_IO          /* most systems can safely undef this */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/string.h>
+#include <linux/kernel.h>
+#include <linux/delay.h>
+#include <linux/timer.h>
+#include <linux/mm.h>
+#include <linux/ioport.h>
+#include <linux/interrupt.h>
+#include <linux/major.h>
+#include <linux/blkdev.h>
+#include <linux/errno.h>
+#include <linux/hdreg.h>
+#include <linux/genhd.h>
+#include <linux/malloc.h>
+
+#include <asm/byteorder.h>
+#include <asm/irq.h>
+#include <asm/uaccess.h>
+#include <asm/io.h>
+
+#include "ide.h"
+
+static inline void do_identify (ide_drive_t *drive, byte cmd)
+{
+       int bswap = 1;
+       struct hd_driveid *id;
+
+       id = drive->id = kmalloc (SECTOR_WORDS*4, GFP_KERNEL);
+       ide_input_data(drive, id, SECTOR_WORDS);        /* read 512 bytes of id info */
+       sti();
+       ide_fix_driveid(id);
+
+#if defined (CONFIG_SCSI_EATA_DMA) || defined (CONFIG_SCSI_EATA_PIO)
+       /*
+        * EATA SCSI controllers do a hardware ATA emulation:
+        * Ignore them if there is a driver for them available.
+        */
+       if ((id->model[0] == 'P' && id->model[1] == 'M')
+        || (id->model[0] == 'S' && id->model[1] == 'K')) {
+               printk("%s: EATA SCSI HBA %.10s\n", drive->name, id->model);
+               drive->present = 0;
+               return;
+       }
+#endif /* CONFIG_SCSI_EATA_DMA || CONFIG_SCSI_EATA_PIO */
+
+       /*
+        *  WIN_IDENTIFY returns little-endian info,
+        *  WIN_PIDENTIFY *usually* returns little-endian info.
+        */
+       if (cmd == WIN_PIDENTIFY) {
+               if ((id->model[0] == 'N' && id->model[1] == 'E') /* NEC */
+                || (id->model[0] == 'F' && id->model[1] == 'X') /* Mitsumi */
+                || (id->model[0] == 'P' && id->model[1] == 'i'))/* Pioneer */
+                       bswap ^= 1;     /* Vertos drives may still be weird */
+       }
+       ide_fixstring (id->model,     sizeof(id->model),     bswap);
+       ide_fixstring (id->fw_rev,    sizeof(id->fw_rev),    bswap);
+       ide_fixstring (id->serial_no, sizeof(id->serial_no), bswap);
+
+       drive->present = 1;
+       printk("%s: %s, ", drive->name, id->model);
+
+       /*
+        * Check for an ATAPI device
+        */
+       if (cmd == WIN_PIDENTIFY) {
+               byte type = (id->config >> 8) & 0x1f;
+               printk("ATAPI ");
+#ifdef CONFIG_BLK_DEV_PROMISE
+               if (HWIF(drive)->is_promise2) {
+                       printk(" -- not supported on 2nd Promise port\n");
+                       drive->present = 0;
+                       return;
+               }
+#endif /* CONFIG_BLK_DEV_PROMISE */
+               switch (type) {
+                       case ide_floppy:
+                               if (strstr (id->model, "oppy") || strstr (id->model, "poyp")) {
+                                       printk ("FLOPPY");
+                                       break;
+                               }
+                               printk ("cdrom or floppy?, assuming ");
+                               type = ide_cdrom;       /* Early cdrom models used zero */
+                       case ide_cdrom:
+                               printk ("CDROM");
+                               drive->removable = 1;
+                               break;
+                       case ide_tape:
+                               printk ("TAPE");
+                               break;
+                       default:
+                               printk("UNKNOWN (type %d)", type);
+                               break;
+               }
+               printk (" drive\n");
+               drive->media = type;
+               return;
+       }
+
+       drive->media = ide_disk;
+       printk("ATA DISK drive\n");
+       return;
+}
+
+/*
+ * Delay for *at least* 50ms.  As we don't know how much time is left
+ * until the next tick occurs, we wait an extra tick to be safe.
+ * This is used only during the probing/polling for drives at boot time.
+ */
+static void delay_50ms (void)
+{
+       unsigned long timer = jiffies + ((HZ + 19)/20) + 1;
+       while (timer > jiffies);
+}
+
+/*
+ * try_to_identify() sends an ATA(PI) IDENTIFY request to a drive
+ * and waits for a response.  It also monitors irqs while this is
+ * happening, in hope of automatically determining which one is
+ * being used by the interface.
+ *
+ * Returns:    0  device was identified
+ *             1  device timed-out (no response to identify request)
+ *             2  device aborted the command (refused to identify itself)
+ */
+static int try_to_identify (ide_drive_t *drive, byte cmd)
+{
+       int rc;
+       ide_ioreg_t hd_status;
+       unsigned long timeout;
+       int irqs = 0;
+
+       if (!HWIF(drive)->irq) {                /* already got an IRQ? */
+               probe_irq_off(probe_irq_on());  /* clear dangling irqs */
+               irqs = probe_irq_on();          /* start monitoring irqs */
+               OUT_BYTE(drive->ctl,IDE_CONTROL_REG);   /* enable device irq */
+       }
+
+       delay_50ms();                           /* take a deep breath */
+       if ((IN_BYTE(IDE_ALTSTATUS_REG) ^ IN_BYTE(IDE_STATUS_REG)) & ~INDEX_STAT) {
+               printk("%s: probing with STATUS instead of ALTSTATUS\n", drive->name);
+               hd_status = IDE_STATUS_REG;     /* ancient Seagate drives */
+       } else
+               hd_status = IDE_ALTSTATUS_REG;  /* use non-intrusive polling */
+
+#if CONFIG_BLK_DEV_PROMISE
+       if (IS_PROMISE_DRIVE) {
+               if (promise_cmd(drive,PROMISE_IDENTIFY)) {
+                       if (irqs)
+                               (void) probe_irq_off(irqs);
+                       return 1;
+               }
+       } else
+#endif /* CONFIG_BLK_DEV_PROMISE */
+               OUT_BYTE(cmd,IDE_COMMAND_REG);          /* ask drive for ID */
+       timeout = ((cmd == WIN_IDENTIFY) ? WAIT_WORSTCASE : WAIT_PIDENTIFY) / 2;
+       timeout += jiffies;
+       do {
+               if (jiffies > timeout) {
+                       if (irqs)
+                               (void) probe_irq_off(irqs);
+                       return 1;       /* drive timed-out */
+               }
+               delay_50ms();           /* give drive a breather */
+       } while (IN_BYTE(hd_status) & BUSY_STAT);
+
+       delay_50ms();           /* wait for IRQ and DRQ_STAT */
+       if (OK_STAT(GET_STAT(),DRQ_STAT,BAD_R_STAT)) {
+               unsigned long flags;
+               save_flags(flags);
+               cli();                  /* some systems need this */
+               do_identify(drive, cmd); /* drive returned ID */
+               rc = 0;                 /* drive responded with ID */
+               (void) GET_STAT();      /* clear drive IRQ */
+               restore_flags(flags);
+       } else
+               rc = 2;                 /* drive refused ID */
+       if (!HWIF(drive)->irq) {
+               irqs = probe_irq_off(irqs);     /* get our irq number */
+               if (irqs > 0) {
+                       HWIF(drive)->irq = irqs; /* save it for later */
+                       irqs = probe_irq_on();
+                       OUT_BYTE(drive->ctl|2,IDE_CONTROL_REG); /* mask device irq */
+                       udelay(5);
+                       (void) probe_irq_off(irqs);
+                       (void) probe_irq_off(probe_irq_on()); /* clear self-inflicted irq */
+                       (void) GET_STAT();      /* clear drive IRQ */
+
+               } else {        /* Mmmm.. multiple IRQs.. don't know which was ours */
+                       printk("%s: IRQ probe failed (%d)\n", drive->name, irqs);
+#ifdef CONFIG_BLK_DEV_CMD640
+#ifdef CMD640_DUMP_REGS
+                       if (HWIF(drive)->chipset == ide_cmd640) {
+                               printk("%s: Hmmm.. probably a driver problem.\n", drive->name);
+                               CMD640_DUMP_REGS;
+                       }
+#endif /* CMD640_DUMP_REGS */
+#endif /* CONFIG_BLK_DEV_CMD640 */
+               }
+       }
+       return rc;
+}
+
+/*
+ * do_probe() has the difficult job of finding a drive if it exists,
+ * without getting hung up if it doesn't exist, without trampling on
+ * ethernet cards, and without leaving any IRQs dangling to haunt us later.
+ *
+ * If a drive is "known" to exist (from CMOS or kernel parameters),
+ * but does not respond right away, the probe will "hang in there"
+ * for the maximum wait time (about 30 seconds), otherwise it will
+ * exit much more quickly.
+ *
+ * Returns:    0  device was identified
+ *             1  device timed-out (no response to identify request)
+ *             2  device aborted the command (refused to identify itself)
+ *             3  bad status from device (possible for ATAPI drives)
+ *             4  probe was not attempted because failure was obvious
+ */
+static int do_probe (ide_drive_t *drive, byte cmd)
+{
+       int rc;
+       ide_hwif_t *hwif = HWIF(drive);
+       if (drive->present) {   /* avoid waiting for inappropriate probes */
+               if ((drive->media != ide_disk) && (cmd == WIN_IDENTIFY))
+                       return 4;
+       }
+#ifdef DEBUG
+       printk("probing for %s: present=%d, media=%d, probetype=%s\n",
+               drive->name, drive->present, drive->media,
+               (cmd == WIN_IDENTIFY) ? "ATA" : "ATAPI");
+#endif
+       SELECT_DRIVE(hwif,drive);
+       delay_50ms();
+       if (IN_BYTE(IDE_SELECT_REG) != drive->select.all && !drive->present) {
+               OUT_BYTE(0xa0,IDE_SELECT_REG);  /* exit with drive0 selected */
+               delay_50ms();           /* allow BUSY_STAT to assert & clear */
+               return 3;    /* no i/f present: avoid killing ethernet cards */
+       }
+
+       if (OK_STAT(GET_STAT(),READY_STAT,BUSY_STAT)
+        || drive->present || cmd == WIN_PIDENTIFY)
+       {
+               if ((rc = try_to_identify(drive,cmd)))   /* send cmd and wait */
+                       rc = try_to_identify(drive,cmd); /* failed: try again */
+               if (rc == 1)
+                       printk("%s: no response (status = 0x%02x)\n", drive->name, GET_STAT());
+               (void) GET_STAT();              /* ensure drive irq is clear */
+       } else {
+               rc = 3;                         /* not present or maybe ATAPI */
+       }
+       if (drive->select.b.unit != 0) {
+               OUT_BYTE(0xa0,IDE_SELECT_REG);  /* exit with drive0 selected */
+               delay_50ms();
+               (void) GET_STAT();              /* ensure drive irq is clear */
+       }
+       return rc;
+}
+
+/*
+ * probe_for_drive() tests for existence of a given drive using do_probe().
+ *
+ * Returns:    0  no device was found
+ *             1  device was found (note: drive->present might still be 0)
+ */
+static inline byte probe_for_drive (ide_drive_t *drive)
+{
+       if (drive->noprobe)                     /* skip probing? */
+               return drive->present;
+       if (do_probe(drive, WIN_IDENTIFY) >= 2) { /* if !(success||timed-out) */
+               (void) do_probe(drive, WIN_PIDENTIFY); /* look for ATAPI device */
+       }
+       if (!drive->present)
+               return 0;                       /* drive not found */
+       if (drive->id == NULL) {                /* identification failed? */
+               if (drive->media == ide_disk) {
+                       printk ("%s: non-IDE drive, CHS=%d/%d/%d\n",
+                        drive->name, drive->cyl, drive->head, drive->sect);
+               } else if (drive->media == ide_cdrom) {
+                       printk("%s: ATAPI cdrom (?)\n", drive->name);
+               } else {
+                       drive->present = 0;     /* nuke it */
+               }
+       }
+       return 1;       /* drive was found */
+}
+
+/*
+ * We query CMOS about hard disks : it could be that we have a SCSI/ESDI/etc
+ * controller that is BIOS compatible with ST-506, and thus showing up in our
+ * BIOS table, but not register compatible, and therefore not present in CMOS.
+ *
+ * Furthermore, we will assume that our ST-506 drives <if any> are the primary
+ * drives in the system -- the ones reflected as drive 1 or 2.  The first
+ * drive is stored in the high nibble of CMOS byte 0x12, the second in the low
+ * nibble.  This will be either a 4 bit drive type or 0xf indicating use byte
+ * 0x19 for an 8 bit type, drive 1, 0x1a for drive 2 in CMOS.  A non-zero value
+ * means we have an AT controller hard disk for that drive.
+ *
+ * Of course, there is no guarantee that either drive is actually on the
+ * "primary" IDE interface, but we don't bother trying to sort that out here.
+ * If a drive is not actually on the primary interface, then these parameters
+ * will be ignored.  This results in the user having to supply the logical
+ * drive geometry as a boot parameter for each drive not on the primary i/f.
+ *
+ * The only "perfect" way to handle this would be to modify the setup.[cS] code
+ * to do BIOS calls Int13h/Fn08h and Int13h/Fn48h to get all of the drive info
+ * for us during initialization.  I have the necessary docs -- any takers?  -ml
+ */
+static void probe_cmos_for_drives (ide_hwif_t *hwif)
+{
+#ifdef __i386__
+       extern struct drive_info_struct drive_info;
+       byte cmos_disks, *BIOS = (byte *) &drive_info;
+       int unit;
+
+#ifdef CONFIG_BLK_DEV_PROMISE
+       if (hwif->is_promise2)
+               return;
+#endif /* CONFIG_BLK_DEV_PROMISE */
+       outb_p(0x12,0x70);              /* specify CMOS address 0x12 */
+       cmos_disks = inb_p(0x71);       /* read the data from 0x12 */
+       /* Extract drive geometry from CMOS+BIOS if not already setup */
+       for (unit = 0; unit < MAX_DRIVES; ++unit) {
+               ide_drive_t *drive = &hwif->drives[unit];
+               if ((cmos_disks & (0xf0 >> (unit*4))) && !drive->present && !drive->nobios) {
+                       drive->cyl   = drive->bios_cyl  = *(unsigned short *)BIOS;
+                       drive->head  = drive->bios_head = *(BIOS+2);
+                       drive->sect  = drive->bios_sect = *(BIOS+14);
+                       drive->ctl   = *(BIOS+8);
+                       drive->present = 1;
+               }
+               BIOS += 16;
+       }
+#endif
+}
+
+/*
+ * This routine only knows how to look for drive units 0 and 1
+ * on an interface, so any setting of MAX_DRIVES > 2 won't work here.
+ */
+static void probe_hwif (ide_hwif_t *hwif)
+{
+       unsigned int unit;
+       unsigned long flags;
+
+       if (hwif->noprobe)
+               return;
+       if (hwif->io_ports[IDE_DATA_OFFSET] == HD_DATA)
+               probe_cmos_for_drives (hwif);
+#if CONFIG_BLK_DEV_PROMISE
+       if (!hwif->is_promise2 &&
+          (ide_check_region(hwif->io_ports[IDE_DATA_OFFSET],8) || ide_check_region(hwif->io_ports[IDE_CONTROL_OFFSET],1))) {
+#else
+       if (ide_check_region(hwif->io_ports[IDE_DATA_OFFSET],8) || ide_check_region(hwif->io_ports[IDE_CONTROL_OFFSET],1)) {
+#endif /* CONFIG_BLK_DEV_PROMISE */
+               int msgout = 0;
+               for (unit = 0; unit < MAX_DRIVES; ++unit) {
+                       ide_drive_t *drive = &hwif->drives[unit];
+                       if (drive->present) {
+                               drive->present = 0;
+                               printk("%s: ERROR, PORTS ALREADY IN USE\n", drive->name);
+                               msgout = 1;
+                       }
+               }
+               if (!msgout)
+                       printk("%s: ports already in use, skipping probe\n", hwif->name);
+               return; 
+       }
+
+       save_flags(flags);
+       sti();  /* needed for jiffies and irq probing */
+       /*
+        * Second drive should only exist if first drive was found,
+        * but a lot of cdrom drives are configured as single slaves.
+        */
+       for (unit = 0; unit < MAX_DRIVES; ++unit) {
+               ide_drive_t *drive = &hwif->drives[unit];
+               (void) probe_for_drive (drive);
+               if (drive->present && !hwif->present) {
+                       hwif->present = 1;
+                       ide_request_region(hwif->io_ports[IDE_DATA_OFFSET],  8, hwif->name);
+                       ide_request_region(hwif->io_ports[IDE_CONTROL_OFFSET], 1, hwif->name);
+               }
+       }
+       if (hwif->reset) {
+               unsigned long timeout = jiffies + WAIT_WORSTCASE;
+               byte stat;
+
+               printk("%s: reset\n", hwif->name);
+               OUT_BYTE(12, hwif->io_ports[IDE_CONTROL_OFFSET]);
+               udelay(10);
+               OUT_BYTE(8, hwif->io_ports[IDE_CONTROL_OFFSET]);
+               do {
+                       delay_50ms();
+                       stat = IN_BYTE(hwif->io_ports[IDE_STATUS_OFFSET]);
+               } while ((stat & BUSY_STAT) && jiffies < timeout);
+       }
+       restore_flags(flags);
+       for (unit = 0; unit < MAX_DRIVES; ++unit) {
+               ide_drive_t *drive = &hwif->drives[unit];
+               if (drive->present) {
+                       ide_tuneproc_t *tuneproc = HWIF(drive)->tuneproc;
+                       if (tuneproc != NULL && drive->autotune == 1)
+                               tuneproc(drive, 255);   /* auto-tune PIO mode */
+               }
+       }
+}
+
+#if MAX_HWIFS > 1
+/*
+ * save_match() is used to simplify logic in init_irq() below.
+ *
+ * A loophole here is that we may not know about a particular
+ * hwif's irq until after that hwif is actually probed/initialized..
+ * This could be a problem for the case where an hwif is on a
+ * dual interface that requires serialization (eg. cmd640) and another
+ * hwif using one of the same irqs is initialized beforehand.
+ *
+ * This routine detects and reports such situations, but does not fix them.
+ */
+static void save_match (ide_hwif_t *hwif, ide_hwif_t *new, ide_hwif_t **match)
+{
+       ide_hwif_t *m = *match;
+
+       if (m && m->hwgroup && m->hwgroup != new->hwgroup) {
+               if (!new->hwgroup)
+                       return;
+               printk("%s: potential irq problem with %s and %s\n", hwif->name, new->name, m->name);
+       }
+       if (!m || m->irq != hwif->irq) /* don't undo a prior perfect match */
+               *match = new;
+}
+#endif /* MAX_HWIFS > 1 */
+
+/*
+ * This routine sets up the irq for an ide interface, and creates a new
+ * hwgroup for the irq/hwif if none was previously assigned.
+ *
+ * Much of the code is for correctly detecting/handling irq sharing
+ * and irq serialization situations.  This is somewhat complex because
+ * it handles static as well as dynamic (PCMCIA) IDE interfaces.
+ *
+ * The SA_INTERRUPT in sa_flags means ide_intr() is always entered with
+ * interrupts completely disabled.  This can be bad for interrupt latency,
+ * but anything else has led to problems on some machines.  We re-enable
+ * interrupts as much as we can safely do in most places.
+ */
+static int init_irq (ide_hwif_t *hwif)
+{
+       unsigned long flags;
+#if MAX_HWIFS > 1
+       unsigned int index;
+#endif /* MAX_HWIFS > 1 */
+       ide_hwgroup_t *hwgroup;
+       ide_hwif_t *match = NULL;
+
+       save_flags(flags);
+       cli();
+
+       hwif->hwgroup = NULL;
+#if MAX_HWIFS > 1
+       /*
+        * Group up with any other hwifs that share our irq(s).
+        */
+       for (index = 0; index < MAX_HWIFS; index++) {
+               ide_hwif_t *h = &ide_hwifs[index];
+               if (h->hwgroup) {  /* scan only initialized hwif's */
+                       if (hwif->irq == h->irq) {
+                               hwif->sharing_irq = h->sharing_irq = 1;
+                               save_match(hwif, h, &match);
+                       }
+                       if (hwif->serialized) {
+                               ide_hwif_t *mate = &ide_hwifs[hwif->index^1];
+                               if (index == mate->index || h->irq == mate->irq)
+                                       save_match(hwif, h, &match);
+                       }
+                       if (h->serialized) {
+                               ide_hwif_t *mate = &ide_hwifs[h->index^1];
+                               if (hwif->irq == mate->irq)
+                                       save_match(hwif, h, &match);
+                       }
+               }
+       }
+#endif /* MAX_HWIFS > 1 */
+       /*
+        * If we are still without a hwgroup, then form a new one
+        */
+       if (match) {
+               hwgroup = match->hwgroup;
+       } else {
+               hwgroup = kmalloc(sizeof(ide_hwgroup_t), GFP_KERNEL);
+               hwgroup->hwif    = hwgroup->next_hwif = hwif->next = hwif;
+               hwgroup->rq      = NULL;
+               hwgroup->handler = NULL;
+               if (hwif->drives[0].present)
+                       hwgroup->drive = &hwif->drives[0];
+               else
+                       hwgroup->drive = &hwif->drives[1];
+               hwgroup->poll_timeout = 0;
+               init_timer(&hwgroup->timer);
+               hwgroup->timer.function = &ide_timer_expiry;
+               hwgroup->timer.data = (unsigned long) hwgroup;
+       }
+
+       /*
+        * Allocate the irq, if not already obtained for another hwif
+        */
+       if (!match || match->irq != hwif->irq) {
+               if (ide_request_irq(hwif->irq, &ide_intr, SA_INTERRUPT, hwif->name, hwgroup)) {
+                       if (!match)
+                               kfree(hwgroup);
+                       restore_flags(flags);
+                       return 1;
+               }
+       }
+
+       /*
+        * Everything is okay, so link us into the hwgroup
+        */
+       hwif->hwgroup = hwgroup;
+       hwif->next = hwgroup->hwif->next;
+       hwgroup->hwif->next = hwif;
+
+       restore_flags(flags);   /* safe now that hwif->hwgroup is set up */
+
+#ifndef __mc68000__
+       printk("%s at 0x%03x-0x%03x,0x%03x on irq %d", hwif->name,
+               hwif->io_ports[IDE_DATA_OFFSET], hwif->io_ports[IDE_DATA_OFFSET]+7, hwif->io_ports[IDE_CONTROL_OFFSET], hwif->irq);
+#else
+       printk("%s at %p on irq 0x%08x", hwif->name, hwif->io_ports[IDE_DATA_OFFSET], hwif->irq);
+#endif /* __mc68000__ */
+       if (match)
+               printk(" (%sed with %s)", hwif->sharing_irq ? "shar" : "serializ", match->name);
+       printk("\n");
+       return 0;
+}
+
+/*
+ * init_gendisk() (as opposed to ide_geninit) is called for each major device,
+ * after probing for drives, to allocate partition tables and other data
+ * structures needed for the routines in genhd.c.  ide_geninit() gets called
+ * somewhat later, during the partition check.
+ */
+static void init_gendisk (ide_hwif_t *hwif)
+{
+       struct gendisk *gd, **gdp;
+       unsigned int unit, units, minors;
+       int *bs;
+
+       /* figure out maximum drive number on the interface */
+       for (units = MAX_DRIVES; units > 0; --units) {
+               if (hwif->drives[units-1].present)
+                       break;
+       }
+       minors    = units * (1<<PARTN_BITS);
+       gd        = kmalloc (sizeof(struct gendisk), GFP_KERNEL);
+       gd->sizes = kmalloc (minors * sizeof(int), GFP_KERNEL);
+       gd->part  = kmalloc (minors * sizeof(struct hd_struct), GFP_KERNEL);
+       bs        = kmalloc (minors*sizeof(int), GFP_KERNEL);
+
+       memset(gd->part, 0, minors * sizeof(struct hd_struct));
+
+       /* cdroms and msdos f/s are examples of non-1024 blocksizes */
+       blksize_size[hwif->major] = bs;
+       for (unit = 0; unit < minors; ++unit)
+               *bs++ = BLOCK_SIZE;
+
+       for (unit = 0; unit < units; ++unit)
+               hwif->drives[unit].part = &gd->part[unit << PARTN_BITS];
+
+       gd->major       = hwif->major;          /* our major device number */
+       gd->major_name  = IDE_MAJOR_NAME;       /* treated special in genhd.c */
+       gd->minor_shift = PARTN_BITS;           /* num bits for partitions */
+       gd->max_p       = 1<<PARTN_BITS;        /* 1 + max partitions / drive */
+       gd->max_nr      = units;                /* max num real drives */
+       gd->nr_real     = units;                /* current num real drives */
+       gd->init        = &ide_geninit;         /* initialization function */
+       gd->real_devices= hwif;                 /* ptr to internal data */
+       gd->next        = NULL;                 /* linked list of major devs */
+
+       for (gdp = &gendisk_head; *gdp; gdp = &((*gdp)->next)) ;
+       hwif->gd = *gdp = gd;                   /* link onto tail of list */
+}
+
+static int hwif_init (int h)
+{
+       ide_hwif_t *hwif = &ide_hwifs[h];
+       void (*rfn)(void);
+       
+       if (!hwif->present)
+               return 0;
+       if (!hwif->irq) {
+               if (!(hwif->irq = ide_default_irq(hwif->io_ports[IDE_DATA_OFFSET]))) {
+                       printk("%s: DISABLED, NO IRQ\n", hwif->name);
+                       return (hwif->present = 0);
+               }
+       }
+#ifdef CONFIG_BLK_DEV_HD
+       if (hwif->irq == HD_IRQ && hwif->io_ports[IDE_DATA_OFFSET] != HD_DATA) {
+               printk("%s: CANNOT SHARE IRQ WITH OLD HARDDISK DRIVER (hd.c)\n", hwif->name);
+               return (hwif->present = 0);
+       }
+#endif /* CONFIG_BLK_DEV_HD */
+       
+       hwif->present = 0; /* we set it back to 1 if all is ok below */
+       switch (hwif->major) {
+       case IDE0_MAJOR: rfn = &do_ide0_request; break;
+#if MAX_HWIFS > 1
+       case IDE1_MAJOR: rfn = &do_ide1_request; break;
+#endif
+#if MAX_HWIFS > 2
+       case IDE2_MAJOR: rfn = &do_ide2_request; break;
+#endif
+#if MAX_HWIFS > 3
+       case IDE3_MAJOR: rfn = &do_ide3_request; break;
+#endif
+       default:
+               printk("%s: request_fn NOT DEFINED\n", hwif->name);
+               return (hwif->present = 0);
+       }
+       if (register_blkdev (hwif->major, hwif->name, ide_fops)) {
+               printk("%s: UNABLE TO GET MAJOR NUMBER %d\n", hwif->name, hwif->major);
+       } else if (init_irq (hwif)) {
+               printk("%s: UNABLE TO GET IRQ %d\n", hwif->name, hwif->irq);
+               (void) unregister_blkdev (hwif->major, hwif->name);
+       } else {
+               init_gendisk(hwif);
+               blk_dev[hwif->major].request_fn = rfn;
+               read_ahead[hwif->major] = 8;    /* (4kB) */
+               hwif->present = 1;      /* success */
+       }
+       return hwif->present;
+}
+
+int ideprobe_init (void);
+static ide_module_t ideprobe_module = {
+       IDE_PROBE_MODULE,
+       ideprobe_init,
+       NULL
+};
+
+int ideprobe_init (void)
+{
+       unsigned int index;
+       int probe[MAX_HWIFS];
+       
+       MOD_INC_USE_COUNT;
+       memset(probe, 0, MAX_HWIFS * sizeof(int));
+       for (index = 0; index < MAX_HWIFS; ++index)
+               probe[index] = !ide_hwifs[index].present;
+
+       /*
+        * Probe for drives in the usual way.. CMOS/BIOS, then poke at ports
+        */
+       for (index = 0; index < MAX_HWIFS; ++index)
+               if (probe[index]) probe_hwif (&ide_hwifs[index]);
+       for (index = 0; index < MAX_HWIFS; ++index)
+               if (probe[index]) hwif_init (index);
+       ide_register_module(&ideprobe_module);
+       MOD_DEC_USE_COUNT;
+       return 0;
+}
+
+#ifdef MODULE
+int init_module (void)
+{
+       unsigned int index;
+       
+       for (index = 0; index < MAX_HWIFS; ++index)
+               ide_unregister(index);
+       return ideprobe_init();
+}
+
+void cleanup_module (void)
+{
+       ide_unregister_module(&ideprobe_module);
+}
+#endif /* MODULE */
index 3de8904180f2cd0217b982cd6f2e1693fdfbc80a..244072d915132055f94cbd436d02d229e89f9f80 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * linux/drivers/block/ide-tape.c      Version 1.8 - ALPHA     Sep  26, 1996
+ * linux/drivers/block/ide-tape.c      Version 1.10 - BETA     Nov   5, 1996
  *
  * Copyright (C) 1995, 1996 Gadi Oxman <gadio@netvision.net.il>
  *
  * The block device major and minor numbers are determined from the
  * tape's relative position in the ide interfaces, as explained in ide.c.
  *
- * The character device interface consists of two devices:
+ * The character device interface consists of the following devices:
  *
- * ht0         major=37,minor=0        first IDE tape, rewind on close.
- * nht0                major=37,minor=128      first IDE tape, no rewind on close.
+ * ht0         major 37, minor 0       first  IDE tape, rewind on close.
+ * ht1         major 37, minor 1       second IDE tape, rewind on close.
+ * ...
+ * nht0                major 37, minor 128     first  IDE tape, no rewind on close.
+ * nht1                major 37, minor 129     second IDE tape, no rewind on close.
+ * ...
  *
- * Run /usr/src/linux/scripts/MAKEDEV.ide to create the above entries.
- * We currently support only one ide tape drive.
+ * Run linux/scripts/MAKEDEV.ide to create the above entries.
  *
  * The general magnetic tape commands compatible interface, as defined by
  * include/linux/mtio.h, is accessible through the character device.
  * following scenario:
  *
  *     1.      ide-tape is operating in the pipelined operation mode.
- *     2.      All character device read/write requests consist of an
- *             integral number of the tape's recommended data transfer unit
- *             (which is shown on initialization and can be received with
- *              an ioctl).
- *             As of version 1.3 of the driver, this is no longer as critical
- *             as it used to be.
- *     3.      No buffering is performed by the user backup program.
+ *     2.      No buffering is performed by the user backup program.
  *
  * Testing was done with a 2 GB CONNER CTMA 4000 IDE ATAPI Streaming Tape Drive.
  * 
  * Ver 1.7   Sep 10 96   Minor changes for the CONNER CTT8000-A model.
  * Ver 1.8   Sep 26 96   Attempt to find a better balance between good
  *                        interactive response and high system throughput.
- *
- * We are currently in an *alpha* stage. The driver is not complete and not
- * much tested. I would strongly suggest to:
- *
- *     1. Connect the tape to a separate interface and irq.
- *     2. Be truly prepared for a kernel crash and the resulting data loss.
- *     3. Don't rely too much on the resulting backups.
- *
- * Other than that, enjoy !
+ * Ver 1.9   Nov  5 96   Automatically cross encountered filemarks rather
+ *                        than requiring an explicit FSF command.
+ *                       Abort pending requests at end of media.
+ *                       MTTELL was sometimes returning incorrect results.
+ *                       Return the real block size in the MTIOCGET ioctl.
+ *                       Some error recovery bug fixes.
+ * Ver 1.10  Nov  5 96   Major reorganization.
+ *                       Reduced CPU overhead a bit by eliminating internal
+ *                        bounce buffers.
+ *                       Added module support.
+ *                       Added multiple tape drives support.
+ *                       Added partition support.
+ *                       Rewrote DSC handling.
+ *                       Some portability fixes.
+ *                       Removed ide-tape.h.
+ *                       Additional minor changes.
  *
  * Here are some words from the first releases of hd.c, which are quoted
  * in ide.c and apply here as well:
  *     pipelined mode might be the best option.
  *
  * You can enable/disable/tune the pipelined operation mode by adjusting
- * the compile time parameters in ide-tape.h.
+ * the compile time parameters below.
  */
 
 /*
  */
 
 #include <linux/config.h>
-#include <linux/hdreg.h>
+#include <linux/module.h>
 #include <linux/types.h>
 #include <linux/string.h>
 #include <linux/kernel.h>
 #include <asm/irq.h>
 #include <asm/uaccess.h>
 #include <asm/io.h>
+#include <asm/unaligned.h>
+#include <asm/bitops.h>
 
 /*
  *     Main Linux ide driver include file
- *
- *     Automatically includes our include file - ide-tape.h.
  */
-#include "ide.h"               
+#include "ide.h"
 
 /*
- *     Supported ATAPI tape drives packet commands
+ *     For general magnetic tape device compatibility.
  */
+#include <linux/mtio.h>
 
-#define        IDETAPE_TEST_UNIT_READY_CMD     0x00
-#define        IDETAPE_REWIND_CMD              0x01
-#define        IDETAPE_REQUEST_SENSE_CMD       0x03
-#define        IDETAPE_READ_CMD                0x08
-#define        IDETAPE_WRITE_CMD               0x0a
-#define        IDETAPE_WRITE_FILEMARK_CMD      0x10
-#define        IDETAPE_SPACE_CMD               0x11
-#define        IDETAPE_INQUIRY_CMD             0x12
-#define        IDETAPE_ERASE_CMD               0x19
-#define        IDETAPE_MODE_SENSE_CMD          0x1a
-#define        IDETAPE_LOAD_UNLOAD_CMD         0x1b
-#define        IDETAPE_LOCATE_CMD              0x2b
-#define        IDETAPE_READ_POSITION_CMD       0x34
+/**************************** Tunable parameters *****************************/
 
 /*
- *     Some defines for the SPACE command
+ *     Pipelined mode parameters.
+ *
+ *     We try to use the minimum number of stages which is enough to
+ *     keep the tape constantly streaming. To accomplish that, we implement
+ *     a feedback loop around the maximum number of stages:
  *
- *     (The code field in the SPACE packet command).
+ *     We start from MIN maximum stages (we will not even use MIN stages
+ *      if we don't need them), increment it by RATE*(MAX-MIN)
+ *     whenever we sense that the pipeline is empty, until we reach
+ *     the optimum value or until we reach MAX.
+ *
+ *     Setting the following parameter to 0 will disable the pipelined mode.
  */
-#define        IDETAPE_SPACE_OVER_FILEMARK     1
-#define        IDETAPE_SPACE_TO_EOD            3
+#define IDETAPE_MIN_PIPELINE_STAGES    100
+#define IDETAPE_MAX_PIPELINE_STAGES    200
+#define IDETAPE_INCREASE_STAGES_RATE    20
 
 /*
- *     Some defines for the LOAD UNLOAD command
+ *     Assuming the tape shares an interface with another device, the default
+ *     behavior is to service our pending pipeline requests as soon as
+ *     possible, but to gracefully postpone them in favor of the other device
+ *     when the tape is busy. This has the potential to maximize our
+ *     throughput and at the same time, to make efficient use of the IDE bus.
+ *
+ *     Note that when we transfer data to / from the tape, we co-operate with
+ *     the relatively fast tape buffers and the tape will perform the
+ *     actual media access in the background, without blocking the IDE
+ *     bus. This means that as long as the maximum IDE bus throughput is much
+ *     higher than the sum of our maximum throughput and the maximum
+ *     throughput of the other device, we should probably leave the default
+ *     behavior.
+ *
+ *     However, if it is still desired to give the other device a share even
+ *     in our own (small) bus bandwidth, you can set IDETAPE_LOW_TAPE_PRIORITY
+ *     to 1. This will let the other device finish *all* its pending requests
+ *     before we even check if we can service our next pending request.
  */
-#define        IDETAPE_LU_LOAD_MASK            1
-#define        IDETAPE_LU_RETENSION_MASK       2
-#define        IDETAPE_LU_EOT_MASK             4
+#define IDETAPE_LOW_TAPE_PRIORITY      0
 
 /*
- *     Our ioctls - We will use 0x034n and 0x035n
+ *     The following are used to debug the driver:
+ *
+ *     Setting IDETAPE_DEBUG_LOG to 1 will log driver flow control.
+ *     Setting IDETAPE_DEBUG_BUGS to 1 will enable self-sanity checks in
+ *     some places.
+ *
+ *     Setting them to 0 will restore normal operation mode:
  *
- *     Nothing special meanwhile.
- *     mtio.h MTIOCTOP compatible commands are supported on the character
- *     device interface.
+ *             1.      Disable logging normal successful operations.
+ *             2.      Disable self-sanity checks.
+ *             3.      Errors will still be logged, of course.
+ *
+ *     All the #if DEBUG code will be removed some day, when the driver
+ *     is verified to be stable enough. This will make it much more
+ *     esthetic.
  */
+#define IDETAPE_DEBUG_LOG              0
+#define IDETAPE_DEBUG_BUGS             1
 
 /*
- *     Special requests for our block device strategy routine.
- *
- *     In order to service a character device command, we add special
- *     requests to the tail of our block device request queue and wait
- *     for their completion.
+ *     After each failed packet command we issue a request sense command
+ *     and retry the packet command IDETAPE_MAX_PC_RETRIES times.
  *
+ *     Setting IDETAPE_MAX_PC_RETRIES to 0 will disable retries.
  */
+#define IDETAPE_MAX_PC_RETRIES         3
 
-#define        IDETAPE_FIRST_REQUEST                   90
+/*
+ *     With each packet command, we allocate a buffer of
+ *     IDETAPE_PC_BUFFER_SIZE bytes. This is used for several packet
+ *     commands (Not for READ/WRITE commands).
+ */
+#define IDETAPE_PC_BUFFER_SIZE         256
 
 /*
- *     IDETAPE_PACKET_COMMAND_REQUEST_TYPE1 is used to queue a packet command
- *     in the request queue. We will wait for DSC before issuing the command
- *     if it is still not set. In that case, we will temporary replace the
- *     cmd field to type 2 and restore it back to type 1 when we receive DSC
- *     and can start with sending the command.
+ *     In various places in the driver, we need to allocate storage
+ *     for packet commands and requests, which will remain valid while
+ *     we leave the driver to wait for an interrupt or a timeout event.
  */
-#define        IDETAPE_PACKET_COMMAND_REQUEST_TYPE1    90
-#define        IDETAPE_PACKET_COMMAND_REQUEST_TYPE2    91
+#define IDETAPE_PC_STACK               (10 + IDETAPE_MAX_PC_RETRIES)
 
 /*
- *     IDETAPE_READ_REQUEST and IDETAPE_WRITE_REQUEST are used by our
- *     character device interface to request read/write operations from
- *     our block device interface.
+ *     DSC polling parameters.
+ *
+ *     Polling for DSC (a single bit in the status register) is a very
+ *     important function in ide-tape. There are two cases in which we
+ *     poll for DSC:
  *
- *     In case a read or write request was requested by the buffer cache
- *     and not by our character device interface, the cmd field in the
- *     request will contain READ and WRITE instead.
+ *     1.      Before a read/write packet command, to ensure that we
+ *             can transfer data from/to the tape's data buffers, without
+ *             causing an actual media access. In case the tape is not
+ *             ready yet, we take out our request from the device
+ *             request queue, so that ide.c will service requests from
+ *             the other device on the same interface meanwhile.
  *
- *     We handle both cases in a similar way. The main difference is that
- *     in our own requests, buffer head is NULL and idetape_end_request
- *     will update the errors field if the request was not completed.
+ *     2.      After the successful initialization of a "media access
+ *             packet command", which is a command which can take a long
+ *             time to complete (it can be several seconds or even an hour).
+ *
+ *             Again, we postpone our request in the middle to free the bus
+ *             for the other device. The polling frequency here should be
+ *             lower than the read/write frequency since those media access
+ *             commands are slow. We start from a "fast" frequency -
+ *             IDETAPE_DSC_MA_FAST (one second), and if we don't receive DSC
+ *             after IDETAPE_DSC_MA_THRESHOLD (5 minutes), we switch it to a
+ *             lower frequency - IDETAPE_DSC_MA_SLOW (1 minute).
+ *
+ *     We also set a timeout for the timer, in case something goes wrong.
+ *     The timeout should be longer than the maximum execution time of a
+ *     tape operation.
+ */
+/*
+ *     The following parameter is used to select the point in the internal
+ *     tape fifo in which we will start to refill the buffer. Decreasing
+ *     the following parameter will improve the system's latency and
+ *     interactive response, while using a high value might improve system
+ *     throughput.
+ */
+#define IDETAPE_FIFO_THRESHOLD                 2
+
+/*
+ *     DSC timings.
  */
+#define IDETAPE_DSC_RW_MIN             5*HZ/100        /* 50 msec */
+#define IDETAPE_DSC_RW_MAX             40*HZ/100       /* 400 msec */
+#define IDETAPE_DSC_RW_TIMEOUT         2*60*HZ         /* 2 minutes */
+#define IDETAPE_DSC_MA_FAST            2*HZ            /* 2 seconds */
+#define IDETAPE_DSC_MA_THRESHOLD       5*60*HZ         /* 5 minutes */
+#define IDETAPE_DSC_MA_SLOW            30*HZ           /* 30 seconds */
+#define IDETAPE_DSC_MA_TIMEOUT         2*60*60*HZ      /* 2 hours */
 
-#define        IDETAPE_READ_REQUEST                    92
-#define        IDETAPE_WRITE_REQUEST                   93
+/*************************** End of tunable parameters ***********************/
 
-#define IDETAPE_LAST_REQUEST                   93
+typedef enum {
+       idetape_direction_none,
+       idetape_direction_read,
+       idetape_direction_write
+} idetape_chrdev_direction_t;
 
 /*
- *     A macro which can be used to check if a we support a given
- *     request command.
+ *     Our view of a packet command.
  */
+typedef struct idetape_packet_command_s {
+       u8 c[12];                               /* Actual packet bytes */
+       int retries;                            /* On each retry, we increment retries */
+       int error;                              /* Error code */
+       int request_transfer;                   /* Bytes to transfer */
+       int actually_transferred;               /* Bytes actually transferred */
+       int buffer_size;                        /* Size of our data buffer */
+       struct buffer_head *bh;
+       char *b_data;
+       int b_count;
+       byte *buffer;                           /* Data buffer */
+       byte *current_position;                 /* Pointer into the above buffer */
+       void (*callback) (ide_drive_t *);       /* Called when this packet command is completed */
+       byte pc_buffer[IDETAPE_PC_BUFFER_SIZE]; /* Temporary buffer */
+       unsigned int flags;                     /* Status/Action bit flags */
+} idetape_pc_t;
 
-#define IDETAPE_REQUEST_CMD(cmd)       ((cmd >= IDETAPE_FIRST_REQUEST) && (cmd <= IDETAPE_LAST_REQUEST))
+/*
+ *     Packet command flag bits.
+ */
+#define        PC_ABORT                        0       /* Set when an error is considered normal - We won't retry */
+#define PC_WAIT_FOR_DSC                        1       /* 1 When polling for DSC on a media access command */
+#define PC_DMA_RECOMMENDED             2       /* 1 when we prefer to use DMA if possible */
+#define        PC_DMA_IN_PROGRESS              3       /* 1 while DMA in progress */
+#define        PC_DMA_ERROR                    4       /* 1 when encountered problem during DMA */
+#define        PC_WRITING                      5       /* Data direction */
 
 /*
- *     We are now able to postpone an idetape request in the stage
- *     where it is polling for DSC and service requests from the other
- *     ide device meanwhile.
+ *     Capabilities and Mechanical Status Page
+ */
+typedef struct {
+       unsigned        page_code       :6;     /* Page code - Should be 0x2a */
+       unsigned        reserved1_67    :2;
+       u8              page_length;            /* Page Length - Should be 0x12 */
+       u8              reserved2, reserved3;
+       unsigned        ro              :1;     /* Read Only Mode */
+       unsigned        reserved4_1234  :4;
+       unsigned        sprev           :1;     /* Supports SPACE in the reverse direction */
+       unsigned        reserved4_67    :2;
+       unsigned        reserved5_012   :3;
+       unsigned        efmt            :1;     /* Supports ERASE command initiated formatting */
+       unsigned        reserved5_4     :1;
+       unsigned        qfa             :1;     /* Supports the QFA two partition formats */
+       unsigned        reserved5_67    :2;
+       unsigned        lock            :1;     /* Supports locking the volume */
+       unsigned        locked          :1;     /* The volume is locked */
+       unsigned        prevent         :1;     /* The device defaults in the prevent state after power up */   
+       unsigned        eject           :1;     /* The device can eject the volume */
+       unsigned        reserved6_45    :2;     /* Reserved */  
+       unsigned        ecc             :1;     /* Supports error correction */
+       unsigned        cmprs           :1;     /* Supports data compression */
+       unsigned        reserved7_0     :1;
+       unsigned        blk512          :1;     /* Supports 512 bytes block size */
+       unsigned        blk1024         :1;     /* Supports 1024 bytes block size */
+       unsigned        reserved7_3_6   :4;
+       unsigned        slowb           :1;     /* The device restricts the byte count for PIO */
+                                               /* transfers for slow buffer memory ??? */
+       u16             max_speed;              /* Maximum speed supported in KBps */
+       u8              reserved10, reserved11;
+       u16             ctl;                    /* Continuous Transfer Limit in blocks */
+       u16             speed;                  /* Current Speed, in KBps */
+       u16             buffer_size;            /* Buffer Size, in 512 bytes */
+       u8              reserved18, reserved19;
+} idetape_capabilities_page_t;
+
+/*
+ *     A pipeline stage.
+ */
+typedef struct idetape_stage_s {
+       struct request rq;                      /* The corresponding request */
+       struct buffer_head *bh;                 /* The data buffers */
+       struct idetape_stage_s *next;           /* Pointer to the next stage */
+} idetape_stage_t;
+
+/*
+ *     Most of our global data which we need to save even as we leave the
+ *     driver due to an interrupt or a timer event is stored in a variable
+ *     of type idetape_tape_t, defined below.
  */
+typedef struct {
+       ide_drive_t *drive;
 
-#define        IDETAPE_RQ_POSTPONED            0x1234
+       /*
+        *      Since a typical character device operation requires more
+        *      than one packet command, we provide here enough memory
+        *      for the maximum of interconnected packet commands.
+        *      The packet commands are stored in the circular array pc_stack.
+        *      pc_stack_index points to the last used entry, and warps around
+        *      to the start when we get to the last array entry.
+        *
+        *      pc points to the current processed packet command.
+        *
+        *      failed_pc points to the last failed packet command, or contains
+        *      NULL if we do not need to retry any packet command. This is
+        *      required since an additional packet command is needed before the
+        *      retry, to get detailed information on what went wrong.
+        */
+       idetape_pc_t *pc;                       /* Current packet command */
+       idetape_pc_t *failed_pc;                /* Last failed packet command */
+       idetape_pc_t pc_stack[IDETAPE_PC_STACK];/* Packet command stack */
+       int pc_stack_index;                     /* Next free packet command storage space */
+       struct request rq_stack[IDETAPE_PC_STACK];
+       int rq_stack_index;                     /* We implement a circular array */
+
+       /*
+        *      DSC polling variables.
+        *
+        *      While polling for DSC we use postponed_rq to postpone the
+        *      current request so that ide.c will be able to service
+        *      pending requests on the other device. Note that at most
+        *      we will have only one DSC (usually data transfer) request
+        *      in the device request queue. Additional requests can be
+        *      queued in our internal pipeline, but they will be visible
+        *      to ide.c only one at a time.
+        */
+       struct request *postponed_rq;
+       unsigned long dsc_polling_start;        /* The time in which we started polling for DSC */
+       struct timer_list dsc_timer;            /* Timer used to poll for dsc */
+       unsigned long best_dsc_rw_frequency;    /* Read/Write dsc polling frequency */
+       unsigned long dsc_polling_frequency;    /* The current polling frequency */
+       unsigned long dsc_timeout;              /* Maximum waiting time */
+
+       /*
+        *      Position information
+        */
+       byte partition;
+       unsigned int block_address;             /* Current block */
+
+       /*
+        *      Last error information
+        */
+       byte sense_key, asc, ascq;
+
+       /*
+        *      Character device operation
+        */
+       unsigned int minor;
+       char name[4];                                   /* device name */
+       idetape_chrdev_direction_t chrdev_direction;    /* Current character device data transfer direction */
+
+       /*
+        *      Device information
+        */
+       unsigned short tape_block_size;                 /* Usually 512 or 1024 bytes */
+       int user_bs_factor;
+       idetape_capabilities_page_t capabilities;       /* Copy of the tape's Capabilities and Mechanical Page */
+
+       /*
+        *      Active data transfer request parameters.
+        *
+        *      At most, there is only one ide-tape originated data transfer
+        *      request in the device request queue. This allows ide.c to
+        *      easily service requests from the other device when we
+        *      postpone our active request. In the pipelined operation
+        *      mode, we use our internal pipeline structure to hold
+        *      more data requests.
+        *
+        *      The data buffer size is chosen based on the tape's
+        *      recommendation.
+        */
+       struct request *active_data_request;    /* Pointer to the request which is waiting in the device request queue */
+       int stage_size;                         /* Data buffer size (chosen based on the tape's recommendation) */
+       idetape_stage_t *merge_stage;
+       int merge_stage_size;
+       struct buffer_head *bh;
+       char *b_data;
+       int b_count;
+       
+       /*
+        *      Pipeline parameters.
+        *
+        *      To accomplish non-pipelined mode, we simply set the following
+        *      variables to zero (or NULL, where appropriate).
+        */
+       int nr_stages;                          /* Number of currently used stages */
+       int nr_pending_stages;                  /* Number of pending stages */
+       int max_stages;                         /* We will not allocate more than this number of stages */
+       idetape_stage_t *first_stage;           /* The first stage which will be removed from the pipeline */
+       idetape_stage_t *active_stage;          /* The currently active stage */
+       idetape_stage_t *next_stage;            /* Will be serviced after the currently active request */
+       idetape_stage_t *last_stage;            /* New requests will be added to the pipeline here */
+       idetape_stage_t *cache_stage;           /* Optional free stage which we can use */
+       int pages_per_stage;
+       int excess_bh_size;                     /* Wasted space in each stage */
+
+       unsigned int flags;                     /* Status/Action flags */
+} idetape_tape_t;
 
 /*
- *     Error codes which are returned in rq->errors to the higher part
- *     of the driver.
+ *     Tape flag bits values.
  */
+#define IDETAPE_IGNORE_DSC             0
+#define IDETAPE_ADDRESS_VALID          1       /* 0 When the tape position is unknown */
+#define IDETAPE_BUSY                   2       /* Device already opened */
+#define IDETAPE_PIPELINE_ERROR         3       /* Error detected in a pipeline stage */
+#define IDETAPE_DETECT_BS              4       /* Attempt to auto-detect the current user block size */
+#define IDETAPE_FILEMARK               5       /* Currently on a filemark */
 
-#define        IDETAPE_RQ_ERROR_GENERAL        1 
-#define        IDETAPE_RQ_ERROR_FILEMARK       2
-#define        IDETAPE_RQ_ERROR_EOD            3
+/*
+ *     Supported ATAPI tape drives packet commands
+ */
+#define IDETAPE_TEST_UNIT_READY_CMD    0x00
+#define IDETAPE_REWIND_CMD             0x01
+#define IDETAPE_REQUEST_SENSE_CMD      0x03
+#define IDETAPE_READ_CMD               0x08
+#define IDETAPE_WRITE_CMD              0x0a
+#define IDETAPE_WRITE_FILEMARK_CMD     0x10
+#define IDETAPE_SPACE_CMD              0x11
+#define IDETAPE_INQUIRY_CMD            0x12
+#define IDETAPE_ERASE_CMD              0x19
+#define IDETAPE_MODE_SENSE_CMD         0x1a
+#define IDETAPE_LOAD_UNLOAD_CMD                0x1b
+#define IDETAPE_LOCATE_CMD             0x2b
+#define IDETAPE_READ_POSITION_CMD      0x34
 
 /*
- *     ATAPI Task File Registers (Re-definition of the ATA Task File
- *     Registers for an ATAPI packet command).
- *     From Table 3-2 of QIC-157C.
+ *     Some defines for the SPACE command
  */
+#define IDETAPE_SPACE_OVER_FILEMARK    1
+#define IDETAPE_SPACE_TO_EOD           3
 
-/* Read Access */
+/*
+ *     Some defines for the LOAD UNLOAD command
+ */
+#define IDETAPE_LU_LOAD_MASK           1
+#define IDETAPE_LU_RETENSION_MASK      2
+#define IDETAPE_LU_EOT_MASK            4
 
-#define        IDETAPE_DATA_OFFSET             (0)
-#define IDETAPE_ERROR_OFFSET           (1)
-#define        IDETAPE_IREASON_OFFSET          (2)
-#define IDETAPE_RESERVED3_OFFSET       (3)
-#define IDETAPE_BCOUNTL_OFFSET         (4)
-#define        IDETAPE_BCOUNTH_OFFSET          (5)
-#define IDETAPE_DRIVESEL_OFFSET                (6)
-#define        IDETAPE_STATUS_OFFSET           (7)
+/*
+ *     Special requests for our block device strategy routine.
+ *
+ *     In order to service a character device command, we add special
+ *     requests to the tail of our block device request queue and wait
+ *     for their completion.
+ *
+ */
+#define IDETAPE_FIRST_RQ               90
 
-#define        IDETAPE_DATA_REG                (HWIF(drive)->io_base+IDETAPE_DATA_OFFSET)
-#define IDETAPE_ERROR_REG              (HWIF(drive)->io_base+IDETAPE_ERROR_OFFSET)
-#define        IDETAPE_IREASON_REG             (HWIF(drive)->io_base+IDETAPE_IREASON_OFFSET)
-#define IDETAPE_RESERVED3_REG          (HWIF(drive)->io_base+IDETAPE_RESERVED3_OFFSET)
-#define IDETAPE_BCOUNTL_REG            (HWIF(drive)->io_base+IDETAPE_BCOUNTL_OFFSET)
-#define        IDETAPE_BCOUNTH_REG             (HWIF(drive)->io_base+IDETAPE_BCOUNTH_OFFSET)
-#define IDETAPE_DRIVESEL_REG           (HWIF(drive)->io_base+IDETAPE_DRIVESEL_OFFSET)
-#define        IDETAPE_STATUS_REG              (HWIF(drive)->io_base+IDETAPE_STATUS_OFFSET)
+/*
+ *     IDETAPE_PC_RQ is used to queue a packet command in the request queue.
+ */
+#define IDETAPE_PC_RQ                  90
 
-/* Write Access */
+/*
+ *     IDETAPE_READ_RQ and IDETAPE_WRITE_RQ are used by our
+ *     character device interface to request read/write operations from
+ *     our block device interface.
+ */
+#define IDETAPE_READ_RQ                        92
+#define IDETAPE_WRITE_RQ               93
+#define IDETAPE_ABORTED_WRITE_RQ       94
 
-#define        IDETAPE_FEATURES_OFFSET         (1)
-#define IDETAPE_ATACOMMAND_OFFSET      (7)
+#define IDETAPE_LAST_RQ                        94
 
-#define IDETAPE_FEATURES_REG           (HWIF(drive)->io_base+IDETAPE_FEATURES_OFFSET)
-#define IDETAPE_ATACOMMAND_REG         (HWIF(drive)->io_base+IDETAPE_ATACOMMAND_OFFSET)
-#define IDETAPE_CONTROL_REG            (HWIF(drive)->ctl_port)
+/*
+ *     A macro which can be used to check if we support a given
+ *     request command.
+ */
+#define IDETAPE_RQ_CMD(cmd)            ((cmd >= IDETAPE_FIRST_RQ) && (cmd <= IDETAPE_LAST_RQ))
 
+/*
+ *     We are now able to postpone an idetape request in the stage
+ *     where it is polling for DSC and service requests from the other
+ *     ide device meanwhile.
+ */
+#define        IDETAPE_RQ_POSTPONED            0x1234
 
 /*
- *     Structure of the various task file registers
+ *     Error codes which are returned in rq->errors to the higher part
+ *     of the driver.
  */
+#define        IDETAPE_ERROR_GENERAL           101
+#define        IDETAPE_ERROR_FILEMARK          102
+#define        IDETAPE_ERROR_EOD               103
 
 /*
  *     The ATAPI Status Register.
  */
 typedef union {
        unsigned all                    :8;
        struct {
@@ -496,11 +776,9 @@ typedef union {
                unsigned idx            :1;     /* Reserved */
                unsigned corr           :1;     /* Correctable error occurred */
                unsigned drq            :1;     /* Data is request by the device */
-               unsigned dsc            :1;     /* Set when a media access command is finished */
-                                               /* Reads / Writes are NOT media access commands */
+               unsigned dsc            :1;     /* Buffer availability / Media access command finished */
                unsigned reserved5      :1;     /* Reserved */
-               unsigned drdy           :1;     /* Ignored for ATAPI commands */
-                                               /* (The device is ready to accept ATA command) */
+               unsigned drdy           :1;     /* Ignored for ATAPI commands (ready to accept ATA command) */
                unsigned bsy            :1;     /* The device has access to the command block */
        } b;
 } idetape_status_reg_t;
@@ -508,7 +786,6 @@ typedef union {
 /*
  *     The ATAPI error register.
  */
 typedef union {
        unsigned all                    :8;
        struct {
@@ -523,7 +800,6 @@ typedef union {
 /*
  *     ATAPI Feature Register
  */
 typedef union {
        unsigned all                    :8;
        struct {
@@ -537,7 +813,6 @@ typedef union {
 /*
  *     ATAPI Byte Count Register.
  */
 typedef union {
        unsigned all                    :16;
        struct {
@@ -549,7 +824,6 @@ typedef union {
 /*
  *     ATAPI Interrupt Reason Register.
  */
 typedef union {
        unsigned all                    :8;
        struct {
@@ -562,7 +836,6 @@ typedef union {
 /*
  *     ATAPI Drive Select Register
  */
 typedef union {        
        unsigned all                    :8;
        struct {
@@ -577,7 +850,6 @@ typedef union {
 /*
  *     ATAPI Device Control Register
  */
 typedef union {                        
        unsigned all                    :8;
        struct {
@@ -593,126 +865,91 @@ typedef union {
  *     idetape_chrdev_t provides the link between out character device
  *     interface and our block device interface and the corresponding
  *     ide_drive_t structure.
- *
- *     We currently support only one tape drive.
- * 
  */
 typedef struct {
        ide_drive_t *drive;
-       int major,minor;
-       char name[4];
 } idetape_chrdev_t;
 
 /*
  *     The following is used to format the general configuration word of
  *     the ATAPI IDENTIFY DEVICE command.
  */
-
 struct idetape_id_gcw {        
-
-       unsigned packet_size    :2;     /* Packet Size */
-       unsigned reserved2      :1;     /* Reserved */
-       unsigned reserved3      :1;     /* Reserved */
-       unsigned reserved4      :1;     /* Reserved */
-       unsigned drq_type       :2;     /* Command packet DRQ type */
-       unsigned removable      :1;     /* Removable media */
-       unsigned device_type    :5;     /* Device type */
-       unsigned reserved13     :1;     /* Reserved */
-       unsigned protocol       :2;     /* Protocol type */
+       unsigned packet_size            :2;     /* Packet Size */
+       unsigned reserved234            :3;     /* Reserved */
+       unsigned drq_type               :2;     /* Command packet DRQ type */
+       unsigned removable              :1;     /* Removable media */
+       unsigned device_type            :5;     /* Device type */
+       unsigned reserved13             :1;     /* Reserved */
+       unsigned protocol               :2;     /* Protocol type */
 };
 
 /*
  *     INQUIRY packet command - Data Format (From Table 6-8 of QIC-157C)
  */
 typedef struct {
-       unsigned device_type    :5;     /* Peripheral Device Type */
-       unsigned reserved0_765  :3;     /* Peripheral Qualifier - Reserved */
-       unsigned reserved1_6t0  :7;     /* Reserved */
-       unsigned rmb            :1;     /* Removable Medium Bit */
-       unsigned ansi_version   :3;     /* ANSI Version */
-       unsigned ecma_version   :3;     /* ECMA Version */
-       unsigned iso_version    :2;     /* ISO Version */
-       unsigned response_format :4;    /* Response Data Format */
-       unsigned reserved3_45   :2;     /* Reserved */
-       unsigned reserved3_6    :1;     /* TrmIOP - Reserved */
-       unsigned reserved3_7    :1;     /* AENC - Reserved */
-       byte additional_length;         /* Additional Length (total_length-4) */
-       byte reserved_5;                /* Reserved */
-       byte reserved_6;                /* Reserved */
-       unsigned reserved7_0    :1;     /* SftRe - Reserved */
-       unsigned reserved7_1    :1;     /* CmdQue - Reserved */
-       unsigned reserved7_2    :1;     /* Reserved */
-       unsigned reserved7_3    :1;     /* Linked - Reserved */
-       unsigned reserved7_4    :1;     /* Sync - Reserved */
-       unsigned reserved7_5    :1;     /* WBus16 - Reserved */
-       unsigned reserved7_6    :1;     /* WBus32 - Reserved */
-       unsigned reserved7_7    :1;     /* RelAdr - Reserved */
-       byte vendor_id [8];             /* Vendor Identification */
-       byte product_id [16];           /* Product Identification */
-       byte revision_level [4];        /* Revision Level */
-       byte vendor_specific [20];      /* Vendor Specific - Optional */
-       byte reserved56t95 [40];        /* Reserved - Optional */
-       
-                                       /* Additional information may be returned */
+       unsigned        device_type     :5;     /* Peripheral Device Type */
+       unsigned        reserved0_765   :3;     /* Peripheral Qualifier - Reserved */
+       unsigned        reserved1_6t0   :7;     /* Reserved */
+       unsigned        rmb             :1;     /* Removable Medium Bit */
+       unsigned        ansi_version    :3;     /* ANSI Version */
+       unsigned        ecma_version    :3;     /* ECMA Version */
+       unsigned        iso_version     :2;     /* ISO Version */
+       unsigned        response_format :4;     /* Response Data Format */
+       unsigned        reserved3_45    :2;     /* Reserved */
+       unsigned        reserved3_6     :1;     /* TrmIOP - Reserved */
+       unsigned        reserved3_7     :1;     /* AENC - Reserved */
+       u8              additional_length;      /* Additional Length (total_length-4) */
+       u8              rsv5, rsv6, rsv7;       /* Reserved */
+       u8              vendor_id[8];           /* Vendor Identification */
+       u8              product_id[16];         /* Product Identification */
+       u8              revision_level[4];      /* Revision Level */
+       u8              vendor_specific[20];    /* Vendor Specific - Optional */
+       u8              reserved56t95[40];      /* Reserved - Optional */
+                                               /* Additional information may be returned */
 } idetape_inquiry_result_t;
 
 /*
  *     READ POSITION packet command - Data Format (From Table 6-57)
  */
 typedef struct {
-       unsigned reserved0_10   :2;     /* Reserved */
-       unsigned bpu            :1;     /* Block Position Unknown */    
-       unsigned reserved0_543  :3;     /* Reserved */
-       unsigned eop            :1;     /* End Of Partition */
-       unsigned bop            :1;     /* Beginning Of Partition */
-       byte partition_num;             /* Partition Number */
-       byte reserved_2;                /* Reserved */
-       byte reserved_3;                /* Reserved */
-       unsigned long first_block;      /* First Block Location */
-       unsigned long last_block;       /* Last Block Location (Optional) */
-       byte reserved_12;               /* Reserved */
-       byte blocks_in_buffer_2;        /* Blocks In Buffer - MSB (Optional) */
-       byte blocks_in_buffer_1;
-       byte blocks_in_buffer_0;        /* Blocks In Buffer - LSB (Optional) */
-       unsigned long bytes_in_buffer;  /* Bytes In Buffer (Optional) */
+       unsigned        reserved0_10    :2;     /* Reserved */
+       unsigned        bpu             :1;     /* Block Position Unknown */    
+       unsigned        reserved0_543   :3;     /* Reserved */
+       unsigned        eop             :1;     /* End Of Partition */
+       unsigned        bop             :1;     /* Beginning Of Partition */
+       u8              partition;              /* Partition Number */
+       u8              reserved2, reserved3;   /* Reserved */
+       u32             first_block;            /* First Block Location */
+       u32             last_block;             /* Last Block Location (Optional) */
+       u8              reserved12;             /* Reserved */
+       u8              blocks_in_buffer[3];    /* Blocks In Buffer - (Optional) */
+       u32             bytes_in_buffer;        /* Bytes In Buffer (Optional) */
 } idetape_read_position_result_t;
 
 /*
  *     REQUEST SENSE packet command result - Data Format.
  */
-
 typedef struct {
-       unsigned error_code     :7;     /* Current of deferred errors */
-       unsigned valid          :1;     /* The information field conforms to QIC-157C */
-       unsigned reserved_1     :8;     /* Segment Number - Reserved */
-       unsigned sense_key      :4;     /* Sense Key */
-       unsigned reserved2_4    :1;     /* Reserved */
-       unsigned ili            :1;     /* Incorrect Length Indicator */
-       unsigned eom            :1;     /* End Of Medium */
-       unsigned filemark       :1;     /* Filemark */
-
-       /*
-        *      We can't use a 32 bit variable, since it will be re-aligned
-        *      by GCC, as we are not on a 32 bit boundary.
-        */
-
-       byte information1;              /* MSB - Information - Command specific */
-       byte information2;
-       byte information3;
-       byte information4;              /* LSB */
-       byte asl;                       /* Additional sense length (n-7) */
-       unsigned long command_specific; /* Additional command specific information */
-       byte asc;                       /* Additional Sense Code */
-       byte ascq;                      /* Additional Sense Code Qualifier */
-       byte replaceable_unit_code;     /* Field Replaceable Unit Code */
-       unsigned sk_specific1   :7;     /* Sense Key Specific */
-       unsigned sksv           :1;     /* Sense Key Specific information is valid */
-       byte sk_specific2;              /* Sense Key Specific */
-       byte sk_specific3;              /* Sense Key Specific */
-       byte pad [2];                   /* Padding to 20 bytes */
+       unsigned        error_code      :7;     /* Current of deferred errors */
+       unsigned        valid           :1;     /* The information field conforms to QIC-157C */
+       u8              reserved1       :8;     /* Segment Number - Reserved */
+       unsigned        sense_key       :4;     /* Sense Key */
+       unsigned        reserved2_4     :1;     /* Reserved */
+       unsigned        ili             :1;     /* Incorrect Length Indicator */
+       unsigned        eom             :1;     /* End Of Medium */
+       unsigned        filemark        :1;     /* Filemark */
+       u32             information __attribute__ ((packed));
+       u8              asl;                    /* Additional sense length (n-7) */
+       u32             command_specific;       /* Additional command specific information */
+       u8              asc;                    /* Additional Sense Code */
+       u8              ascq;                   /* Additional Sense Code Qualifier */
+       u8              replaceable_unit_code;  /* Field Replaceable Unit Code */
+       unsigned        sk_specific1    :7;     /* Sense Key Specific */
+       unsigned        sksv            :1;     /* Sense Key Specific information is valid */
+       u8              sk_specific2;           /* Sense Key Specific */
+       u8              sk_specific3;           /* Sense Key Specific */
+       u8              pad[2];                 /* Padding to 20 bytes */
 } idetape_request_sense_result_t;
 
 /*
@@ -720,19 +957,16 @@ typedef struct {
  *     packet commands. Those packet commands are still not supported
  *     by ide-tape.
  */
-
 #define        IDETAPE_CAPABILITIES_PAGE       0x2a
 
 /*
  *     Mode Parameter Header for the MODE SENSE packet command
  */
-
 typedef struct {
-       byte mode_data_length;          /* The length of the following data that is */
-                                       /* available to be transferred */
-       byte medium_type;               /* Medium Type */
-       byte dsp;                       /* Device Specific Parameter */
-       byte bdl;                       /* Block Descriptor Length */
+       u8              mode_data_length;       /* Length of the following data transfer */
+       u8              medium_type;            /* Medium Type */
+       u8              dsp;                    /* Device Specific Parameter */
+       u8              bdl;                    /* Block Descriptor Length */
 } idetape_mode_parameter_header_t;
 
 /*
@@ -740,1847 +974,1148 @@ typedef struct {
  *
  *     Support for block descriptors is optional.
  */
-
 typedef struct {
-       byte density_code;              /* Medium density code */
-       byte blocks1;                   /* Number of blocks - MSB */
-       byte blocks2;                   /* Number of blocks - Middle byte */
-       byte blocks3;                   /* Number of blocks - LSB */
-       byte reserved4;                 /* Reserved */
-       byte length1;                   /* Block Length - MSB */
-       byte length2;                   /* Block Length - Middle byte */
-       byte length3;                   /* Block Length - LSB */
+       u8              density_code;           /* Medium density code */
+       u8              blocks[3];              /* Number of blocks */
+       u8              reserved4;              /* Reserved */
+       u8              length[3];              /* Block Length */
 } idetape_parameter_block_descriptor_t;
 
 /*
  *     The Data Compression Page, as returned by the MODE SENSE packet command.
  */
 typedef struct {
-       unsigned page_code      :6;     /* Page Code - Should be 0xf */
-       unsigned reserved       :1;     /* Reserved */
-       unsigned ps             :1;
-       byte page_length;               /* Page Length - Should be 14 */
-       unsigned reserved2      :6;     /* Reserved */
-       unsigned dcc            :1;     /* Data Compression Capable */
-       unsigned dce            :1;     /* Data Compression Enable */
-       unsigned reserved3      :5;     /* Reserved */
-       unsigned red            :2;     /* Report Exception on Decompression */
-       unsigned dde            :1;     /* Data Decompression Enable */
-       unsigned long ca;               /* Compression Algorithm */
-       unsigned long da;               /* Decompression Algorithm */
-       byte reserved_12;               /* Reserved */
-       byte reserved_13;               /* Reserved */
-       byte reserved_14;               /* Reserved */
-       byte reserved_15;               /* Reserved */
+       unsigned        page_code       :6;     /* Page Code - Should be 0xf */
+       unsigned        reserved0       :1;     /* Reserved */
+       unsigned        ps              :1;
+       u8              page_length;            /* Page Length - Should be 14 */
+       unsigned        reserved2       :6;     /* Reserved */
+       unsigned        dcc             :1;     /* Data Compression Capable */
+       unsigned        dce             :1;     /* Data Compression Enable */
+       unsigned        reserved3       :5;     /* Reserved */
+       unsigned        red             :2;     /* Report Exception on Decompression */
+       unsigned        dde             :1;     /* Data Decompression Enable */
+       u32             ca;                     /* Compression Algorithm */
+       u32             da;                     /* Decompression Algorithm */
+       u8              reserved[4];            /* Reserved */
 } idetape_data_compression_page_t;
 
 /*
  *     The Medium Partition Page, as returned by the MODE SENSE packet command.
  */
-
 typedef struct {
-       unsigned page_code      :6;     /* Page Code - Should be 0x11 */
-       unsigned reserved1_6    :1;     /* Reserved */
-       unsigned ps             :1;
-       byte page_length;               /* Page Length - Should be 6 */
-       byte map;                       /* Maximum Additional Partitions - Should be 0 */
-       byte apd;                       /* Additional Partitions Defined - Should be 0 */
-       unsigned reserved4_012  :3;     /* Reserved */
-       unsigned psum           :2;     /* Should be 0 */
-       unsigned idp            :1;     /* Should be 0 */
-       unsigned sdp            :1;     /* Should be 0 */
-       unsigned fdp            :1;     /* Fixed Data Partitions */
-       byte mfr;                       /* Medium Format Recognition */
-       byte reserved6;                 /* Reserved */
-       byte reserved7;                 /* Reserved */
+       unsigned        page_code       :6;     /* Page Code - Should be 0x11 */
+       unsigned        reserved1_6     :1;     /* Reserved */
+       unsigned        ps              :1;
+       u8              page_length;            /* Page Length - Should be 6 */
+       u8              map;                    /* Maximum Additional Partitions - Should be 0 */
+       u8              apd;                    /* Additional Partitions Defined - Should be 0 */
+       unsigned        reserved4_012   :3;     /* Reserved */
+       unsigned        psum            :2;     /* Should be 0 */
+       unsigned        idp             :1;     /* Should be 0 */
+       unsigned        sdp             :1;     /* Should be 0 */
+       unsigned        fdp             :1;     /* Fixed Data Partitions */
+       u8              mfr;                    /* Medium Format Recognition */
+       u8              reserved[2];            /* Reserved */
 } idetape_medium_partition_page_t;
 
-/*
- *     Prototypes of various functions in ide-tape.c
- *
- *     The following functions are called from ide.c, and their prototypes
- *     are available in ide.h:
- *
- *             idetape_identify_device
- *             idetape_setup
- *             idetape_blkdev_ioctl
- *             idetape_do_request
- *             idetape_blkdev_open
- *             idetape_blkdev_release
- *             idetape_register_chrdev (void);
- */
+#define IDETAPE_MIN(a,b)       ((a)<(b) ? (a):(b))
+#define        IDETAPE_MAX(a,b)        ((a)>(b) ? (a):(b))
 
 /*
- *     The following functions are used to transfer data from / to the
- *     tape's data register.
+ *     Run time configurable parameters.
  */
-void idetape_input_data (ide_drive_t *drive,void *buffer, unsigned long bcount);
-void idetape_output_data (ide_drive_t *drive,void *buffer, unsigned long bcount);
-void idetape_discard_data (ide_drive_t *drive, unsigned long bcount);
+typedef struct {
+       int     dsc_rw_frequency;
+       int     dsc_media_access_frequency;
+       int     nr_stages;
+} idetape_config_t;
 
 /*
- *     Packet command related functions.
+ *     The variables below are used for the character device interface.
+ *     Additional state variables are defined in our ide_drive_t structure.
  */
-void idetape_issue_packet_command  (ide_drive_t *drive,idetape_packet_command_t *pc,ide_handler_t *handler);
-void idetape_pc_intr (ide_drive_t *drive);
+static idetape_chrdev_t idetape_chrdevs[MAX_HWIFS * MAX_DRIVES];
+static int idetape_chrdev_present = 0;
 
 /*
- *     DSC handling functions.
+ *     Too bad. The drive wants to send us data which we are not ready to accept.
+ *     Just throw it away.
  */
-void idetape_postpone_request (ide_drive_t *drive);
-void idetape_poll_for_dsc (unsigned long data);
-void idetape_poll_for_dsc_direct (unsigned long data);
-void idetape_put_back_postponed_request (ide_drive_t *drive);
-void idetape_media_access_finished (ide_drive_t *drive);
+static void idetape_discard_data (ide_drive_t *drive, unsigned int bcount)
+{
+       while (bcount--)
+               IN_BYTE (IDE_DATA_REG);
+}
 
-/*
- *     Some more packet command related functions.
- */
-void idetape_pc_callback (ide_drive_t *drive);
-void idetape_retry_pc (ide_drive_t *drive);
-void idetape_zero_packet_command (idetape_packet_command_t *pc);
-void idetape_queue_pc_head (ide_drive_t *drive,idetape_packet_command_t *pc,struct request *rq);
-void idetape_analyze_error (ide_drive_t *drive,idetape_request_sense_result_t *result);
+static void idetape_input_buffers (ide_drive_t *drive, idetape_pc_t *pc, unsigned int bcount)
+{
+       struct buffer_head *bh = pc->bh;
+       int count;
+       
+       while (bcount) {
+#if IDETAPE_DEBUG_BUGS
+               if (bh == NULL) {
+                       printk (KERN_ERR "ide-tape: bh == NULL in idetape_input_buffers\n");
+                       idetape_discard_data (drive, bcount);
+                       return;
+               }
+#endif /* IDETAPE_DEBUG_BUGS */
+               count = IDETAPE_MIN (bh->b_size - bh->b_count, bcount);
+               atapi_input_bytes (drive, bh->b_data + bh->b_count, count);
+               bcount -= count; bh->b_count += count;
+               if (bh->b_count == bh->b_size) {
+                       bh = bh->b_reqnext;
+                       if (bh)
+                               bh->b_count = 0;
+               }
+       }
+       pc->bh = bh;
+}
+
+static void idetape_output_buffers (ide_drive_t *drive, idetape_pc_t *pc, unsigned int bcount)
+{
+       struct buffer_head *bh = pc->bh;
+       int count;
+       
+       while (bcount) {
+#if IDETAPE_DEBUG_BUGS
+               if (bh == NULL) {
+                       printk (KERN_ERR "ide-tape: bh == NULL in idetape_output_buffers\n");
+                       return;
+               }
+#endif /* IDETAPE_DEBUG_BUGS */
+               count = IDETAPE_MIN (pc->b_count, bcount);
+               atapi_output_bytes (drive, pc->b_data, count);
+               bcount -= count; pc->b_data += count; pc->b_count -= count;
+               if (!pc->b_count) {
+                       pc->bh = bh = bh->b_reqnext;
+                       if (bh) {
+                               pc->b_data = bh->b_data;
+                               pc->b_count = bh->b_count;
+                       }
+               }
+       }
+}
 
-idetape_packet_command_t *idetape_next_pc_storage (ide_drive_t *drive);
-struct request *idetape_next_rq_storage (ide_drive_t *drive);
+#ifdef CONFIG_BLK_DEV_TRITON
+static void idetape_update_buffers (idetape_pc_t *pc)
+{
+       struct buffer_head *bh = pc->bh;
+       int count, bcount = pc->actually_transferred;
 
-/*
- *     Various packet commands
- */
-void idetape_create_inquiry_cmd (idetape_packet_command_t *pc);
-void idetape_inquiry_callback (ide_drive_t *drive);
-void idetape_create_locate_cmd (idetape_packet_command_t *pc,unsigned long block,byte partition);
-void idetape_create_rewind_cmd (idetape_packet_command_t *pc);
-void idetape_create_write_filemark_cmd (idetape_packet_command_t *pc,int write_filemark);
-void idetape_create_load_unload_cmd (idetape_packet_command_t *pc,int cmd);
-void idetape_create_space_cmd (idetape_packet_command_t *pc,long count,byte cmd);
-void idetape_create_erase_cmd (idetape_packet_command_t *pc);
-void idetape_create_test_unit_ready_cmd (idetape_packet_command_t *pc);
-void idetape_create_read_position_cmd (idetape_packet_command_t *pc);
-void idetape_read_position_callback (ide_drive_t *drive);
-void idetape_create_read_cmd (idetape_packet_command_t *pc,unsigned long length);
-void idetape_read_callback (ide_drive_t *drive);
-void idetape_create_write_cmd (idetape_packet_command_t *pc,unsigned long length);
-void idetape_write_callback (ide_drive_t *drive);
-void idetape_create_request_sense_cmd (idetape_packet_command_t *pc);
-void idetape_create_mode_sense_cmd (idetape_packet_command_t *pc,byte page_code);
-void idetape_request_sense_callback (ide_drive_t *drive);
-
-void idetape_display_inquiry_result (byte *buffer);
+       if (test_bit (PC_WRITING, &pc->flags))
+               return;
+       while (bcount) {
+#if IDETAPE_DEBUG_BUGS
+               if (bh == NULL) {
+                       printk (KERN_ERR "ide-tape: bh == NULL in idetape_update_buffers\n");
+                       return;
+               }
+#endif /* IDETAPE_DEBUG_BUGS */
+               count = IDETAPE_MIN (bh->b_size, bcount);
+               bh->b_count = count;
+               if (bh->b_count == bh->b_size)
+                       bh = bh->b_reqnext;
+               bcount -= count;
+       }
+       pc->bh = bh;
+}
+#endif /* CONFIG_BLK_DEV_TRITON */
 
 /*
- *     Character device callback functions.
+ *     idetape_poll_for_dsc gets invoked by a timer (which was set
+ *     by idetape_postpone_request) to reinsert our postponed request
+ *     into the request queue.
  *
- *     We currently support:
+ *     Note that the procedure done here is different than the method
+ *     we are using in idetape_queue_pc_head - There we are putting
+ *     request(s) before our currently called request.
  *
- *             OPEN, RELEASE, READ, WRITE and IOCTL.
- */
-
-int idetape_chrdev_read (struct inode *inode, struct file *file, char *buf, int count);
-int idetape_chrdev_write (struct inode *inode, struct file *file, const char *buf, int count);
-int idetape_chrdev_ioctl (struct inode *inode, struct file *file, unsigned int cmd, unsigned long arg);
-int idetape_chrdev_open (struct inode *inode, struct file *file);
-void idetape_chrdev_release (struct inode *inode,struct file *file);
-
-/*
- *     idetape_mtioctop implements general magnetic tape io control
- *     commands, as defined in include/linux/mtio.h. Those commands are
- *     accessed through the character device interface, using the MTIOCTOP
- *     ioctl.
- */
-int idetape_mtioctop (ide_drive_t *drive,short mt_op,int mt_count);
-
-/*
- *     idetape_space_over_filemarks handles the MTFSF, MTFSFM, ... mtio.h
- *     commands.
- */
-int idetape_space_over_filemarks (ide_drive_t *drive,short mt_op,int mt_count);
-
-/*
- *     idetape_add_chrdev_read_request is called from idetape_chrdev_read
- *     to service a character device read request and add read-ahead
- *     requests to our pipeline.
+ *     Here, on the other hand, HWGROUP(drive)->rq is not our request
+ *     but rather a request to another device. Therefore, we will let
+ *     it finish and only then service our postponed request --> We don't
+ *     touch HWGROUP(drive)->rq.
  */
-int idetape_add_chrdev_read_request (ide_drive_t *drive,int blocks,char *buffer);
-
-/*
- *     idetape_add_chrdev_write_request adds a character device write
- *     request to the pipeline.
- */
-int idetape_add_chrdev_write_request (ide_drive_t *drive,int blocks,char *buffer);
+static void idetape_poll_for_dsc (unsigned long data)
+{
+       ide_drive_t *drive=(ide_drive_t *) data;
+       idetape_tape_t *tape = drive->driver_data;
 
-/*
- *     idetape_queue_rw_tail will add a command to the tail of the device
- *     request queue and wait for it to finish. This is used when we
- *     can not allocate pipeline stages (or in non-pipelined mode).
- */
-int idetape_queue_rw_tail (ide_drive_t *drive,int cmd,int blocks,char *buffer);
+       del_timer (&tape->dsc_timer);
 
-/*
- *     Adds a packet command request to the tail of the device request
- *     queue and waits for it to be serviced.
- */
-int idetape_queue_pc_tail (ide_drive_t *drive,idetape_packet_command_t *pc);
+#if IDETAPE_DEBUG_LOG
+       printk (KERN_INFO "ide-tape: Putting back postponed request\n");
+#endif /* IDETAPE_DEBUG_LOG */
+#if IDETAPE_DEBUG_BUGS
+       if (tape->postponed_rq == NULL) {
+               printk (KERN_ERR "tape->postponed_rq is NULL in idetape_poll_for_dsc\n");
+               return;
+       }
+#endif /* IDETAPE_DEBUG_BUGS */
 
-int idetape_position_tape (ide_drive_t *drive,unsigned long block);
-int idetape_rewind_tape (ide_drive_t *drive);
-int idetape_flush_tape_buffers (ide_drive_t *drive);
+       (void) ide_do_drive_cmd (drive, tape->postponed_rq, ide_next);
+}
 
 /*
- *     Used to get device information
+ *     idetape_postpone_request postpones the current request so that
+ *     ide.c will be able to service requests from another device on
+ *     the same hwgroup while we are polling for DSC.
  */
+static void idetape_postpone_request (ide_drive_t *drive)
+{
+       idetape_tape_t *tape = drive->driver_data;
+       struct request *rq;
+       
+#if IDETAPE_DEBUG_LOG
+       printk (KERN_INFO "Reached idetape_postpone_request\n");
+#endif /* IDETAPE_DEBUG_LOG */
+#if IDETAPE_DEBUG_BUGS
+       if (tape->postponed_rq != NULL)
+               printk (KERN_ERR "ide-tape.c bug - postponed_rq not NULL in idetape_postpone_request\n");
+#endif /* IDETAPE_DEBUG_BUGS */
 
-void idetape_get_mode_sense_results (ide_drive_t *drive);
+       /*
+        *      Set the timer parameters.
+        */
+       tape->dsc_timer.expires=jiffies + tape->dsc_polling_frequency;
+       tape->dsc_timer.data=(unsigned long) drive;
+       tape->dsc_timer.function = &idetape_poll_for_dsc;
+       init_timer (&tape->dsc_timer);
 
-/*
- *     General utility functions
- */
-unsigned long idetape_swap_long (unsigned long temp);
-unsigned short idetape_swap_short (unsigned short temp);
+       /*
+        * Remove current request from the request queue:
+        */
+       tape->postponed_rq = rq = HWGROUP(drive)->rq;
+       rq->rq_status = IDETAPE_RQ_POSTPONED;
+       blk_dev[MAJOR(rq->rq_dev)].current_request = rq->next;
+       HWGROUP(drive)->rq = NULL;
 
-#define IDETAPE_MIN(a,b)       ((a)<(b) ? (a):(b))
+       add_timer(&tape->dsc_timer);            /* Activate the polling timer */
+}
 
 /*
- *     Pipeline related functions
- */
-
-idetape_pipeline_stage_t *idetape_kmalloc_stage (ide_drive_t *drive);
-void idetape_kfree_stage (idetape_pipeline_stage_t *stage);
-void idetape_copy_buffer_from_stage (idetape_pipeline_stage_t *stage,char *buffer);
-void idetape_copy_buffer_to_stage (idetape_pipeline_stage_t *stage,char *buffer);
-void idetape_increase_max_pipeline_stages (ide_drive_t *drive);
-void idetape_add_stage_tail (ide_drive_t *drive,idetape_pipeline_stage_t *stage);
-void idetape_remove_stage_head (ide_drive_t *drive);
-void idetape_active_next_stage (ide_drive_t *drive);
-void idetape_wait_for_pipeline (ide_drive_t *drive);
-void idetape_discard_read_pipeline (ide_drive_t *drive);
-void idetape_empty_write_pipeline (ide_drive_t *drive);
-void idetape_insert_pipeline_into_queue (ide_drive_t *drive);
-
-/*
- *     For general magnetic tape device compatibility.
- */
-#include <linux/mtio.h>
-
-/*
- *     Global variables
+ *     idetape_queue_pc_head generates a new packet command request in front
+ *     of the request queue, before the current request, so that it will be
+ *     processed immediately, on the next pass through the driver.
  *
- *     The variables below are used for the character device interface.
+ *     idetape_queue_pc_head is called from the request handling part of
+ *     the driver (the "bottom" part). Safe storage for the request should
+ *     be allocated with idetape_next_pc_storage and idetape_next_rq_storage
+ *     before calling idetape_queue_pc_head.
  *
- *     Additional state variables are defined in our ide_drive_t structure.
+ *     Memory for those requests is pre-allocated at initialization time, and
+ *     is limited to IDETAPE_PC_STACK requests. We assume that we have enough
+ *     space for the maximum possible number of inter-dependent packet commands.
+ *
+ *     The higher level of the driver - The ioctl handler and the character
+ *     device handling functions should queue request to the lower level part
+ *     and wait for their completion using idetape_queue_pc_tail or
+ *     idetape_queue_rw_tail.
  */
-idetape_chrdev_t idetape_chrdev;               /* Character device interface information */
-byte idetape_drive_already_found=0;            /* 1 when the above data structure is initialized */
+static void idetape_queue_pc_head (ide_drive_t *drive,idetape_pc_t *pc,struct request *rq)
+{
+       unsigned int major = HWIF(drive)->major;
+       struct blk_dev_struct *bdev = &blk_dev[major];
 
-/*
- *     Our character device supporting functions, passed to register_chrdev.
- */
-static struct file_operations idetape_fops = {
-       NULL,                   /* lseek - default */
-       idetape_chrdev_read,    /* read  */
-       idetape_chrdev_write,   /* write */
-       NULL,                   /* readdir - bad */
-       NULL,                   /* select */
-       idetape_chrdev_ioctl,   /* ioctl */
-       NULL,                   /* mmap */
-       idetape_chrdev_open,    /* open */
-       idetape_chrdev_release, /* release */
-       NULL,                   /* fsync */
-       NULL,                   /* fasync */
-       NULL,                   /* check_media_change */
-       NULL                    /* revalidate */
-};
+       bdev->current_request=HWGROUP (drive)->rq;              /* Since we may have taken it out */
 
+       ide_init_drive_cmd (rq);
+       rq->buffer = (char *) pc;
+       rq->cmd = IDETAPE_PC_RQ;
+       (void) ide_do_drive_cmd (drive, rq, ide_preempt);
+}
 
 /*
- *     idetape_identify_device is called by do_identify in ide.c during
- *     the device probing stage to check the contents of the ATAPI IDENTIFY
- *     command results, in case the device type is tape. We return:
- *
- *     1       If the tape can be supported by us, based on the information
- *             we have so far.
- *
- *     0       If this tape driver is not currently supported by us.
- *
- *     In case we decide to support the tape, we store the current drive
- *     pointer in our character device global variables, so that we can
- *     pass between both interfaces.
+ *     idetape_next_pc_storage returns a pointer to a place in which we can
+ *     safely store a packet command, even though we intend to leave the
+ *     driver. A storage space for a maximum of IDETAPE_PC_STACK packet
+ *     commands is allocated at initialization time.
  */
-int idetape_identify_device (ide_drive_t *drive,struct hd_driveid *id)
-
+static idetape_pc_t *idetape_next_pc_storage (ide_drive_t *drive)
 {
-       struct idetape_id_gcw gcw;
-       unsigned short *ptr;
-       int support=1;
-#if IDETAPE_DEBUG_LOG
-       unsigned short mask,i;
-#endif /* IDETAPE_DEBUG_LOG */
-               
-       ptr=(unsigned short *) &gcw;
-       *ptr=id->config;
+       idetape_tape_t *tape = drive->driver_data;
 
 #if IDETAPE_DEBUG_LOG
-       printk ("Dumping ATAPI Identify Device tape parameters\n");
-       
-       printk ("Protocol Type: ");
-       switch (gcw.protocol) {
-               case 0: case 1: printk ("ATA\n");break;
-               case 2: printk ("ATAPI\n");break;
-               case 3: printk ("Reserved (Unknown to ide-tape)\n");break;
-       }
-       
-       printk ("Device Type: %x - ",gcw.device_type);  
-       switch (gcw.device_type) {
-               case 0: printk ("Direct-access Device\n");break;
-               case 1: printk ("Streaming Tape Device\n");break;
-               case 2: case 3: case 4: printk ("Reserved\n");break;
-               case 5: printk ("CD-ROM Device\n");break;
-               case 6: printk ("Reserved\n");
-               case 7: printk ("Optical memory Device\n");break;
-               case 0x1f: printk ("Unknown or no Device type\n");break;
-               default: printk ("Reserved\n");
-       }
-       printk ("Removable: %s",gcw.removable ? "Yes\n":"No\n");        
-               
-       printk ("Command Packet DRQ Type: ");
-       switch (gcw.drq_type) {
-               case 0: printk ("Microprocessor DRQ\n");break;
-               case 1: printk ("Interrupt DRQ\n");break;
-               case 2: printk ("Accelerated DRQ\n");break;
-               case 3: printk ("Reserved\n");break;
-       }
-       
-       printk ("Command Packet Size: ");
-       switch (gcw.packet_size) {
-               case 0: printk ("12 bytes\n");break;
-               case 1: printk ("16 bytes\n");break;
-               default: printk ("Reserved\n");break;
-       }
-       printk ("Model: %s\n",id->model);
-       printk ("Firmware Revision: %s\n",id->fw_rev);
-       printk ("Serial Number: %s\n",id->serial_no);
-       printk ("Write buffer size: %d bytes\n",id->buf_size*512);
-       printk ("DMA: %s",id->capability & 0x01 ? "Yes\n":"No\n");
-       printk ("LBA: %s",id->capability & 0x02 ? "Yes\n":"No\n");
-       printk ("IORDY can be disabled: %s",id->capability & 0x04 ? "Yes\n":"No\n");
-       printk ("IORDY supported: %s",id->capability & 0x08 ? "Yes\n":"Unknown\n");
-       printk ("ATAPI overlap supported: %s",id->capability & 0x20 ? "Yes\n":"No\n");
-       printk ("PIO Cycle Timing Category: %d\n",id->tPIO);
-       printk ("DMA Cycle Timing Category: %d\n",id->tDMA);
-       printk ("Single Word DMA supported modes: ");
-       for (i=0,mask=1;i<8;i++,mask=mask << 1) {
-               if (id->dma_1word & mask)
-                       printk ("%d ",i);
-               if (id->dma_1word & (mask << 8))
-                       printk ("(active) ");
-       }
-       printk ("\n");
-
-       printk ("Multi Word DMA supported modes: ");
-       for (i=0,mask=1;i<8;i++,mask=mask << 1) {
-               if (id->dma_mword & mask)
-                       printk ("%d ",i);
-               if (id->dma_mword & (mask << 8))
-                       printk ("(active) ");
-       }
-       printk ("\n");
-
-       if (id->field_valid & 0x0002) {
-               printk ("Enhanced PIO Modes: %s\n",id->eide_pio_modes & 1 ? "Mode 3":"None");
-               printk ("Minimum Multi-word DMA cycle per word: ");
-               if (id->eide_dma_min == 0)
-                       printk ("Not supported\n");
-               else
-                       printk ("%d ns\n",id->eide_dma_min);
-
-               printk ("Manufacturer\'s Recommended Multi-word cycle: ");
-               if (id->eide_dma_time == 0)
-                       printk ("Not supported\n");
-               else
-                       printk ("%d ns\n",id->eide_dma_time);
-
-               printk ("Minimum PIO cycle without IORDY: ");
-               if (id->eide_pio == 0)
-                       printk ("Not supported\n");
-               else
-                       printk ("%d ns\n",id->eide_pio);
-
-               printk ("Minimum PIO cycle with IORDY: ");
-               if (id->eide_pio_iordy == 0)
-                       printk ("Not supported\n");
-               else
-                       printk ("%d ns\n",id->eide_pio_iordy);
-               
-       }
-
-       else {
-               printk ("According to the device, fields 64-70 are not valid.\n");
-       }
+       printk (KERN_INFO "ide-tape: pc_stack_index=%d\n",tape->pc_stack_index);
 #endif /* IDETAPE_DEBUG_LOG */
-
-       /* Check that we can support this device */
-
-       if (gcw.protocol !=2 ) {
-               printk ("ide-tape: Protocol is not ATAPI\n");support=0;
-       }
-
-       if (gcw.device_type != 1) {
-               printk ("ide-tape: Device type is not set to tape\n");support=0;
-       }
-
-       if (!gcw.removable) {
-               printk ("ide-tape: The removable flag is not set\n");support=0;
-       }
-
-       if (gcw.drq_type != 2) {
-               printk ("ide-tape: Sorry, DRQ types other than Accelerated DRQ\n");
-               printk ("ide-tape: are still not supported by the driver\n");support=0;
-       }
-
-       if (gcw.packet_size != 0) {
-               printk ("ide-tape: Packet size is not 12 bytes long\n");
-               if (gcw.packet_size == 1)
-                       printk ("ide-tape: Sorry, padding to 16 bytes is still not supported\n");
-               support=0;                      
-       }
-
-       if (idetape_drive_already_found) {
-               printk ("ide-tape: Sorry, only one ide tape drive is supported by the driver\n");
-               support=0;
-       }
-       else {
-               idetape_drive_already_found=1;
-               idetape_chrdev.drive=drive;
-               idetape_chrdev.major=IDETAPE_MAJOR;
-               idetape_chrdev.minor=0;
-               idetape_chrdev.name[0]='h';
-               idetape_chrdev.name[1]='t';
-               idetape_chrdev.name[2]='0';
-               idetape_chrdev.name[3]=0;
-       }
-
-       return (support);               /* In case support=0, we will not install the driver */
+       if (tape->pc_stack_index==IDETAPE_PC_STACK)
+               tape->pc_stack_index=0;
+       return (&tape->pc_stack[tape->pc_stack_index++]);
 }
 
 /*
- *     idetape_register_chrdev calls register_chrdev to register our character
- *     device interface. The connection to the ide_drive_t structure, which
- *     is used by the entire ide driver is provided by our global variable
- *     idetape_chrdev.drive, which was initialized earlier, during the device
- *     probing stage.
+ *     idetape_next_rq_storage is used along with idetape_next_pc_storage.
+ *     Since we queue packet commands in the request queue, we need to
+ *     allocate a request, along with the allocation of a packet command.
  */
  
-void idetape_register_chrdev (void)
-
+/**************************************************************
+ *                                                            *
+ *  This should get fixed to use kmalloc(GFP_ATOMIC, ..)      *
+ *  followed later on by kfree().   -ml                       *
+ *                                                            *
+ **************************************************************/
+static struct request *idetape_next_rq_storage (ide_drive_t *drive)
 {
-       int major,minor;
-       ide_drive_t *drive;
-
-       if (!idetape_drive_already_found)
-               return;
+       idetape_tape_t *tape = drive->driver_data;
 
-       drive=idetape_chrdev.drive;
-       major=idetape_chrdev.major;
-       minor=idetape_chrdev.minor;
-       
-       if (register_chrdev (major,idetape_chrdev.name,&idetape_fops)) {
-               printk ("Unable to register character device interface !\n");
-               /* ??? */
-       }
-       else {
-               printk ("ide-tape: %s <-> %s : Character device interface on major = %d\n",
-                       drive->name,idetape_chrdev.name,major);
-       }
+#if IDETAPE_DEBUG_LOG
+       printk (KERN_INFO "ide-tape: rq_stack_index=%d\n",tape->rq_stack_index);
+#endif /* IDETAPE_DEBUG_LOG */
+       if (tape->rq_stack_index==IDETAPE_PC_STACK)
+               tape->rq_stack_index=0;
+       return (&tape->rq_stack[tape->rq_stack_index++]);
 }
 
 /*
- *     idetape_setup is called from the ide driver in the partition table
- *     identification stage, to:
- *
- *             1.      Initialize our various state variables.
- *             2.      Ask the tape for its capabilities.
- *             3.      Allocate a buffer which will be used for data
- *                     transfer. The buffer size is chosen based on
- *                     the recommendation which we received in step (2).
- *
- *     Note that at this point ide.c already assigned us an irq, so that
- *     we can queue requests here and wait for their completion.
+ *     Pipeline related functions
  */
-void idetape_setup (ide_drive_t *drive)
 
+static inline int idetape_pipeline_active (idetape_tape_t *tape)
 {
-       idetape_tape_t *tape=&(drive->tape);
-       unsigned int allocation_length;
-#if IDETAPE_ANTICIPATE_READ_WRITE_DSC
-       ide_hwif_t *hwif = HWIF(drive);
-       unsigned long t1, tmid, tn;
-#endif /* IDETAPE_ANTICIPATE_READ_WRITE_DSC */
-
-#if IDETAPE_DEBUG_LOG
-       printk ("ide-tape: Reached idetape_setup\n");
-#endif /* IDETAPE_DEBUG_LOG */ 
-       
-       drive->ready_stat = 0;                  /* With an ATAPI device, we can issue packet commands */
-                                               /* regardless of the state of DRDY */
-       HWIF(drive)->tape_drive=drive;
-
-       tape->block_address=0;                  
-       tape->block_address_valid=0;
-       tape->pc_stack_index=0;
-       tape->failed_pc=NULL;
-       tape->postponed_rq=NULL;
-       tape->busy=0;
-       tape->active_data_request=NULL;
-       tape->current_number_of_stages=0;
-       tape->first_stage=tape->next_stage=tape->last_stage=NULL;
-       tape->error_in_pipeline_stage=0;
-       tape->request_status=0;
-       tape->chrdev_direction=idetape_direction_none;
-       tape->reset_issued=0;
-       tape->pc=&(tape->pc_stack [0]);
-       
-#if IDETAPE_PIPELINE
-       tape->max_number_of_stages=IDETAPE_MIN_PIPELINE_STAGES;
-       printk ("ide-tape: Operating in pipelined (fast and tricky) operation mode.\n");
-#else
-       tape->max_number_of_stages=0;
-       printk ("ide-tape: Operating in non-pipelined (slow and safe) operation mode.\n");
-#endif /* IDETAPE_PIPELINE */
-
-       idetape_get_mode_sense_results (drive);
-
-       tape->data_buffer_size = tape->capabilities.ctl * tape->tape_block_size;
-       while (tape->data_buffer_size > 0xffff) {
-               tape->capabilities.ctl /= 2;
-               tape->data_buffer_size = tape->capabilities.ctl * tape->tape_block_size;
-       }
-       allocation_length=tape->data_buffer_size;
-       if (tape->data_buffer_size % IDETAPE_ALLOCATION_BLOCK)
-               allocation_length+=IDETAPE_ALLOCATION_BLOCK;
-       
-#if IDETAPE_MINIMIZE_IDLE_MEMORY_USAGE
-       tape->data_buffer=tape->merge_buffer=NULL;
-#else
-       tape->data_buffer=kmalloc (allocation_length,GFP_KERNEL);
-       tape->merge_buffer=kmalloc (allocation_length,GFP_KERNEL);
-       if (tape->data_buffer == NULL || tape->merge_buffer == NULL) {
-               printk ("ide-tape: FATAL - Can not allocate 2 buffers of %d bytes each\n",allocation_length);
-               printk ("ide-tape: Aborting character device installation\n");
-               idetape_drive_already_found=0;
-               unregister_chrdev (idetape_chrdev.major,idetape_chrdev.name);
-               return;
-       }
-#endif /* IDETAPE_MINIMIZE_IDLE_MEMORY_USAGE */
-
-       tape->merge_buffer_size=tape->merge_buffer_offset=0;
-       
-#if IDETAPE_ANTICIPATE_READ_WRITE_DSC
-
-       /*
-        *      Cleverly select the DSC read/write polling frequency, based
-        *      on the tape's speed, its recommended transfer unit, its
-        *      internal buffer size and our operation mode.
-        *
-        *      In the pipelined operation mode we aim for "catching" the
-        *      tape when its internal buffer is about 50% full. This will
-        *      dramatically reduce our polling frequency and will also
-        *      leave enough time for the ongoing request of the other device
-        *      to complete before the buffer is completely empty. We will
-        *      then completely refill the buffer with requests from our
-        *      internal pipeline.
-        *
-        *      When operating in the non-pipelined operation mode, we
-        *      can't allow ourself this luxury. Instead, we will try to take
-        *      full advantage of the internal tape buffer by waiting only
-        *      for one request to complete. This will increase our load
-        *      on linux but will usually still fail to keep the tape
-        *      constantly streaming.
-        */
-
-       /*
-        *      We will ignore the above algorithm for now, as it can have
-        *      a bad effect on interactive response under some conditions.
-        *      The following attempts to find a balance between good latency
-        *      and good system throughput. It will be nice to have all this
-        *      configurable in run time at some point.
-        */
-       t1 = (tape->data_buffer_size * HZ) / (tape->capabilities.speed * 1000);
-       tmid = (tape->capabilities.buffer_size * 32 * HZ) / (tape->capabilities.speed * 125);
-       tn = (IDETAPE_FIFO_THRESHOLD * tape->data_buffer_size * HZ) / (tape->capabilities.speed * 1000);
-
-       if (tape->max_number_of_stages) {
-               if (drive->using_dma)
-                       tape->best_dsc_rw_frequency = tmid;
-               else {
-                       if (hwif->drives[drive->select.b.unit ^ 1].present || hwif->next != hwif)
-                               tape->best_dsc_rw_frequency = IDETAPE_MIN ((tn + tmid) / 2, tmid);
-                       else
-                               tape->best_dsc_rw_frequency = IDETAPE_MIN (tn, tmid);
-               }
-       } else
-               tape->best_dsc_rw_frequency = t1;
-
-       /*
-        *      Ensure that the number we got makes sense.
-        */
-
-       if (tape->best_dsc_rw_frequency > IDETAPE_DSC_READ_WRITE_LOWEST_FREQUENCY) {
-               printk ("ide-tape: Although the recommended polling period is %lu jiffies, \n",tape->best_dsc_rw_frequency);
-               printk ("ide-tape: we will use %u jiffies\n",IDETAPE_DSC_READ_WRITE_LOWEST_FREQUENCY);
-               printk ("ide-tape: (It may well be that we are wrong here)\n");
-               tape->best_dsc_rw_frequency = IDETAPE_DSC_READ_WRITE_LOWEST_FREQUENCY;
-       }
-
-       if (tape->best_dsc_rw_frequency < IDETAPE_DSC_READ_WRITE_FALLBACK_FREQUENCY) {
-               printk ("ide-tape: Although the recommended polling period is %lu jiffies, \n",tape->best_dsc_rw_frequency);
-               printk ("ide-tape: we will use %u jiffies\n",IDETAPE_DSC_READ_WRITE_FALLBACK_FREQUENCY);
-               tape->best_dsc_rw_frequency = IDETAPE_DSC_READ_WRITE_FALLBACK_FREQUENCY;
-       }
-
-#else
-       tape->best_dsc_rw_frequency=IDETAPE_DSC_READ_WRITE_FALLBACK_FREQUENCY;
-#endif /* IDETAPE_ANTICIPATE_READ_WRITE_DSC */
-
-       printk (KERN_INFO "ide-tape: %s <-> %s, %dKBps, %d*%dkB buffer, %dkB pipeline, %lums tDSC%s\n",
-               drive->name, "ht0", tape->capabilities.speed, (tape->capabilities.buffer_size * 512) / tape->data_buffer_size,
-               tape->data_buffer_size / 1024, tape->max_number_of_stages * tape->data_buffer_size / 1024,
-               tape->best_dsc_rw_frequency * 1000 / HZ, drive->using_dma ? ", DMA":"");
-       return;
+       return tape->active_data_request != NULL;
 }
 
 /*
- *     idetape_get_mode_sense_results asks the tape about its various
- *     parameters. In particular, we will adjust our data transfer buffer
- *     size to the recommended value as returned by the tape.
+ *     idetape_kfree_stage calls kfree to completely free a stage, along with
+ *     its related buffers.
  */
-
-void idetape_get_mode_sense_results (ide_drive_t *drive)
-
+static void __idetape_kfree_stage (idetape_stage_t *stage)
 {
-       int retval;
-       idetape_tape_t *tape=&(drive->tape);
-       idetape_mode_parameter_header_t *header;
-       idetape_capabilities_page_t *capabilities;
-       idetape_packet_command_t pc;
-       
-       idetape_create_mode_sense_cmd (&pc,IDETAPE_CAPABILITIES_PAGE);
-       pc.buffer=pc.temp_buffer;
-       pc.buffer_size=IDETAPE_TEMP_BUFFER_SIZE;
-       pc.current_position=pc.temp_buffer;
-       retval=idetape_queue_pc_tail (drive,&pc);
-
-       header=(idetape_mode_parameter_header_t *) pc.buffer;   
-       capabilities=(idetape_capabilities_page_t *) (pc.buffer+sizeof (idetape_mode_parameter_header_t));
-
-       capabilities->max_speed=idetape_swap_short (capabilities->max_speed);
-       capabilities->ctl=idetape_swap_short (capabilities->ctl);
-       capabilities->speed=idetape_swap_short (capabilities->speed);
-       capabilities->buffer_size=idetape_swap_short (capabilities->buffer_size);
-
-       tape->capabilities=*capabilities;               /* Save us a copy */
-       tape->tape_block_size=capabilities->blk512 ? 512:1024;
+       struct buffer_head *prev_bh, *bh = stage->bh;
+       int size;
 
-       if (retval) {
-               printk ("ide-tape: Can't get tape parameters\n");
-               printk ("ide-tape: Assuming some default parameters\n");
-               tape->tape_block_size=512;
-               tape->capabilities.ctl=52;
-               tape->capabilities.speed=450;
-               tape->capabilities.buffer_size=6*52;
-               return;
+       while (bh != NULL) {
+               if (bh->b_data != NULL) {
+                       size = (int) bh->b_size;
+                       while (size > 0) {
+                               free_page ((unsigned long) bh->b_data);
+                               size -= PAGE_SIZE;
+                               bh->b_data += PAGE_SIZE;
+                       }
+               }
+               prev_bh = bh;
+               bh = bh->b_reqnext;
+               kfree (prev_bh);
        }
+       kfree (stage);
+}
 
-#if IDETAPE_DEBUG_LOG
-       printk ("Dumping the results of the MODE SENSE packet command\n");
-       printk ("Mode Parameter Header:\n");
-       printk ("Mode Data Length - %d\n",header->mode_data_length);
-       printk ("Medium Type - %d\n",header->medium_type);
-       printk ("Device Specific Parameter - %d\n",header->dsp);
-       printk ("Block Descriptor Length - %d\n",header->bdl);
-       
-       printk ("Capabilities and Mechanical Status Page:\n");
-       printk ("Page code - %d\n",capabilities->page_code);
-       printk ("Page length - %d\n",capabilities->page_length);
-       printk ("Read only - %s\n",capabilities->ro ? "Yes":"No");
-       printk ("Supports reverse space - %s\n",capabilities->sprev ? "Yes":"No");
-       printk ("Supports erase initiated formatting - %s\n",capabilities->efmt ? "Yes":"No");
-       printk ("Supports QFA two Partition format - %s\n",capabilities->qfa ? "Yes":"No");
-       printk ("Supports locking the medium - %s\n",capabilities->lock ? "Yes":"No");
-       printk ("The volume is currently locked - %s\n",capabilities->locked ? "Yes":"No");
-       printk ("The device defaults in the prevent state - %s\n",capabilities->prevent ? "Yes":"No");
-       printk ("Supports ejecting the medium - %s\n",capabilities->eject ? "Yes":"No");
-       printk ("Supports error correction - %s\n",capabilities->ecc ? "Yes":"No");
-       printk ("Supports data compression - %s\n",capabilities->cmprs ? "Yes":"No");
-       printk ("Supports 512 bytes block size - %s\n",capabilities->blk512 ? "Yes":"No");
-       printk ("Supports 1024 bytes block size - %s\n",capabilities->blk1024 ? "Yes":"No");
-       printk ("Restricted byte count for PIO transfers - %s\n",capabilities->slowb ? "Yes":"No");
-       printk ("Maximum supported speed in KBps - %d\n",capabilities->max_speed);
-       printk ("Continuous transfer limits in blocks - %d\n",capabilities->ctl);
-       printk ("Current speed in KBps - %d\n",capabilities->speed);    
-       printk ("Buffer size - %d\n",capabilities->buffer_size*512);
-#endif /* IDETAPE_DEBUG_LOG */
+static void idetape_kfree_stage (idetape_tape_t *tape, idetape_stage_t *stage)
+{
+       if (tape->cache_stage == NULL)
+               tape->cache_stage = stage;
+       else
+               __idetape_kfree_stage (stage);
 }
 
 /*
- *     Packet Command Interface
- *
- *     The current Packet Command is available in tape->pc, and will not
- *     change until we finish handling it. Each packet command is associated
- *     with a callback function that will be called when the command is
- *     finished.
- *
- *     The handling will be done in three stages:
+ *     idetape_kmalloc_stage uses __get_free_page to allocate a pipeline
+ *     stage, along with all the necessary small buffers which together make
+ *     a buffer of size tape->stage_size (or a bit more). We attempt to
+ *     combine sequential pages as much as possible.
  *
- *     1.      idetape_issue_packet_command will send the packet command to the
- *             drive, and will set the interrupt handler to idetape_pc_intr.
- *
- *     2.      On each interrupt, idetape_pc_intr will be called. This step
- *             will be repeated until the device signals us that no more
- *             interrupts will be issued.
- *
- *     3.      ATAPI Tape media access commands have immediate status with a
- *             delayed process. In case of a successful initiation of a
- *             media access packet command, the DSC bit will be set when the
- *             actual execution of the command is finished. 
- *             Since the tape drive will not issue an interrupt, we have to
- *             poll for this event. In this case, we define the request as
- *             "low priority request" by setting rq_status to
- *             IDETAPE_RQ_POSTPONED,   set a timer to poll for DSC and exit
- *             the driver.
- *
- *             ide.c will then give higher priority to requests which
- *             originate from the other device, until will change rq_status
- *             to RQ_ACTIVE.
- *
- *     4.      When the packet command is finished, it will be checked for errors.
- *
- *     5.      In case an error was found, we queue a request sense packet command
- *             in front of the request queue and retry the operation up to
- *             IDETAPE_MAX_PC_RETRIES times.
- *
- *     6.      In case no error was found, or we decided to give up and not
- *             to retry again, the callback function will be called and then
- *             we will handle the next request.
+ *     Returns a pointer to the new allocated stage, or NULL if we
+ *     can't (or don't want to) allocate a stage.
  *
+ *     Pipeline stages are optional and are used to increase performance.
+ *     If we can't allocate them, we'll manage without them.
  */
-
-void idetape_issue_packet_command  (ide_drive_t *drive,idetape_packet_command_t *pc,ide_handler_t *handler)
-
-{
-       idetape_tape_t *tape;
-       idetape_bcount_reg_t bcount;
-       idetape_ireason_reg_t ireason;
-       int dma_ok=0;
-
-       tape=&(drive->tape);
-               
-#if IDETAPE_DEBUG_BUGS
-       if (tape->pc->c[0] == IDETAPE_REQUEST_SENSE_CMD && pc->c[0] == IDETAPE_REQUEST_SENSE_CMD) {
-               printk ("ide-tape: possible ide-tape.c bug - Two request sense in serial were issued\n");
-       }
-#endif /* IDETAPE_DEBUG_BUGS */
-
-       if (tape->failed_pc == NULL && pc->c[0] != IDETAPE_REQUEST_SENSE_CMD)
-               tape->failed_pc=pc;
-       tape->pc=pc;                                                    /* Set the current packet command */
-
-       if (pc->retries > IDETAPE_MAX_PC_RETRIES || pc->abort) {
-
-               /*
-                *      We will "abort" retrying a packet command in case
-                *      a legitimate error code was received (crossing a
-                *      filemark, or DMA error in the end of media, for
-                *      example).
-                */
-
-               if (!pc->abort) {
-                       printk ("ide-tape: %s: I/O error, ",drive->name);
-                       printk ("pc = %x, key = %x, asc = %x, ascq = %x\n",pc->c[0],tape->sense_key,tape->asc,tape->ascq);
-#if IDETAPE_DEBUG_LOG
-                       printk ("ide-tape: Maximum retries reached - Giving up\n");
-#endif /* IDETAPE_DEBUG_LOG */
-                       pc->error=1;                                    /* Giving up */
+static idetape_stage_t *__idetape_kmalloc_stage (idetape_tape_t *tape)
+{
+       idetape_stage_t *stage;
+       struct buffer_head *prev_bh, *bh;
+       int pages = tape->pages_per_stage;
+       char *b_data;
+
+       if ((stage = (idetape_stage_t *) kmalloc (sizeof (idetape_stage_t),GFP_KERNEL)) == NULL)
+               return NULL;
+       stage->next = NULL;
+
+       bh = stage->bh = (struct buffer_head *) kmalloc (sizeof (struct buffer_head), GFP_KERNEL);
+       if (bh == NULL)
+               goto abort;
+       bh->b_reqnext = NULL;
+       if ((bh->b_data = (char *) __get_free_page (GFP_KERNEL)) == NULL)
+               goto abort;
+       bh->b_size = PAGE_SIZE;
+       set_bit (BH_Lock, &bh->b_state);
+
+       while (--pages) {
+               if ((b_data = (char *) __get_free_page (GFP_KERNEL)) == NULL)
+                       goto abort;
+               if (bh->b_data == b_data + PAGE_SIZE && virt_to_bus (bh->b_data) == virt_to_bus (b_data) + PAGE_SIZE) {
+                       bh->b_size += PAGE_SIZE;
+                       bh->b_data -= PAGE_SIZE;
+                       continue;
                }
-               tape->failed_pc=NULL;
-#if IDETAPE_DEBUG_BUGS
-               if (pc->callback==NULL)
-                       printk ("ide-tape: ide-tape bug - Callback function not set !\n");
-               else
-#endif /* IDETAPE_DEBUG_BUGS */
-                       (*pc->callback)(drive);
-               return;
-       }
-
-#if IDETAPE_DEBUG_LOG
-       printk ("Retry number - %d\n",pc->retries);
-#endif /* IDETAPE_DEBUG_LOG */
-
-       pc->retries++;
-
-/*
- *     We no longer call ide_wait_stat to wait for the drive to be ready,
- *     as ide.c already does this for us in do_request.
- */
-       pc->actually_transferred=0;                                     /* We haven't transferred any data yet */
-       pc->current_position=pc->buffer;        
-       bcount.all=pc->request_transfer;                                /* Request to transfer the entire buffer at once */
-
-#ifdef CONFIG_BLK_DEV_TRITON
-       if (pc->dma_error) {
-               printk ("ide-tape: DMA disabled, reverting to PIO\n");
-               drive->using_dma=0;
-               pc->dma_error=0;
-       }
-       if (pc->request_transfer && pc->dma_recommended && drive->using_dma) {
-               dma_ok=!(HWIF(drive)->dmaproc(pc->writing ? ide_dma_write : ide_dma_read, drive));
-       }               
-#endif /* CONFIG_BLK_DEV_TRITON */
-
-       OUT_BYTE (drive->ctl,IDETAPE_CONTROL_REG);
-       OUT_BYTE (dma_ok ? 1:0,IDETAPE_FEATURES_REG);                   /* Use PIO/DMA */
-       OUT_BYTE (bcount.b.high,IDETAPE_BCOUNTH_REG);
-       OUT_BYTE (bcount.b.low,IDETAPE_BCOUNTL_REG);
-       OUT_BYTE (drive->select.all,IDETAPE_DRIVESEL_REG);
-       
-       ide_set_handler (drive,handler,WAIT_CMD);                       /* Set the interrupt routine */
-       OUT_BYTE (WIN_PACKETCMD,IDETAPE_ATACOMMAND_REG);                /* Issue the packet command */
-       if (ide_wait_stat (drive,DRQ_STAT,BUSY_STAT,WAIT_READY)) {      /* Wait for DRQ to be ready - Assuming Accelerated DRQ */       
-               /*
-                *      We currently only support tape drives which report
-                *      accelerated DRQ assertion. For this case, specs
-                *      allow up to 50us. We really shouldn't get here.
-                *
-                *      ??? Still needs to think what to do if we reach
-                *      here anyway.
-                */
-                
-               printk ("ide-tape: Strange, packet command initiated yet DRQ isn't asserted\n");
-               return;
-       }
-       
-       ireason.all=IN_BYTE (IDETAPE_IREASON_REG);
-       if (!ireason.b.cod || ireason.b.io) {
-               printk ("ide-tape: (IO,CoD) != (0,1) while issuing a packet command\n");
-               ide_do_reset (drive);
-               return;         
-       }
-               
-       ide_output_data (drive,pc->c,12/4);                     /* Send the actual packet */
-#ifdef CONFIG_BLK_DEV_TRITON
-       if ((pc->dma_in_progress=dma_ok)) {                     /* Begin DMA, if necessary */
-               pc->dma_error=0;
-               (void) (HWIF(drive)->dmaproc(ide_dma_begin, drive));
-       }
-#endif /* CONFIG_BLK_DEV_TRITON */
-}
-
-/*
- *     idetape_pc_intr is the usual interrupt handler which will be called
- *     during a packet command. We will transfer some of the data (as
- *     requested by the drive) and will re-point interrupt handler to us.
- *     When data transfer is finished, we will act according to the
- *     algorithm described before idetape_issue_packet_command.
- *
- */
-
-void idetape_pc_intr (ide_drive_t *drive)
-
-{
-       idetape_tape_t *tape=&(drive->tape);
-       idetape_status_reg_t status;
-       idetape_bcount_reg_t bcount;
-       idetape_ireason_reg_t ireason;
-       idetape_packet_command_t *pc=tape->pc;
-       unsigned long temp;
-
-#ifdef CONFIG_BLK_DEV_TRITON
-       if (pc->dma_in_progress) {
-               if ((pc->dma_error=HWIF(drive)->dmaproc(ide_dma_status_bad, drive)))
-                       /*
-                        *      We will currently correct the following in
-                        *      idetape_analyze_error.
-                        */
-                       pc->actually_transferred=HWIF(drive)->dmaproc(ide_dma_transferred, drive);
-               else
-                       pc->actually_transferred=pc->request_transfer;
-               (void) (HWIF(drive)->dmaproc(ide_dma_abort, drive));    /* End DMA */
-#if IDETAPE_DEBUG_LOG
-               printk ("ide-tape: DMA finished\n");
-#endif /* IDETAPE_DEBUG_LOG */
-       }
-#endif /* CONFIG_BLK_DEV_TRITON */
-
-       status.all=IN_BYTE (IDETAPE_STATUS_REG);                /* Clear the interrupt */
-
-#if IDETAPE_DEBUG_LOG
-       printk ("ide-tape: Reached idetape_pc_intr interrupt handler\n");
-#endif /* IDETAPE_DEBUG_LOG */ 
-
-       if (!status.b.drq) {                                    /* No more interrupts */
-#if IDETAPE_DEBUG_LOG
-               printk ("Packet command completed\n");
-               printk ("Total bytes transferred: %lu\n",pc->actually_transferred);
-#endif /* IDETAPE_DEBUG_LOG */
-               pc->dma_in_progress=0;
-                                               
-               sti ();
-
-               if (status.b.check || pc->dma_error) {                  /* Error detected */
-#if IDETAPE_DEBUG_LOG
-       /*
-        *      Without debugging, we only log an error if we decided to
-        *      give up retrying.
-        */
-                       printk ("ide-tape: %s: I/O error, ",drive->name);
-#endif /* IDETAPE_DEBUG_LOG */
-                       if (pc->c[0] == IDETAPE_REQUEST_SENSE_CMD) {
-                               printk ("ide-tape: I/O error in request sense command\n");
-                               ide_do_reset (drive);
-                               return;
-                       }                       
-                                               
-                       idetape_retry_pc (drive);                       /* Retry operation */
-                       return;
+               if (b_data == bh->b_data + bh->b_size && virt_to_bus (b_data) == virt_to_bus (bh->b_data) + bh->b_size) {
+                       bh->b_size += PAGE_SIZE;
+                       continue;
                }
-               pc->error=0;
-               if (pc->wait_for_dsc && !status.b.dsc) {                                /* Media access command */
-                       tape->dsc_polling_frequency=IDETAPE_DSC_FAST_MEDIA_ACCESS_FREQUENCY;
-                       idetape_postpone_request (drive);               /* Allow ide.c to handle other requests */
-                       return;
+               prev_bh = bh;
+               if ((bh = (struct buffer_head *) kmalloc (sizeof (struct buffer_head), GFP_KERNEL)) == NULL) {
+                       free_page ((unsigned long) b_data);
+                       goto abort;
                }
-               if (tape->failed_pc == pc)
-                       tape->failed_pc=NULL;
-#if IDETAPE_DEBUG_BUGS
-               if (pc->callback==NULL)                 
-                       printk ("ide-tape: ide-tape bug - Callback function not set !\n");
-               else
-#endif /* IDETAPE_DEBUG_BUGS */
-                       (*pc->callback)(drive);                 /* Command finished - Call the callback function */
-               return;
-       }
-#ifdef CONFIG_BLK_DEV_TRITON
-       if (pc->dma_in_progress) {
-               pc->dma_in_progress=0;
-               printk ("ide-tape: The tape wants to issue more interrupts in DMA mode\n");
-               printk ("ide-tape: DMA disabled, reverting to PIO\n");
-               drive->using_dma=0;
-               ide_do_reset (drive);
-               return;
+               bh->b_reqnext = NULL;
+               bh->b_data = b_data;
+               bh->b_size = PAGE_SIZE;
+               set_bit (BH_Lock, &bh->b_state);
+               prev_bh->b_reqnext = bh;
        }
-#endif /* CONFIG_BLK_DEV_TRITON */
-       bcount.b.high=IN_BYTE (IDETAPE_BCOUNTH_REG);                    /* Get the number of bytes to transfer */
-       bcount.b.low=IN_BYTE (IDETAPE_BCOUNTL_REG);                     /* on this interrupt */
-       ireason.all=IN_BYTE (IDETAPE_IREASON_REG);                      /* Read the interrupt reason register */
-
-       if (ireason.b.cod) {
-               printk ("ide-tape: CoD != 0 in idetape_pc_intr\n");
-               ide_do_reset (drive);
-               return;
-       }
-       if (ireason.b.io != !(pc->writing)) {                   /* Hopefully, we will never get here */
-               printk ("ide-tape: We wanted to %s, ",pc->writing ? "Write":"Read");
-               printk ("but the tape wants us to %s !\n",ireason.b.io ? "Read":"Write");
-               ide_do_reset (drive);
-               return;
-       }
-       
-       if (!pc->writing) {                                     /* Reading - Check that we have enough space */
-               temp=(unsigned long) pc->actually_transferred + bcount.all;
-               if ( temp > pc->request_transfer) {
-                       if (temp > pc->buffer_size) {
-                               printk ("ide-tape: The tape wants to send us more data than requested - discarding data\n");
-                               idetape_discard_data (drive,bcount.all);
-                               ide_set_handler (drive,&idetape_pc_intr,WAIT_CMD);
-                               return;
-                       }
-#if IDETAPE_DEBUG_LOG
-                       printk ("ide-tape: The tape wants to send us more data than requested - allowing transfer\n");
-#endif /* IDETAPE_DEBUG_LOG */
-               }
-       }
-#if IDETAPE_DEBUG_BUGS 
-       if (bcount.all && !pc->buffer) {        
-               printk ("ide-tape: ide-tape.c bug - Buffer not set in idetape_pc_intr. Discarding data.\n");
-               
-               if (!pc->writing) {
-                       printk ("ide-tape: Discarding data\n");
-                       idetape_discard_data (drive,bcount.all);
-                       ide_set_handler (drive,&idetape_pc_intr,WAIT_CMD);
-                       return;
-               }
-               else {  /* ??? */
-               }
-       }
-#endif /* IDETAPE_DEBUG_BUGS */
-       if (pc->writing)
-               idetape_output_data (drive,pc->current_position,bcount.all);    /* Write the current buffer */
-       else
-               idetape_input_data (drive,pc->current_position,bcount.all);     /* Read the current buffer */
-#if IDETAPE_DEBUG_LOG
-       printk ("ide-tape: %s %d bytes\n",pc->writing ? "Wrote":"Received",bcount.all);
-#endif /* IDETAPE_DEBUG_LOG */
-       pc->actually_transferred+=bcount.all;                                   /* Update the current position */
-       pc->current_position+=bcount.all;
-
-       ide_set_handler (drive,&idetape_pc_intr,WAIT_CMD);              /* And set the interrupt handler again */
-}
-
-/*
- *     idetape_postpone_request postpones the current request so that
- *     ide.c will be able to service requests from another device on
- *     the same hwgroup while we are polling for DSC.
- */
-
-void idetape_postpone_request (ide_drive_t *drive)
-
-{
-       idetape_tape_t *tape=&(drive->tape);
-       struct request *rq;
-       idetape_status_reg_t status;
-       
-#if IDETAPE_DEBUG_LOG
-       printk ("Reached idetape_postpone_request\n");
-#endif /* IDETAPE_DEBUG_LOG */
-#if IDETAPE_DEBUG_BUGS
-       if (tape->postponed_rq != NULL)
-               printk ("ide-tape.c bug - postponed_rq not NULL in idetape_postpone_request\n");
-#endif /* IDETAPE_DEBUG_BUGS */
-
-       tape->dsc_timer.expires=jiffies + tape->dsc_polling_frequency;  /* Set timer to poll for */
-       tape->dsc_timeout=jiffies+IDETAPE_DSC_TIMEOUT;                  /* actual completion */
-       tape->dsc_timer.data=(unsigned long) drive;
-       tape->dsc_timer.function=&idetape_poll_for_dsc;
-       init_timer (&(tape->dsc_timer));
-
-       /*
-        * Remove current request from the request queue:
-        */
-
-       tape->postponed_rq = rq = HWGROUP(drive)->rq;
-       rq->rq_status = IDETAPE_RQ_POSTPONED;   
-       blk_dev[MAJOR(rq->rq_dev)].current_request = rq->next;
-       HWGROUP(drive)->rq = NULL;
-
-       /*
-        *      Check the status again - Maybe we can save one polling period.
-        */
-        
-       status.all=IN_BYTE (IDETAPE_STATUS_REG);
-       tape->last_status=status.all;
-       tape->request_status=1; 
-       
-       tape->dsc_polling_start=jiffies;
-       add_timer(&(tape->dsc_timer));          /* Activate the polling timer */
-}
-
-/*
- *     idetape_poll_for_dsc_direct is called from idetape_poll_for_dsc
- *     to handle the case in which we can safely communicate with the tape
- *     (since no other request for this hwgroup is active).
- */
-void idetape_poll_for_dsc_direct (unsigned long data)
-
-{
-       ide_drive_t *drive=(ide_drive_t *) data;
-       idetape_tape_t *tape=&(drive->tape);
-       idetape_status_reg_t status;
-
-#if IDETAPE_DEBUG_LOG
-       printk ("%s: idetape_poll_for_dsc_direct called\n",drive->name);
-#endif /* IDETAPE_DEBUG_LOG */ 
-
-       OUT_BYTE(drive->select.all,IDE_SELECT_REG);
-       status.all=IN_BYTE (IDETAPE_STATUS_REG);
-       
-       if (status.b.dsc) {                                     /* DSC received */
-               tape->dsc_received=1;
-               del_timer (&(tape->dsc_timer));                 /* Stop polling and put back the postponed */
-               idetape_put_back_postponed_request (drive);     /* request in the request queue */
-               return;
-       }
-
-       if (jiffies > tape->dsc_timeout)        { /* Timeout */
-               tape->dsc_received=0;
-               del_timer (&(tape->dsc_timer));
-               /* ??? */
-               idetape_put_back_postponed_request (drive);
-               return;
-       }
-
-       /* Poll again */
-       
-       if (jiffies - tape->dsc_polling_start > IDETAPE_FAST_SLOW_THRESHOLD)
-               tape->dsc_timer.expires = jiffies + IDETAPE_DSC_SLOW_MEDIA_ACCESS_FREQUENCY;
-       else
-               tape->dsc_timer.expires = jiffies + tape->dsc_polling_frequency;
-       add_timer(&(tape->dsc_timer));
-       return;
-}
-
-/*
- *     idetape_poll_for_dsc gets invoked by a timer (which was set
- *     by idetape_postpone_request) to poll for the DSC bit
- *     in the status register.
- *
- *     We take care not to perform any tape access if the driver is
- *     accessing the other device. We will instead ask ide.c to sample
- *     the tape status register on our behalf in the next call to do_request,
- *     at the point in which the other device is idle, or assume that
- *     DSC was received even though we won't verify it (but when we assume
- *     that, it will usually have a solid basis).
- *
- *     The use of cli () below is a must, as we inspect and change
- *     the device request list while another request is active.
- */
-void idetape_poll_for_dsc (unsigned long data)
-
-{
-       ide_drive_t *drive=(ide_drive_t *) data;
-       unsigned int major = HWIF(drive)->major;
-       idetape_tape_t *tape=&(drive->tape);
-       struct blk_dev_struct *bdev = &blk_dev[major];
-       struct request *next_rq;
-       unsigned long flags;
-       idetape_status_reg_t status;
-
-#if IDETAPE_DEBUG_LOG
-       printk ("%s: idetape_poll_for_dsc called\n",drive->name);
-#endif /* IDETAPE_DEBUG_LOG */ 
-
-       save_flags (flags);cli ();
-
-       /*
-        *      Check if the other device is idle. If there are no requests,
-        *      we can safely access the tape.
-        */
-
-       if (HWGROUP (drive)->rq == NULL) {
-               sti ();
-               idetape_poll_for_dsc_direct (data);
-               return;
-       }
-
-       /*
-        *      If DSC was received, re-insert our postponed request into
-        *      the request queue (using ide_next).
-        */
-
-       status.all=tape->last_status;
-
-       if (status.b.dsc) {                                     /* DSC received */
-               tape->dsc_received=1;
-               idetape_put_back_postponed_request (drive);
-               del_timer (&(tape->dsc_timer));
-               restore_flags (flags);
-               return;
-       }
-
-       /*
-        *      At this point, DSC may have been received, but we can't
-        *      check it. We now have two options:
-        *
-        *              1.      The "simple" method - We can continue polling
-        *                      until we know the value of DSC.
-        *
-        *      but we also have a more clever option :-)
-        *
-        *              2.      We can sometimes more or less anticipate in
-        *                      advance how much time it will take for
-        *                      the tape to perform the request. This is the
-        *                      place to take advantage of this !
-        *
-        *                      We can assume that DSC was received, put
-        *                      back our request, and hope that we will have
-        *                      a "cache hit". This will only work when
-        *                      we haven't initiated the packet command yet,
-        *                      but this is the common read/write case. As
-        *                      for the slower media access commands, fallback
-        *                      to method 1 above.
-        *
-        *      When using method 2, we can also take advantage of the
-        *      knowledge of the tape's internal buffer size - We can
-        *      precalculate the time it will take for the tape to complete
-        *      servicing not only one request, but rather, say, 50% of its
-        *      internal buffer. The polling period will then be much larger,
-        *      decreasing our load on Linux, and we will also call
-        *      idetape_postpone_request less often, as there will usually
-        *      be more room in the internal tape buffer while we are in
-        *      idetape_do_request.
-        *
-        *      For this method to work well, the ongoing request of the
-        *      other device should be serviced by the time the tape is
-        *      still working on its remaining 50% internal buffer. This
-        *      will usually happen when the other device is much faster
-        *      than the tape.
-        */
-
-#if IDETAPE_ANTICIPATE_READ_WRITE_DSC
-
-       /*
-        *      Method 2.
-        *
-        *      There is a high chance that DSC was received, even though
-        *      we couldn't verify it. Let's hope that it's a "cache hit"
-        *      rather than a "cache miss". Someday I will probably add a
-        *      feedback loop around the number of "cache hits" which will
-        *      fine-tune the polling period.
-        */
-        
-       if (tape->postponed_rq->cmd != IDETAPE_PACKET_COMMAND_REQUEST_TYPE1) {
-
-               /*
-                *      We can use this method only when the packet command
-                *      was still not initiated.
-                */
-                
-               idetape_put_back_postponed_request (drive);
-               del_timer (&(tape->dsc_timer));
-               restore_flags (flags);
-               return;
-       }
-#endif /* IDETAPE_ANTICIPATE_READ_WRITE_DSC */
-
-       /*
-        *      Fallback to method 1.
-        */
-
-       next_rq=bdev->current_request;
-       if (next_rq == HWGROUP (drive)->rq)
-               next_rq=next_rq->next;
-
-       if (next_rq == NULL) {
-
-               /*
-                *      There will not be another request after the currently
-                *      ongoing request, so ide.c won't be able to sample
-                *      the status register on our behalf in do_request.
-                *
-                *      In case we are waiting for DSC before the packet
-                *      command was initiated, we will put back our postponed
-                *      request and have another look at the status register
-                *      in idetape_do_request, as done in method 2 above.
-                *
-                *      In case we already initiated the command, we can't
-                *      put it back, but it is anyway a slow media access
-                *      command. We will just give up and poll again until
-                *      we are lucky.
-                */
-
-               if (tape->postponed_rq->cmd == IDETAPE_PACKET_COMMAND_REQUEST_TYPE1) {
-
-                       /*
-                        *      Media access command - Poll again.
-                        *
-                        *      We set tape->request_status to 1, just in case
-                        *      other requests are added while we are waiting.
-                        */
-                        
-                       tape->request_status=1;
-                       restore_flags (flags);
-                       tape->dsc_timer.expires = jiffies + tape->dsc_polling_frequency;
-                       add_timer(&(tape->dsc_timer));
-                       return;
-               }
-               
-               /*
-                *      The packet command hasn't been sent to the tape yet -
-                *      We can safely put back the request and have another
-                *      look at the status register in idetape_do_request.
-                */
-
-               idetape_put_back_postponed_request (drive);
-               del_timer (&(tape->dsc_timer));
-               restore_flags (flags);
-               return;
-       }
-
-       /*
-        *      There will be another request after the current request.
-        *
-        *      Request ide.c to sample for us the tape's status register
-        *      before the next request.
-        */
-
-       tape->request_status=1;
-       restore_flags (flags);
-
-       if (jiffies > tape->dsc_timeout)        {               /* Timeout */
-               tape->dsc_received=0;
-               /* ??? */
-               idetape_put_back_postponed_request (drive);
-               del_timer (&(tape->dsc_timer));
-               restore_flags (flags);
-               return;
-       }
-
-       /* Poll again */
-       
-       if (jiffies - tape->dsc_polling_start > IDETAPE_FAST_SLOW_THRESHOLD)
-               tape->dsc_timer.expires = jiffies + IDETAPE_DSC_SLOW_MEDIA_ACCESS_FREQUENCY;
-       else
-               tape->dsc_timer.expires = jiffies + tape->dsc_polling_frequency;
-       add_timer(&(tape->dsc_timer));
-       return;
-}
-
-/*
- *     idetape_put_back_postponed_request gets called when we decided to
- *     stop polling for DSC and continue servicing our postponed request.
- */
-
-void idetape_put_back_postponed_request (ide_drive_t *drive)
+       bh->b_size -= tape->excess_bh_size;
+       return stage;
+abort:
+       __idetape_kfree_stage (stage);
+       return NULL;
+}
 
+static idetape_stage_t *idetape_kmalloc_stage (idetape_tape_t *tape)
 {
-       idetape_tape_t *tape = &(drive->tape);
+       idetape_stage_t *cache_stage = tape->cache_stage;
 
 #if IDETAPE_DEBUG_LOG
-       printk ("ide-tape: Putting back postponed request\n");
+       printk (KERN_INFO "Reached idetape_kmalloc_stage\n");
 #endif /* IDETAPE_DEBUG_LOG */
-#if IDETAPE_DEBUG_BUGS
-       if (tape->postponed_rq == NULL) {
-               printk ("tape->postponed_rq is NULL in put_back_postponed_request\n");
-               return;
-       }
-#endif /* IDETAPE_DEBUG_BUGS */
-       (void) ide_do_drive_cmd (drive, tape->postponed_rq, ide_next);
 
-       /*
-        *      Note that the procedure done here is different than the method
-        *      we are using in idetape_queue_pc_head - There we are putting
-        *      request(s) before our currently called request.
-        *
-        *      Here, on the other hand, HWGROUP(drive)->rq is not our
-        *      request but rather a request to another device. Therefore,
-        *      we will let it finish and only then service our postponed
-        *      request --> We don't touch HWGROUP(drive)->rq.
-        */
+       if (tape->nr_stages >= tape->max_stages)
+               return NULL;
+       if (cache_stage != NULL) {
+               tape->cache_stage = NULL;
+               return cache_stage;
+       }
+       return __idetape_kmalloc_stage (tape);
 }
 
-void idetape_media_access_finished (ide_drive_t *drive)
-
+static void idetape_copy_stage_from_user (idetape_tape_t *tape, idetape_stage_t *stage, const char *buf, int n)
 {
-       idetape_tape_t *tape=&(drive->tape);
-       idetape_status_reg_t status;
-       idetape_packet_command_t *pc;
-
-       pc=tape->pc;
-       
-       status.all=IN_BYTE (IDETAPE_STATUS_REG);
+       struct buffer_head *bh = tape->bh;
+       int count;
 
-       if (tape->dsc_received) {
-#if IDETAPE_DEBUG_LOG
-               printk ("DSC received\n");
-#endif /* IDETAPE_DEBUG_LOG */
-               if (status.b.check) {                                   /* Error detected */
-                       printk ("ide-tape: %s: I/O error, ",drive->name);
-                       idetape_retry_pc (drive);                       /* Retry operation */
+       while (n) {
+#if IDETAPE_DEBUG_BUGS
+               if (bh == NULL) {
+                       printk (KERN_ERR "ide-tape: bh == NULL in idetape_copy_stage_from_user\n");
                        return;
                }
-               pc->error=0;
-               if (tape->failed_pc == pc)
-                       tape->failed_pc=NULL;
-#if IDETAPE_DEBUG_BUGS
-               if (pc->callback==NULL)
-                       printk ("ide-tape: ide-tape bug - Callback function not set !\n");
-               else
 #endif /* IDETAPE_DEBUG_BUGS */
-                       (*pc->callback)(drive);
-
-               return;
+               count = IDETAPE_MIN (bh->b_size - bh->b_count, n);
+               copy_from_user (bh->b_data + bh->b_count, buf, count);
+               n -= count; bh->b_count += count; buf += count;
+               if (bh->b_count == bh->b_size) {
+                       bh = bh->b_reqnext;
+                       if (bh)
+                               bh->b_count = 0;
+               }
        }
-       else {
-               printk ("ide-tape: %s: DSC timeout.\n",drive->name);
-               /* ??? */
-               pc->error=1;
-               tape->failed_pc=NULL;
+       tape->bh = bh;
+}
+
+static void idetape_copy_stage_to_user (idetape_tape_t *tape, char *buf, idetape_stage_t *stage, int n)
+{
+       struct buffer_head *bh = tape->bh;
+       int count;
+
+       while (n) {
 #if IDETAPE_DEBUG_BUGS
-               if (pc->callback==NULL)
-                       printk ("ide-tape: ide-tape bug - Callback function not set !\n");
-               else
+               if (bh == NULL) {
+                       printk (KERN_ERR "ide-tape: bh == NULL in idetape_copy_stage_to_user\n");
+                       return;
+               }
 #endif /* IDETAPE_DEBUG_BUGS */
-                       (*pc->callback)(drive);
-               return;
+               count = IDETAPE_MIN (tape->b_count, n);
+               copy_to_user (buf, tape->b_data, count);
+               n -= count; tape->b_data += count; tape->b_count -= count; buf += count;
+               if (!tape->b_count) {
+                       tape->bh = bh = bh->b_reqnext;
+                       if (bh) {
+                               tape->b_data = bh->b_data;
+                               tape->b_count = bh->b_count;
+                       }
+               }
+       }
+}
+
+static void idetape_init_merge_stage (idetape_tape_t *tape)
+{
+       struct buffer_head *bh = tape->merge_stage->bh;
+       
+       tape->bh = bh;
+       if (tape->chrdev_direction == idetape_direction_write)
+               bh->b_count = 0;
+       else {
+               tape->b_data = bh->b_data;
+               tape->b_count = bh->b_count;
        }
 }
 
+static void idetape_switch_buffers (idetape_tape_t *tape, idetape_stage_t *stage)
+{
+       struct buffer_head *tmp;
+
+       tmp = stage->bh;
+       stage->bh = tape->merge_stage->bh;
+       tape->merge_stage->bh = tmp;
+       idetape_init_merge_stage (tape);
+}
 
 /*
- *     idetape_retry_pc is called when an error was detected during the
- *     last packet command. We queue a request sense packet command in
- *     the head of the request list.
+ *     idetape_increase_max_pipeline_stages is a part of the feedback
+ *     loop which tries to find the optimum number of stages. In the
+ *     feedback loop, we are starting from a minimum maximum number of
+ *     stages, and if we sense that the pipeline is empty, we try to
+ *     increase it, until we reach the user compile time memory limit.
  */
-void idetape_retry_pc (ide_drive_t *drive)
-
+static void idetape_increase_max_pipeline_stages (ide_drive_t *drive)
 {
-       idetape_tape_t *tape = &drive->tape;
-       idetape_packet_command_t *pc;
-       struct request *new_rq;
+       idetape_tape_t *tape = drive->driver_data;
+       
+#if IDETAPE_DEBUG_LOG
+       printk (KERN_INFO "Reached idetape_increase_max_pipeline_stages\n");
+#endif /* IDETAPE_DEBUG_LOG */
 
-       idetape_error_reg_t error;
-       error.all=IN_BYTE (IDETAPE_ERROR_REG);
-       pc=idetape_next_pc_storage (drive);
-       new_rq=idetape_next_rq_storage (drive);
-       idetape_create_request_sense_cmd (pc); 
-       pc->buffer=pc->temp_buffer;
-       pc->buffer_size=IDETAPE_TEMP_BUFFER_SIZE;
-       pc->current_position=pc->temp_buffer;
-       tape->reset_issued = 1;
-       idetape_queue_pc_head (drive,pc,new_rq);
+       tape->max_stages = IDETAPE_MIN (tape->max_stages + IDETAPE_INCREASE_STAGES_RATE, IDETAPE_MAX_PIPELINE_STAGES);
 }
 
 /*
- *     General packet command callback function.
+ *     idetape_add_stage_tail adds a new stage at the end of the pipeline.
  */
-void idetape_pc_callback (ide_drive_t *drive)
-
+static void idetape_add_stage_tail (ide_drive_t *drive,idetape_stage_t *stage)
 {
-       idetape_tape_t *tape;
-       struct request *rq;
-       
-       tape=&(drive->tape);
-       rq=HWGROUP(drive)->rq;
+       idetape_tape_t *tape = drive->driver_data;
+       unsigned long flags;
        
 #if IDETAPE_DEBUG_LOG
-       printk ("ide-tape: Reached idetape_pc_callback\n");
+       printk (KERN_INFO "Reached idetape_add_stage_tail\n");
 #endif /* IDETAPE_DEBUG_LOG */
-       if (!tape->pc->error) {
+       save_flags (flags);
+       cli ();
+       stage->next=NULL;
+       if (tape->last_stage != NULL)
+               tape->last_stage->next=stage;
+       else
+               tape->first_stage=tape->next_stage=stage;
+       tape->last_stage=stage;
+       if (tape->next_stage == NULL)
+               tape->next_stage=tape->last_stage;
+       tape->nr_stages++;
+       tape->nr_pending_stages++;
+       restore_flags (flags);
+}
+
+/*
+ *     idetape_remove_stage_head removes tape->first_stage from the pipeline.
+ *     The caller should avoid race conditions.
+ */
+static void idetape_remove_stage_head (ide_drive_t *drive)
+{
+       idetape_tape_t *tape = drive->driver_data;
+       idetape_stage_t *stage;
+       
 #if IDETAPE_DEBUG_LOG
-               printk ("Request completed\n");
+       printk (KERN_INFO "Reached idetape_remove_stage_head\n");
 #endif /* IDETAPE_DEBUG_LOG */
-               idetape_end_request (1,HWGROUP (drive));
+#if IDETAPE_DEBUG_BUGS
+       if (tape->first_stage == NULL) {
+               printk (KERN_ERR "ide-tape: bug: tape->first_stage is NULL\n");
+               return;         
        }
-       else {
-               idetape_end_request (0,HWGROUP (drive));
+       if (tape->active_stage == tape->first_stage) {
+               printk (KERN_ERR "ide-tape: bug: Trying to free our active pipeline stage\n");
+               return;
+       }
+#endif /* IDETAPE_DEBUG_BUGS */
+       stage=tape->first_stage;
+       tape->first_stage=stage->next;
+       idetape_kfree_stage (tape, stage);
+       tape->nr_stages--;
+       if (tape->first_stage == NULL) {
+               tape->last_stage=NULL;
+#if IDETAPE_DEBUG_BUGS
+               if (tape->next_stage != NULL)
+                       printk (KERN_ERR "ide-tape: bug: tape->next_stage != NULL\n");
+               if (tape->nr_stages)
+                       printk (KERN_ERR "ide-tape: bug: nr_stages should be 0 now\n");
+#endif /* IDETAPE_DEBUG_BUGS */
        }
-       return;
 }
 
-
-void idetape_read_callback (ide_drive_t *drive)
-
+/*
+ *     idetape_active_next_stage will declare the next stage as "active".
+ */
+static void idetape_active_next_stage (ide_drive_t *drive)
 {
-       idetape_tape_t *tape=&(drive->tape);
-       struct request *rq=HWGROUP(drive)->rq;
-       int blocks_read=tape->pc->actually_transferred/tape->tape_block_size;
+       idetape_tape_t *tape = drive->driver_data;
+       idetape_stage_t *stage=tape->next_stage;
+       struct request *rq = &stage->rq;
 
-#if IDETAPE_DEBUG_LOG  
-       printk ("ide-tape: Reached idetape_read_callback\n");
+#if IDETAPE_DEBUG_LOG
+       printk (KERN_INFO "Reached idetape_active_next_stage\n");
 #endif /* IDETAPE_DEBUG_LOG */
+#if IDETAPE_DEBUG_BUGS
+       if (stage == NULL) {
+               printk (KERN_ERR "ide-tape: bug: Trying to activate a non existing stage\n");
+               return;
+       }
+#endif /* IDETAPE_DEBUG_BUGS */        
 
-       tape->block_address+=blocks_read;
-       rq->current_nr_sectors-=blocks_read;    
+       rq->buffer = NULL;
+       rq->bh = stage->bh;
+       tape->active_data_request=rq;
+       tape->active_stage=stage;
+       tape->next_stage=stage->next;
+}
 
-       if (!tape->pc->error)
-               idetape_end_request (1,HWGROUP (drive));
-       else {
-               rq->errors=tape->pc->error;
-               switch (rq->errors) {
-                       case IDETAPE_RQ_ERROR_FILEMARK:
-                       case IDETAPE_RQ_ERROR_EOD:
-                               break;
-               }
-               idetape_end_request (0,HWGROUP (drive));
+/*
+ *     idetape_insert_pipeline_into_queue is used to start servicing the
+ *     pipeline stages, starting from tape->next_stage.
+ */
+static void idetape_insert_pipeline_into_queue (ide_drive_t *drive)
+{
+       idetape_tape_t *tape = drive->driver_data;
+
+       if (tape->next_stage == NULL)
+               return;
+       if (!idetape_pipeline_active (tape)) {
+               idetape_active_next_stage (drive);
+               (void) ide_do_drive_cmd (drive, tape->active_data_request, ide_end);
        }
-       return;
 }
 
-void idetape_write_callback (ide_drive_t *drive)
+static void idetape_abort_pipeline (ide_drive_t *drive)
+{
+       idetape_tape_t *tape = drive->driver_data;
+       idetape_stage_t *stage = tape->next_stage;
+
+       while (stage) {
+               stage->rq.cmd = IDETAPE_ABORTED_WRITE_RQ;
+               stage = stage->next;
+       }
+}
 
+/*
+ *     idetape_end_request is used to finish servicing a request, and to
+ *     insert a pending pipeline request into the main device queue.
+ */
+static void idetape_end_request (byte uptodate, ide_hwgroup_t *hwgroup)
 {
-       idetape_tape_t *tape=&(drive->tape);
-       struct request *rq=HWGROUP(drive)->rq;
-       int blocks_written=tape->pc->actually_transferred/tape->tape_block_size;
-               
-#if IDETAPE_DEBUG_LOG  
-       printk ("ide-tape: Reached idetape_write_callback\n");
+       ide_drive_t *drive = hwgroup->drive;
+       struct request *rq = hwgroup->rq;
+       idetape_tape_t *tape = drive->driver_data;
+       unsigned int major = HWIF(drive)->major;
+       struct blk_dev_struct *bdev = &blk_dev[major];
+       int error;
+
+#if IDETAPE_DEBUG_LOG
+       printk (KERN_INFO "Reached idetape_end_request\n");
 #endif /* IDETAPE_DEBUG_LOG */
 
-       tape->block_address+=blocks_written;
-       rq->current_nr_sectors-=blocks_written;
+       bdev->current_request=rq;                       /* Since we may have taken it out */
 
-       if (!tape->pc->error)
-               idetape_end_request (1,HWGROUP (drive));
-       else {
-               rq->errors=tape->pc->error;
-               idetape_end_request (0,HWGROUP (drive));
+       switch (uptodate) {
+               case 0: error = IDETAPE_ERROR_GENERAL; break;
+               case 1: error = 0; break;
+               default: error = uptodate;
        }
-       return;
-}
+       rq->errors = error;
+       if (error)
+               tape->failed_pc = NULL;
 
-void idetape_inquiry_callback (ide_drive_t *drive)
+       if (tape->active_data_request == rq) {          /* The request was a pipelined data transfer request */
+               tape->active_stage = NULL;
+               tape->active_data_request = NULL;
+               tape->nr_pending_stages--;
+               if (rq->cmd == IDETAPE_WRITE_RQ) {
+                       if (error) {
+                               set_bit (IDETAPE_PIPELINE_ERROR, &tape->flags);
+                               if (error == IDETAPE_ERROR_EOD)
+                                       idetape_abort_pipeline (drive);
+                       }
+                       idetape_remove_stage_head (drive);
+               }
+               if (tape->next_stage != NULL) {
+                       idetape_active_next_stage (drive);
 
-{
-       idetape_tape_t *tape;
-       
-       tape=&(drive->tape);
-       
-       idetape_display_inquiry_result (tape->pc->buffer);
-       idetape_pc_callback (drive);
-       return;
+                       /*
+                        *      Insert the next request into the request queue.
+                        *      The choice of using ide_next or ide_end is now left to the user.
+                        */
+#if IDETAPE_LOW_TAPE_PRIORITY
+                       (void) ide_do_drive_cmd (drive, tape->active_data_request, ide_end);
+#else
+                       (void) ide_do_drive_cmd (drive, tape->active_data_request, ide_next);
+#endif /* IDETAPE_LOW_TAPE_PRIORITY */
+               } else if (!error)
+                       idetape_increase_max_pipeline_stages (drive);
+       }
+       ide_end_drive_cmd (drive, 0, 0);
 }
 
 /*
- *     idetape_input_data is called to read data from the tape's data
- *     register. We basically let ide_input_data do the job, but we also
- *     take care about the remaining bytes which can not be transferred
- *     in 32-bit data transfers.
+ *     idetape_analyze_error is called on each failed packet command retry
+ *     to analyze the request sense. We currently do not utilize this
+ *     information.
  */
-void idetape_input_data (ide_drive_t *drive,void *buffer, unsigned long bcount)
-
+static void idetape_analyze_error (ide_drive_t *drive,idetape_request_sense_result_t *result)
 {
-       unsigned long wcount;
-       
-       wcount=bcount >> 2;
-       bcount -= 4*wcount;
-       
-       if (wcount)
-               ide_input_data (drive,buffer,wcount);
-       
-       if (bcount) {
-               ((byte *)buffer) += 4*wcount;
-               insb (IDETAPE_DATA_REG,buffer,bcount);
+       idetape_tape_t *tape = drive->driver_data;
+       idetape_pc_t *pc = tape->failed_pc;
+               
+       tape->sense_key = result->sense_key; tape->asc = result->asc; tape->ascq = result->ascq;
+#if IDETAPE_DEBUG_LOG
+       /*
+        *      Without debugging, we only log an error if we decided to
+        *      give up retrying.
+        */
+       printk (KERN_INFO "ide-tape: pc = %x, sense key = %x, asc = %x, ascq = %x\n",pc->c[0],result->sense_key,result->asc,result->ascq);
+#endif /* IDETAPE_DEBUG_LOG */
+
+#ifdef CONFIG_BLK_DEV_TRITON
+
+       /*
+        *      Correct pc->actually_transferred by asking the tape.
+        */
+       if (test_bit (PC_DMA_ERROR, &pc->flags)) {
+               pc->actually_transferred = pc->request_transfer - tape->tape_block_size * ntohl (get_unaligned (&result->information));
+               idetape_update_buffers (pc);
+       }
+#endif /* CONFIG_BLK_DEV_TRITON */
+       if (pc->c[0] == IDETAPE_READ_CMD && result->filemark) {
+               pc->error = IDETAPE_ERROR_FILEMARK;
+               set_bit (PC_ABORT, &pc->flags);
+       }
+       if (pc->c[0] == IDETAPE_WRITE_CMD) {
+               if (result->eom || (result->sense_key == 0xd && result->asc == 0x0 && result->ascq == 0x2)) {
+                       pc->error = IDETAPE_ERROR_EOD;
+                       set_bit (PC_ABORT, &pc->flags);
+               }
+       }
+       if (pc->c[0] == IDETAPE_READ_CMD || pc->c[0] == IDETAPE_WRITE_CMD) {
+               if (result->sense_key == 8) {
+                       pc->error = IDETAPE_ERROR_EOD;
+                       set_bit (PC_ABORT, &pc->flags);
+               }
+               if (!test_bit (PC_ABORT, &pc->flags) && pc->actually_transferred)
+                       pc->retries = IDETAPE_MAX_PC_RETRIES + 1;
        }
 }
 
-/*
- *     idetape_output_data is used to write data to the tape.
- */
-void idetape_output_data (ide_drive_t *drive,void *buffer, unsigned long bcount)
-
+static void idetape_request_sense_callback (ide_drive_t *drive)
 {
-       unsigned long wcount;
-       
-       wcount=bcount >> 2;
-       bcount -= 4*wcount;
-       
-       if (wcount)
-               ide_output_data (drive,buffer,wcount);
-       
-       if (bcount) {
-               ((byte *)buffer) += 4*wcount;
-               outsb (IDETAPE_DATA_REG,buffer,bcount);
+       idetape_tape_t *tape = drive->driver_data;
+
+#if IDETAPE_DEBUG_LOG
+       printk (KERN_INFO "ide-tape: Reached idetape_request_sense_callback\n");
+#endif /* IDETAPE_DEBUG_LOG */
+       if (!tape->pc->error) {
+               idetape_analyze_error (drive,(idetape_request_sense_result_t *) tape->pc->buffer);
+               idetape_end_request (1,HWGROUP (drive));
+       } else {
+               printk (KERN_ERR "Error in REQUEST SENSE itself - Aborting request!\n");
+               idetape_end_request (0,HWGROUP (drive));
        }
 }
 
 /*
- *     Too bad. The drive wants to send us data which we are not ready to accept.
- *     Just throw it away.
+ *     idetape_init_pc initializes a packet command.
  */
-void idetape_discard_data (ide_drive_t *drive, unsigned long bcount)
-
+static void idetape_init_pc (idetape_pc_t *pc)
 {
-       unsigned long i;
-       
-       for (i=0;i<bcount;i++)
-               IN_BYTE (IDETAPE_DATA_REG);
+       memset (pc->c, 0, 12);
+       pc->retries = 0;
+       pc->flags = 0;
+       pc->request_transfer = 0;
+       pc->buffer = pc->pc_buffer;
+       pc->buffer_size = IDETAPE_PC_BUFFER_SIZE;
+       pc->bh = NULL;
+       pc->b_data = NULL;
 }
 
-/*
- *     Issue an INQUIRY packet command.
- */
-void idetape_create_inquiry_cmd (idetape_packet_command_t *pc)
-
+static void idetape_create_request_sense_cmd (idetape_pc_t *pc)
 {
-#if IDETAPE_DEBUG_LOG
-       printk ("ide-tape: Creating INQUIRY packet command\n");
-#endif /* IDETAPE_DEBUG_LOG */ 
-       pc->request_transfer=36;
-       pc->callback=&idetape_inquiry_callback;
-       pc->writing=0;
-       
-       idetape_zero_packet_command (pc);
-       pc->c[0]=IDETAPE_INQUIRY_CMD;
-       pc->c[4]=255;
+       idetape_init_pc (pc);   
+       pc->c[0] = IDETAPE_REQUEST_SENSE_CMD;
+       pc->c[4] = 255;
+       pc->request_transfer = 18;
+       pc->callback = &idetape_request_sense_callback;
 }
 
 /*
- *     Format the INQUIRY command results.
+ *     idetape_retry_pc is called when an error was detected during the
+ *     last packet command. We queue a request sense packet command in
+ *     the head of the request list.
  */
-void idetape_display_inquiry_result (byte *buffer)
-
+static void idetape_retry_pc (ide_drive_t *drive)
 {
-       idetape_inquiry_result_t *result;
-
-       result=(idetape_inquiry_result_t *) buffer;
-       ide_fixstring (result->vendor_id,8,0);
-       ide_fixstring (result->product_id,16,0);
-       ide_fixstring (result->revision_level,4,0);
-
-       if (result->response_format != 2) {
-               printk ("The INQUIRY Data Format is unknown to us !\n");
-               printk ("Assuming QIC-157C format.\n");
-       }
-
-#if IDETAPE_DEBUG_LOG
-       printk ("Dumping INQUIRY command results:\n");
-       printk ("Response Data Format: %d - ",result->response_format);
-       switch (result->response_format) {
-               case 2:
-                       printk ("As specified in QIC-157 Revision C\n");
-                       break;
-               default:
-                       printk ("Unknown\n");
-                       break;
-       }
-       
-       printk ("Device Type: %x - ",result->device_type);      
-       switch (result->device_type) {
-               case 0: printk ("Direct-access Device\n");break;
-               case 1: printk ("Streaming Tape Device\n");break;
-               case 2: case 3: case 4: printk ("Reserved\n");break;
-               case 5: printk ("CD-ROM Device\n");break;
-               case 6: printk ("Reserved\n");
-               case 7: printk ("Optical memory Device\n");break;
-               case 0x1f: printk ("Unknown or no Device type\n");break;
-               default: printk ("Reserved\n");
-       }
-       
-       printk ("Removable Medium: %s",result->rmb ? "Yes\n":"No\n");
+       idetape_tape_t *tape = drive->driver_data;
+       idetape_pc_t *pc;
+       struct request *rq;
+       idetape_error_reg_t error;
 
-       printk ("ANSI Version: %d - ",result->ansi_version);
-       switch (result->ansi_version) {
-               case 2:
-                       printk ("QIC-157 Revision C\n");
-                       break;
-               default:
-                       printk ("Unknown\n");
-                       break;
-       }
+       error.all = IN_BYTE (IDE_ERROR_REG);
+       pc = idetape_next_pc_storage (drive);
+       rq = idetape_next_rq_storage (drive);
+       idetape_create_request_sense_cmd (pc);
+       set_bit (IDETAPE_IGNORE_DSC, &tape->flags);
+       idetape_queue_pc_head (drive, pc, rq);
+}
 
-       printk ("ECMA Version: ");
-       if (result->ecma_version)
-               printk ("%d\n",result->ecma_version);
-       else
-               printk ("Not supported\n");
+/*
+ *     idetape_pc_intr is the usual interrupt handler which will be called
+ *     during a packet command. We will transfer some of the data (as
+ *     requested by the drive) and will re-point interrupt handler to us.
+ *     When data transfer is finished, we will act according to the
+ *     algorithm described before idetape_issue_packet_command.
+ *
+ */
+static void idetape_pc_intr (ide_drive_t *drive)
+{
+       idetape_tape_t *tape = drive->driver_data;
+       idetape_status_reg_t status;
+       idetape_bcount_reg_t bcount;
+       idetape_ireason_reg_t ireason;
+       idetape_pc_t *pc=tape->pc;
+       unsigned int temp;
 
-       printk ("ISO Version: ");
-       if (result->iso_version)
-               printk ("%d\n",result->iso_version);
-       else
-               printk ("Not supported\n");
+#if IDETAPE_DEBUG_LOG
+       printk (KERN_INFO "ide-tape: Reached idetape_pc_intr interrupt handler\n");
+#endif /* IDETAPE_DEBUG_LOG */ 
 
-       printk ("Additional Length: %d\n",result->additional_length);
-       printk ("Vendor Identification: %s\n",result->vendor_id);
-       printk ("Product Identification: %s\n",result->product_id);
-       printk ("Product Revision Level: %s\n",result->revision_level);
+#ifdef CONFIG_BLK_DEV_TRITON
+       if (test_bit (PC_DMA_IN_PROGRESS, &pc->flags)) {
+               if (HWIF(drive)->dmaproc(ide_dma_status_bad, drive)) {
+                       set_bit (PC_DMA_ERROR, &pc->flags);
+                       /*
+                        *      We will currently correct the following in
+                        *      idetape_analyze_error.
+                        */
+                       pc->actually_transferred=HWIF(drive)->dmaproc(ide_dma_transferred, drive);
+               } else {
+                       pc->actually_transferred=pc->request_transfer;
+                       idetape_update_buffers (pc);
+               }
+               (void) (HWIF(drive)->dmaproc(ide_dma_abort, drive));    /* End DMA */
+#if IDETAPE_DEBUG_LOG
+               printk (KERN_INFO "ide-tape: DMA finished\n");
 #endif /* IDETAPE_DEBUG_LOG */
-
-       if (result->device_type != 1)
-               printk ("Device type is not set to tape\n");
-
-       if (!result->rmb)
-               printk ("The removable flag is not set\n");
-
-       if (result->ansi_version != 2) {
-               printk ("The Ansi Version is unknown to us !\n");
-               printk ("Assuming compliance with QIC-157C specification.\n");
        }
-}
+#endif /* CONFIG_BLK_DEV_TRITON */
 
-void idetape_create_request_sense_cmd (idetape_packet_command_t *pc)
+       status.all = GET_STAT();                                        /* Clear the interrupt */
 
-{
+       if (!status.b.drq) {                                            /* No more interrupts */
 #if IDETAPE_DEBUG_LOG
-       printk ("ide-tape: Creating REQUEST SENSE packet command\n");
-#endif /* IDETAPE_DEBUG_LOG */ 
-       pc->request_transfer=18;
-       pc->callback=&idetape_request_sense_callback;
-       pc->writing=0;
-       
-       idetape_zero_packet_command (pc);       
-       pc->c[0]=IDETAPE_REQUEST_SENSE_CMD;
-       pc->c[4]=255;
-}
-
-void idetape_request_sense_callback (ide_drive_t *drive)
+               printk (KERN_INFO "Packet command completed, %d bytes transferred\n", pc->actually_transferred);
+#endif /* IDETAPE_DEBUG_LOG */
+               clear_bit (PC_DMA_IN_PROGRESS, &pc->flags);
 
-{
-       idetape_tape_t *tape=&(drive->tape);
+               ide_sti();
 
+               if (status.b.check || test_bit (PC_DMA_ERROR, &pc->flags)) {    /* Error detected */
 #if IDETAPE_DEBUG_LOG
-       printk ("ide-tape: Reached idetape_request_sense_callback\n");
+                       printk (KERN_INFO "ide-tape: %s: I/O error, ",tape->name);
 #endif /* IDETAPE_DEBUG_LOG */
-       if (!tape->pc->error) {
+                       if (pc->c[0] == IDETAPE_REQUEST_SENSE_CMD) {
+                               printk (KERN_ERR "ide-tape: I/O error in request sense command\n");
+                               ide_do_reset (drive);
+                               return;
+                       }
+                       idetape_retry_pc (drive);                               /* Retry operation */
+                       return;
+               }
+               pc->error = 0;
+               if (test_bit (PC_WAIT_FOR_DSC, &pc->flags) && !status.b.dsc) {  /* Media access command */
+                       tape->dsc_polling_start = jiffies;
+                       tape->dsc_polling_frequency = IDETAPE_DSC_MA_FAST;
+                       tape->dsc_timeout = jiffies + IDETAPE_DSC_MA_TIMEOUT;
+                       idetape_postpone_request (drive);               /* Allow ide.c to handle other requests */
+                       return;
+               }
+               if (tape->failed_pc == pc)
+                       tape->failed_pc=NULL;
+               pc->callback(drive);                    /* Command finished - Call the callback function */
+               return;
+       }
+#ifdef CONFIG_BLK_DEV_TRITON
+       if (clear_bit (PC_DMA_IN_PROGRESS, &pc->flags)) {
+               printk (KERN_ERR "ide-tape: The tape wants to issue more interrupts in DMA mode\n");
+               printk (KERN_ERR "ide-tape: DMA disabled, reverting to PIO\n");
+               drive->using_dma=0;
+               ide_do_reset (drive);
+               return;
+       }
+#endif /* CONFIG_BLK_DEV_TRITON */
+       bcount.b.high=IN_BYTE (IDE_BCOUNTH_REG);                        /* Get the number of bytes to transfer */
+       bcount.b.low=IN_BYTE (IDE_BCOUNTL_REG);                         /* on this interrupt */
+       ireason.all=IN_BYTE (IDE_IREASON_REG);
+
+       if (ireason.b.cod) {
+               printk (KERN_ERR "ide-tape: CoD != 0 in idetape_pc_intr\n");
+               ide_do_reset (drive);
+               return;
+       }
+       if (ireason.b.io == test_bit (PC_WRITING, &pc->flags)) {        /* Hopefully, we will never get here */
+               printk (KERN_ERR "ide-tape: We wanted to %s, ", ireason.b.io ? "Write":"Read");
+               printk (KERN_ERR "but the tape wants us to %s !\n",ireason.b.io ? "Read":"Write");
+               ide_do_reset (drive);
+               return;
+       }
+       if (!test_bit (PC_WRITING, &pc->flags)) {                       /* Reading - Check that we have enough space */
+               temp = pc->actually_transferred + bcount.all;
+               if ( temp > pc->request_transfer) {
+                       if (temp > pc->buffer_size) {
+                               printk (KERN_ERR "ide-tape: The tape wants to send us more data than expected - discarding data\n");
+                               idetape_discard_data (drive,bcount.all);
+                               ide_set_handler (drive,&idetape_pc_intr,WAIT_CMD);
+                               return;
+                       }
 #if IDETAPE_DEBUG_LOG
-               printk ("Request completed\n");
+                       printk (KERN_NOTICE "ide-tape: The tape wants to send us more data than expected - allowing transfer\n");
 #endif /* IDETAPE_DEBUG_LOG */
-               idetape_analyze_error (drive,(idetape_request_sense_result_t *) tape->pc->buffer);
-               idetape_end_request (1,HWGROUP (drive));
+               }
        }
-       else {
-               printk ("Error in REQUEST SENSE itself - Aborting request!\n");
-               idetape_end_request (0,HWGROUP (drive));
+       if (test_bit (PC_WRITING, &pc->flags)) {
+               if (pc->bh != NULL)
+                       idetape_output_buffers (drive, pc, bcount.all);
+               else
+                       atapi_output_bytes (drive,pc->current_position,bcount.all);     /* Write the current buffer */
+       } else {
+               if (pc->bh != NULL)
+                       idetape_input_buffers (drive, pc, bcount.all);
+               else
+                       atapi_input_bytes (drive,pc->current_position,bcount.all);      /* Read the current buffer */
        }
-       return;
+       pc->actually_transferred+=bcount.all;                                   /* Update the current position */
+       pc->current_position+=bcount.all;
+
+       ide_set_handler (drive,&idetape_pc_intr,WAIT_CMD);              /* And set the interrupt handler again */
 }
 
 /*
- *     idetape_analyze_error is called on each failed packet command retry
- *     to analyze the request sense. We currently do not utilize this
- *     information.
+ *     Packet Command Interface
+ *
+ *     The current Packet Command is available in tape->pc, and will not
+ *     change until we finish handling it. Each packet command is associated
+ *     with a callback function that will be called when the command is
+ *     finished.
+ *
+ *     The handling will be done in three stages:
+ *
+ *     1.      idetape_issue_packet_command will send the packet command to the
+ *             drive, and will set the interrupt handler to idetape_pc_intr.
+ *
+ *     2.      On each interrupt, idetape_pc_intr will be called. This step
+ *             will be repeated until the device signals us that no more
+ *             interrupts will be issued.
+ *
+ *     3.      ATAPI Tape media access commands have immediate status with a
+ *             delayed process. In case of a successful initiation of a
+ *             media access packet command, the DSC bit will be set when the
+ *             actual execution of the command is finished. 
+ *             Since the tape drive will not issue an interrupt, we have to
+ *             poll for this event. In this case, we define the request as
+ *             "low priority request" by setting rq_status to
+ *             IDETAPE_RQ_POSTPONED,   set a timer to poll for DSC and exit
+ *             the driver.
+ *
+ *             ide.c will then give higher priority to requests which
+ *             originate from the other device, until will change rq_status
+ *             to RQ_ACTIVE.
+ *
+ *     4.      When the packet command is finished, it will be checked for errors.
+ *
+ *     5.      In case an error was found, we queue a request sense packet command
+ *             in front of the request queue and retry the operation up to
+ *             IDETAPE_MAX_PC_RETRIES times.
+ *
+ *     6.      In case no error was found, or we decided to give up and not
+ *             to retry again, the callback function will be called and then
+ *             we will handle the next request.
+ *
  */
-void idetape_analyze_error (ide_drive_t *drive,idetape_request_sense_result_t *result)
-
+static void idetape_issue_packet_command (ide_drive_t *drive, idetape_pc_t *pc)
 {
-       idetape_tape_t *tape=&(drive->tape);
-       idetape_packet_command_t *pc=tape->failed_pc;
-               
-       tape->sense_key=result->sense_key;
-       tape->asc=result->asc;
-       tape->ascq=result->ascq;
-       
-#if IDETAPE_DEBUG_LOG  
-       /*
-        *      Without debugging, we only log an error if we decided to
-        *      give up retrying.
-        */
-       printk ("ide-tape: pc = %x, sense key = %x, asc = %x, ascq = %x\n",pc->c[0],result->sense_key,result->asc,result->ascq);
-#endif /* IDETAPE_DEBUG_LOG */
+       idetape_tape_t *tape = drive->driver_data;
+       idetape_bcount_reg_t bcount;
+       idetape_ireason_reg_t ireason;
+       int dma_ok=0;
 
-       if (pc->c[0] == IDETAPE_READ_CMD) {
-               if (result->filemark) {
-                       pc->error=IDETAPE_RQ_ERROR_FILEMARK;
-                       pc->abort=1;
-               }
+#if IDETAPE_DEBUG_BUGS
+       if (tape->pc->c[0] == IDETAPE_REQUEST_SENSE_CMD && pc->c[0] == IDETAPE_REQUEST_SENSE_CMD) {
+               printk (KERN_ERR "ide-tape: possible ide-tape.c bug - Two request sense in serial were issued\n");
        }
+#endif /* IDETAPE_DEBUG_BUGS */
 
-       if (pc->c[0] == IDETAPE_READ_CMD || pc->c[0] == IDETAPE_WRITE_CMD) {
-               if (result->sense_key == 8) {
-                       pc->error=IDETAPE_RQ_ERROR_EOD;
-                       pc->abort=1;
+       if (tape->failed_pc == NULL && pc->c[0] != IDETAPE_REQUEST_SENSE_CMD)
+               tape->failed_pc=pc;
+       tape->pc=pc;                                                    /* Set the current packet command */
+
+       if (pc->retries > IDETAPE_MAX_PC_RETRIES || test_bit (PC_ABORT, &pc->flags)) {
+               /*
+                *      We will "abort" retrying a packet command in case
+                *      a legitimate error code was received (crossing a
+                *      filemark, or DMA error in the end of media, for
+                *      example).
+                */
+               if (!test_bit (PC_ABORT, &pc->flags)) {
+                       printk (KERN_ERR "ide-tape: %s: I/O error, pc = %2x, key = %2x, asc = %2x, ascq = %2x\n",
+                               tape->name, pc->c[0], tape->sense_key, tape->asc, tape->ascq);
+                       pc->error = IDETAPE_ERROR_GENERAL;              /* Giving up */
                }
+               tape->failed_pc=NULL;
+               pc->callback(drive);
+               return;
        }
+#if IDETAPE_DEBUG_LOG
+       printk (KERN_INFO "Retry number - %d\n",pc->retries);
+#endif /* IDETAPE_DEBUG_LOG */
+
+       pc->retries++;
+       pc->actually_transferred=0;                                     /* We haven't transferred any data yet */
+       pc->current_position=pc->buffer;
+       bcount.all=pc->request_transfer;                                /* Request to transfer the entire buffer at once */
 
-#if 1
 #ifdef CONFIG_BLK_DEV_TRITON
+       if (clear_bit (PC_DMA_ERROR, &pc->flags)) {
+               printk (KERN_WARNING "ide-tape: DMA disabled, reverting to PIO\n");
+               drive->using_dma=0;
+       }
+       if (test_bit (PC_DMA_RECOMMENDED, &pc->flags) && drive->using_dma)
+               dma_ok=!HWIF(drive)->dmaproc(test_bit (PC_WRITING, &pc->flags) ? ide_dma_write : ide_dma_read, drive);
+#endif /* CONFIG_BLK_DEV_TRITON */
 
-       /*
-        *      Correct pc->actually_transferred by asking the tape.
-        */
+       OUT_BYTE (drive->ctl,IDE_CONTROL_REG);
+       OUT_BYTE (dma_ok ? 1:0,IDE_FEATURE_REG);                        /* Use PIO/DMA */
+       OUT_BYTE (bcount.b.high,IDE_BCOUNTH_REG);
+       OUT_BYTE (bcount.b.low,IDE_BCOUNTL_REG);
+       OUT_BYTE (drive->select.all,IDE_SELECT_REG);
+
+       ide_set_handler (drive, &idetape_pc_intr, WAIT_CMD);            /* Set the interrupt routine */
+       OUT_BYTE (WIN_PACKETCMD,IDE_COMMAND_REG);                       /* Issue the packet command */
 
-       if (pc->dma_error && pc->abort) {
-               unsigned long *long_ptr=(unsigned long *) &(result->information1);
-               pc->actually_transferred=pc->request_transfer-tape->tape_block_size*idetape_swap_long (*long_ptr);
-       }               
+       if (ide_wait_stat (drive,DRQ_STAT,BUSY_STAT,WAIT_READY)) {      /* Wait for DRQ to be ready - Assuming Accelerated DRQ */
+               /*
+                *      We currently only support tape drives which report
+                *      accelerated DRQ assertion. For this case, specs
+                *      allow up to 50us. We really shouldn't get here.
+                *
+                *      ??? Still needs to think what to do if we reach
+                *      here anyway.
+                */
+               printk (KERN_ERR "ide-tape: Strange, packet command initiated yet DRQ isn't asserted\n");
+               return;
+       }
+       ireason.all=IN_BYTE (IDE_IREASON_REG);
+       if (!ireason.b.cod || ireason.b.io) {
+               printk (KERN_ERR "ide-tape: (IO,CoD) != (0,1) while issuing a packet command\n");
+               ide_do_reset (drive);
+               return;
+       }
+       atapi_output_bytes (drive,pc->c,12);                    /* Send the actual packet */
+#ifdef CONFIG_BLK_DEV_TRITON
+       if (dma_ok) {                                           /* Begin DMA, if necessary */
+               set_bit (PC_DMA_IN_PROGRESS, &pc->flags);
+               (void) (HWIF(drive)->dmaproc(ide_dma_begin, drive));
+       }
 #endif /* CONFIG_BLK_DEV_TRITON */
-#endif
 }
 
-void idetape_create_test_unit_ready_cmd (idetape_packet_command_t *pc)
+static void idetape_media_access_finished (ide_drive_t *drive)
+{
+       idetape_tape_t *tape = drive->driver_data;
+       idetape_pc_t *pc = tape->pc;
+       idetape_status_reg_t status;
+
+       status.all = GET_STAT();
+       if (status.b.dsc) {
+               if (status.b.check) {                                   /* Error detected */
+                       printk (KERN_ERR "ide-tape: %s: I/O error, ",tape->name);
+                       idetape_retry_pc (drive);                       /* Retry operation */
+                       return;
+               }
+               pc->error = 0;
+               if (tape->failed_pc == pc)
+                       tape->failed_pc = NULL;
+       } else {
+               pc->error = IDETAPE_ERROR_GENERAL;
+               tape->failed_pc = NULL;
+       }
+       pc->callback (drive);
+}
 
+/*
+ *     General packet command callback function.
+ */
+static void idetape_pc_callback (ide_drive_t *drive)
 {
-#if IDETAPE_DEBUG_LOG
-       printk ("ide-tape: Creating TEST UNIT READY packet command\n");
-#endif /* IDETAPE_DEBUG_LOG */ 
-       pc->request_transfer=0;
-       pc->buffer=NULL;
-       pc->current_position=NULL;
-       pc->callback=&idetape_pc_callback;
-       pc->writing=0;
+       idetape_tape_t *tape = drive->driver_data;
        
-       idetape_zero_packet_command (pc);       
-       pc->c[0]=IDETAPE_TEST_UNIT_READY_CMD;
-}
+#if IDETAPE_DEBUG_LOG
+       printk (KERN_INFO "ide-tape: Reached idetape_pc_callback\n");
+#endif /* IDETAPE_DEBUG_LOG */
 
-void idetape_create_locate_cmd (idetape_packet_command_t *pc,unsigned long block,byte partition)
+       idetape_end_request (tape->pc->error ? 0:1, HWGROUP(drive));
+}
 
+static void idetape_rw_callback (ide_drive_t *drive)
 {
-       unsigned long *ptr;
+       idetape_tape_t *tape = drive->driver_data;
+       struct request *rq = HWGROUP(drive)->rq;
+       int blocks = tape->pc->actually_transferred / tape->tape_block_size;
 
-#if IDETAPE_DEBUG_LOG
-       printk ("ide-tape: Creating LOCATE packet command\n");
+#if IDETAPE_DEBUG_LOG  
+       printk (KERN_INFO "ide-tape: Reached idetape_rw_callback\n");
 #endif /* IDETAPE_DEBUG_LOG */
-       pc->request_transfer=0;
-       pc->buffer=NULL;
-       pc->current_position=NULL;
-       pc->buffer_size=0;
-       pc->wait_for_dsc=1;
-       pc->callback=&idetape_pc_callback;
-       pc->writing=0;
-               
-       idetape_zero_packet_command (pc);
-       pc->c [0]=IDETAPE_LOCATE_CMD;
-       pc->c [1]=2;
-       ptr=(unsigned long *) &(pc->c[3]);
-       *ptr=idetape_swap_long (block);
-       pc->c[8]=partition;
+
+       tape->block_address += blocks;
+       rq->current_nr_sectors -= blocks;
+
+       if (!tape->pc->error)
+               idetape_end_request (1, HWGROUP (drive));
+       else
+               idetape_end_request (tape->pc->error, HWGROUP (drive));
 }
 
-void idetape_create_rewind_cmd (idetape_packet_command_t *pc)
+static void idetape_create_locate_cmd (idetape_pc_t *pc, unsigned int block, byte partition)
+{
+       idetape_init_pc (pc);
+       pc->c[0] = IDETAPE_LOCATE_CMD;
+       pc->c[1] = 2;
+       put_unaligned (htonl (block), (unsigned int *) &pc->c[3]);
+       pc->c[8] = partition;
+       set_bit (PC_WAIT_FOR_DSC, &pc->flags);
+       pc->callback = &idetape_pc_callback;
+}
 
+static void idetape_create_rewind_cmd (idetape_pc_t *pc)
 {
-#if IDETAPE_DEBUG_LOG
-       printk ("ide-tape: Creating REWIND packet command\n");
-#endif /* IDETAPE_DEBUG_LOG */
-       pc->request_transfer=0;
-       pc->buffer=NULL;
-       pc->current_position=NULL;
-       pc->buffer_size=0;
-       pc->wait_for_dsc=1;
-       pc->callback=&idetape_pc_callback;
-       pc->writing=0;
-               
-       idetape_zero_packet_command (pc);
-       pc->c [0]=IDETAPE_REWIND_CMD;
+       idetape_init_pc (pc);
+       pc->c[0] = IDETAPE_REWIND_CMD;
+       set_bit (PC_WAIT_FOR_DSC, &pc->flags);
+       pc->callback = &idetape_pc_callback;
 }
 
 /*
  *     A mode sense command is used to "sense" tape parameters.
  */
-
-void idetape_create_mode_sense_cmd (idetape_packet_command_t *pc,byte page_code)
-
+static void idetape_create_mode_sense_cmd (idetape_pc_t *pc, byte page_code)
 {
-#if IDETAPE_DEBUG_LOG
-       printk ("ide-tape: Creating MODE SENSE packet command - Page %d\n",page_code);
-#endif /* IDETAPE_DEBUG_LOG */
-
-       pc->wait_for_dsc=0;
-       pc->callback=&idetape_pc_callback;
-       pc->writing=0;
-
-       switch (page_code) {
-               case IDETAPE_CAPABILITIES_PAGE:
-                       pc->request_transfer=24;
-       }
-               
-       idetape_zero_packet_command (pc);
-       pc->c [0]=IDETAPE_MODE_SENSE_CMD;
-       pc->c [1]=8;                            /* DBD = 1 - Don't return block descriptors for now */
-       pc->c [2]=page_code;
-       pc->c [3]=255;                          /* Don't limit the returned information */
-       pc->c [4]=255;                          /* (We will just discard data in that case) */
+       idetape_init_pc (pc);
+       pc->c[0] = IDETAPE_MODE_SENSE_CMD;
+       pc->c[1] = 8;                           /* DBD = 1 - Don't return block descriptors for now */
+       pc->c[2] = page_code;
+       pc->c[3] = 255;                         /* Don't limit the returned information */
+       pc->c[4] = 255;                         /* (We will just discard data in that case) */
+       if (page_code == IDETAPE_CAPABILITIES_PAGE)
+               pc->request_transfer = 24;
+#if IDETAPE_DEBUG_BUGS
+       else
+               printk (KERN_ERR "ide-tape: unsupported page code in create_mode_sense_cmd\n");
+#endif /* IDETAPE_DEBUG_BUGS */
+       pc->callback = &idetape_pc_callback;
 }
 
 /*
@@ -2591,373 +2126,136 @@ void idetape_create_mode_sense_cmd (idetape_packet_command_t *pc,byte page_code)
  *                     if write_filemark=0.
  *
  */
-void idetape_create_write_filemark_cmd (idetape_packet_command_t *pc,int write_filemark)
-
-{
-#if IDETAPE_DEBUG_LOG
-       printk ("Creating WRITE FILEMARK packet command\n");
-       if (!write_filemark)
-               printk ("which will only flush buffered data\n");
-#endif /* IDETAPE_DEBUG_LOG */
-       pc->request_transfer=0;
-       pc->buffer=NULL;
-       pc->current_position=NULL;
-       pc->buffer_size=0;
-       pc->wait_for_dsc=1;
-       pc->callback=&idetape_pc_callback;
-       pc->writing=0;
-               
-       idetape_zero_packet_command (pc);
-       pc->c [0]=IDETAPE_WRITE_FILEMARK_CMD;
-       if (write_filemark)
-               pc->c [4]=1;
-}
-
-void idetape_create_load_unload_cmd (idetape_packet_command_t *pc,int cmd)
-
-{
-#if IDETAPE_DEBUG_LOG
-       printk ("Creating LOAD UNLOAD packet command, cmd=%d\n",cmd);
-#endif /* IDETAPE_DEBUG_LOG */
-       pc->request_transfer=0;
-       pc->buffer=NULL;
-       pc->current_position=NULL;
-       pc->buffer_size=0;
-       pc->wait_for_dsc=1;
-       pc->callback=&idetape_pc_callback;
-       pc->writing=0;
-               
-       idetape_zero_packet_command (pc);
-       pc->c [0]=IDETAPE_LOAD_UNLOAD_CMD;
-       pc->c [4]=cmd;
-}
-
-void idetape_create_erase_cmd (idetape_packet_command_t *pc)
-
-{
-
-#if IDETAPE_DEBUG_LOG
-       printk ("Creating ERASE command\n");
-#endif /* IDETAPE_DEBUG_LOG */
-
-       pc->request_transfer=0;
-       pc->buffer=NULL;
-       pc->current_position=NULL;
-       pc->buffer_size=0;
-       pc->wait_for_dsc=1;
-       pc->callback=&idetape_pc_callback;
-       pc->writing=0;
-               
-       idetape_zero_packet_command (pc);
-       pc->c [0]=IDETAPE_ERASE_CMD;
-       pc->c [1]=1;
-}
-
-void idetape_create_read_cmd (idetape_packet_command_t *pc,unsigned long length)
-
+static void idetape_create_write_filemark_cmd (idetape_pc_t *pc,int write_filemark)
 {
-       union convert {
-               unsigned all    :32;
-               struct {
-                       unsigned b1     :8;
-                       unsigned b2     :8;
-                       unsigned b3     :8;
-                       unsigned b4     :8;
-               } b;
-       } original;
-       
-#if IDETAPE_DEBUG_LOG
-       printk ("ide-tape: Creating READ packet command\n");
-#endif /* IDETAPE_DEBUG_LOG */
-
-       original.all=length;
-
-       pc->wait_for_dsc=0;
-       pc->callback=&idetape_read_callback;
-       pc->writing=0;
-
-       idetape_zero_packet_command (pc);
-
-       pc->c [0]=IDETAPE_READ_CMD;
-       pc->c [1]=1;
-       pc->c [4]=original.b.b1;
-       pc->c [3]=original.b.b2;
-       pc->c [2]=original.b.b3;
-
-       if (length)
-               pc->dma_recommended=1;
-
-       return;
+       idetape_init_pc (pc);
+       pc->c[0] = IDETAPE_WRITE_FILEMARK_CMD;
+       pc->c[4] = write_filemark;
+       set_bit (PC_WAIT_FOR_DSC, &pc->flags);
+       pc->callback = &idetape_pc_callback;
 }
 
-void idetape_create_space_cmd (idetape_packet_command_t *pc,long count,byte cmd)
-
+static void idetape_create_load_unload_cmd (idetape_pc_t *pc,int cmd)
 {
-       union convert {
-               unsigned all    :32;
-               struct {
-                       unsigned b1     :8;
-                       unsigned b2     :8;
-                       unsigned b3     :8;
-                       unsigned b4     :8;
-               } b;
-       } original;
-       
-#if IDETAPE_DEBUG_LOG
-       printk ("ide-tape: Creating SPACE packet command\n");
-#endif /* IDETAPE_DEBUG_LOG */
-
-       original.all=count;
-
-       pc->request_transfer=0;
-       pc->buffer=NULL;
-       pc->current_position=NULL;
-       pc->buffer_size=0;
-       pc->wait_for_dsc=1;
-       pc->callback=&idetape_pc_callback;
-       pc->writing=0;
-
-       idetape_zero_packet_command (pc);
-       pc->c [0]=IDETAPE_SPACE_CMD;
-       pc->c [1]=cmd;
-       pc->c [4]=original.b.b1;
-       pc->c [3]=original.b.b2;
-       pc->c [2]=original.b.b3;
-
-       return;
+       idetape_init_pc (pc);
+       pc->c[0] = IDETAPE_LOAD_UNLOAD_CMD;
+       pc->c[4] = cmd;
+       set_bit (PC_WAIT_FOR_DSC, &pc->flags);
+       pc->callback = &idetape_pc_callback;
 }
 
-void idetape_create_write_cmd (idetape_packet_command_t *pc,unsigned long length)
-
+static void idetape_create_erase_cmd (idetape_pc_t *pc)
 {
-       union convert {
-               unsigned all    :32;
-               struct {
-                       unsigned b1     :8;
-                       unsigned b2     :8;
-                       unsigned b3     :8;
-                       unsigned b4     :8;
-               } b;
-       } original;
-       
-#if IDETAPE_DEBUG_LOG
-       printk ("ide-tape: Creating WRITE packet command\n");
-#endif /* IDETAPE_DEBUG_LOG */
-
-       original.all=length;
-
-       pc->wait_for_dsc=0;
-       pc->callback=&idetape_write_callback;
-       pc->writing=1;
-
-       idetape_zero_packet_command (pc);
-
-       pc->c [0]=IDETAPE_WRITE_CMD;
-       pc->c [1]=1;
-       pc->c [4]=original.b.b1;
-       pc->c [3]=original.b.b2;
-       pc->c [2]=original.b.b3;
-
-       if (length)
-               pc->dma_recommended=1;
-
-       return;
+       idetape_init_pc (pc);
+       pc->c[0] = IDETAPE_ERASE_CMD;
+       pc->c[1] = 1;
+       set_bit (PC_WAIT_FOR_DSC, &pc->flags);
+       pc->callback = &idetape_pc_callback;
 }
 
-void idetape_create_read_position_cmd (idetape_packet_command_t *pc)
-
+static void idetape_create_read_cmd (idetape_tape_t *tape, idetape_pc_t *pc, unsigned int length, struct buffer_head *bh)
 {
-#if IDETAPE_DEBUG_LOG
-       printk ("ide-tape: Creating READ POSITION packet command\n");
-#endif /* IDETAPE_DEBUG_LOG */
-
-       pc->request_transfer=20;
-       pc->wait_for_dsc=0;
-       pc->callback=&idetape_read_position_callback;
-       pc->writing=0;
+       idetape_init_pc (pc);
+       pc->c[0] = IDETAPE_READ_CMD;
+       put_unaligned (htonl (length), (unsigned int *) &pc->c[1]);
+       pc->c[1] = 1;
+       pc->callback = &idetape_rw_callback;
+       pc->bh = bh;
+       bh->b_count = 0;
+       pc->buffer = NULL;
+       pc->request_transfer = pc->buffer_size = length * tape->tape_block_size;
+       if (pc->request_transfer == tape->stage_size)
+               set_bit (PC_DMA_RECOMMENDED, &pc->flags);
+}
 
-       idetape_zero_packet_command (pc);
-       pc->c [0]=IDETAPE_READ_POSITION_CMD;
-       pc->c [1]=0;
+static void idetape_create_space_cmd (idetape_pc_t *pc,int count,byte cmd)
+{
+       idetape_init_pc (pc);
+       pc->c[0] = IDETAPE_SPACE_CMD;
+       put_unaligned (htonl (count), (unsigned int *) &pc->c[1]);
+       pc->c[1] = cmd;
+       set_bit (PC_WAIT_FOR_DSC, &pc->flags);
+       pc->callback = &idetape_pc_callback;
 }
 
-void idetape_read_position_callback (ide_drive_t *drive)
+static void idetape_create_write_cmd (idetape_tape_t *tape, idetape_pc_t *pc, unsigned int length, struct buffer_head *bh)
+{
+       idetape_init_pc (pc);
+       pc->c[0] = IDETAPE_WRITE_CMD;
+       put_unaligned (htonl (length), (unsigned int *) &pc->c[1]);
+       pc->c[1] = 1;
+       pc->callback = &idetape_rw_callback;
+       set_bit (PC_WRITING, &pc->flags);
+       pc->bh = bh;
+       pc->b_data = bh->b_data;
+       pc->b_count = bh->b_count;
+       pc->buffer = NULL;
+       pc->request_transfer = pc->buffer_size = length * tape->tape_block_size;
+       if (pc->request_transfer == tape->stage_size)
+               set_bit (PC_DMA_RECOMMENDED, &pc->flags);
+}
 
+static void idetape_read_position_callback (ide_drive_t *drive)
 {
-       idetape_tape_t *tape;
-       struct request *rq;
+       idetape_tape_t *tape = drive->driver_data;
        idetape_read_position_result_t *result;
        
-       tape=&(drive->tape);
-       
 #if IDETAPE_DEBUG_LOG
-       printk ("ide-tape: Reached idetape_read_position_callback\n");
+       printk (KERN_INFO "ide-tape: Reached idetape_read_position_callback\n");
 #endif /* IDETAPE_DEBUG_LOG */
 
-       rq=HWGROUP(drive)->rq;
-       
        if (!tape->pc->error) {
-               result=(idetape_read_position_result_t *) tape->pc->buffer;
+               result = (idetape_read_position_result_t *) tape->pc->buffer;
 #if IDETAPE_DEBUG_LOG
-               printk ("Request completed\n");
-               printk ("Dumping the results of the READ POSITION command\n");
-               printk ("BOP - %s\n",result->bop ? "Yes":"No");
-               printk ("EOP - %s\n",result->eop ? "Yes":"No");
+               printk (KERN_INFO "BOP - %s\n",result->bop ? "Yes":"No");
+               printk (KERN_INFO "EOP - %s\n",result->eop ? "Yes":"No");
 #endif /* IDETAPE_DEBUG_LOG */
                if (result->bpu) {
-                       printk ("ide-tape: Block location is unknown to the tape\n");
-                       printk ("Aborting request\n");
-                       tape->block_address_valid=0;
+                       printk (KERN_INFO "ide-tape: Block location is unknown to the tape\n");
+                       clear_bit (IDETAPE_ADDRESS_VALID, &tape->flags);
                        idetape_end_request (0,HWGROUP (drive));
-               }
-               else {
+               } else {
 #if IDETAPE_DEBUG_LOG
-                       printk ("Block Location - %lu\n",idetape_swap_long (result->first_block));
+                       printk (KERN_INFO "Block Location - %lu\n", ntohl (result->first_block));
 #endif /* IDETAPE_DEBUG_LOG */
-                       tape->block_address=idetape_swap_long (result->first_block);
-                       tape->block_address_valid=1;
+                       tape->partition = result->partition;
+                       tape->block_address = ntohl (result->first_block);
+                       set_bit (IDETAPE_ADDRESS_VALID, &tape->flags);
                        idetape_end_request (1,HWGROUP (drive));
                }
-       }
-       else {
-               printk ("Aborting request\n");
+       } else
                idetape_end_request (0,HWGROUP (drive));
-       }
-       return;
-}
-
-/*
- *     Our special ide-tape ioctl's.
- *
- *     Currently there aren't any significant ioctl's.
- *     mtio.h compatible commands should be issued to the character device
- *     interface.
- */
-int idetape_blkdev_ioctl (ide_drive_t *drive, struct inode *inode, struct file *file,
-                       unsigned int cmd, unsigned long arg)
-{
-       idetape_packet_command_t pc;
-       
-       pc.buffer=pc.temp_buffer;
-       pc.buffer_size=IDETAPE_TEMP_BUFFER_SIZE;
-       pc.current_position=pc.temp_buffer;
-
-#if IDETAPE_DEBUG_LOG  
-       printk ("ide-tape: Reached idetape_blkdev_ioctl\n");
-#endif /* IDETAPE_DEBUG_LOG */
-       switch (cmd) {
-               default:
-                       return -EIO;
-       }
 }
 
-/*
- *     Functions which handle requests.
- */
-
-/*
- *     idetape_end_request is used to end a request.
- */
-
-void idetape_end_request (byte uptodate, ide_hwgroup_t *hwgroup)
-
+static void idetape_create_read_position_cmd (idetape_pc_t *pc)
 {
-       ide_drive_t *drive = hwgroup->drive;
-       struct request *rq = hwgroup->rq;
-       idetape_tape_t *tape = &(drive->tape);
-       unsigned int major = HWIF(drive)->major;
-       struct blk_dev_struct *bdev = &blk_dev[major];
-       int error;
-
-#if IDETAPE_DEBUG_LOG
-       printk ("Reached idetape_end_request\n");
-#endif /* IDETAPE_DEBUG_LOG */
-
-       bdev->current_request=rq;                       /* Since we may have taken it out */
-
-       if (!rq->errors)                                /* In case rq->errors is already set, */
-               rq->errors=!uptodate;                   /* we won't change it. */
-       error=rq->errors;
-               
-       if (tape->active_data_request == rq) {          /* The request was a pipelined data transfer request */
-
-               if (rq->cmd == IDETAPE_READ_REQUEST) {
-#if IDETAPE_DEBUG_BUGS
-                       if (tape->active_stage == NULL)
-                               printk ("ide-tape: bug: active_stage is NULL in idetape_end_request\n");
-                       else                            
-#endif /* IDETAPE_DEBUG_BUGS */
-                       idetape_copy_buffer_to_stage (tape->active_stage,tape->data_buffer);
-               }
-
-               tape->active_stage=NULL;
-               tape->active_data_request=NULL;
-
-               if (rq->cmd == IDETAPE_WRITE_REQUEST) {
-                       if (rq->errors)
-                               tape->error_in_pipeline_stage=rq->errors;
-                       idetape_remove_stage_head (drive);
-               }
-               
-               if (tape->next_stage == NULL) {
-                       if (!error)
-                               idetape_increase_max_pipeline_stages (drive);
-                       ide_end_drive_cmd (drive, 0, 0);
-                       return;
-               }
-
-               idetape_active_next_stage (drive);
-
-               /*
-                *      Insert the next request into the request queue.
-                *
-                *      The choice of using ide_next or ide_end is now left
-                *      to the user.
-                */
-                
-#if IDETAPE_LOW_TAPE_PRIORITY
-               (void) ide_do_drive_cmd (drive,tape->active_data_request,ide_end);
-#else
-               (void) ide_do_drive_cmd (drive,tape->active_data_request,ide_next);
-#endif /* IDETAPE_LOW_TAPE_PRIORITY */
-       }
-       ide_end_drive_cmd (drive, 0, 0);
+       idetape_init_pc (pc);
+       pc->c[0] = IDETAPE_READ_POSITION_CMD;
+       pc->request_transfer = 20;
+       pc->callback = &idetape_read_position_callback;
 }
 
 /*
  *     idetape_do_request is our request handling function.    
  */
-
-void idetape_do_request (ide_drive_t *drive, struct request *rq, unsigned long block)
-
+static void idetape_do_request (ide_drive_t *drive, struct request *rq, unsigned long block)
 {
-       idetape_tape_t *tape=&(drive->tape);
-       idetape_packet_command_t *pc;
-       unsigned int major = HWIF(drive)->major;
-       struct blk_dev_struct *bdev = &blk_dev[major];
+       idetape_tape_t *tape = drive->driver_data;
+       idetape_pc_t *pc;
+       struct blk_dev_struct *bdev = &blk_dev[HWIF(drive)->major];
+       struct request *postponed_rq = tape->postponed_rq;
        idetape_status_reg_t status;
 
 #if IDETAPE_DEBUG_LOG
-       printk ("Current request:\n");
-       printk ("rq_status: %d, rq_dev: %u, cmd: %d, errors: %d\n",rq->rq_status,(unsigned int) rq->rq_dev,rq->cmd,rq->errors);
-       printk ("sector: %ld, nr_sectors: %ld, current_nr_sectors: %ld\n",rq->sector,rq->nr_sectors,rq->current_nr_sectors);
+       printk (KERN_INFO "rq_status: %d, rq_dev: %u, cmd: %d, errors: %d\n",rq->rq_status,(unsigned int) rq->rq_dev,rq->cmd,rq->errors);
+       printk (KERN_INFO "sector: %ld, nr_sectors: %ld, current_nr_sectors: %ld\n",rq->sector,rq->nr_sectors,rq->current_nr_sectors);
 #endif /* IDETAPE_DEBUG_LOG */
 
-       if (!IDETAPE_REQUEST_CMD (rq->cmd)) {
-
+       if (!IDETAPE_RQ_CMD (rq->cmd)) {
                /*
                 *      We do not support buffer cache originated requests.
                 */
-
-               printk ("ide-tape: Unsupported command in request queue\n");
-               printk ("ide-tape: The block device interface should not be used for data transfers.\n");
-               printk ("ide-tape: Use the character device interfaces\n");
-               printk ("ide-tape: /dev/ht0 and /dev/nht0 instead.\n");
-               printk ("ide-tape: (Run linux/scripts/MAKEDEV.ide to create them)\n");
-               printk ("ide-tape: Aborting request.\n");
-
+               printk (KERN_NOTICE "ide-tape: %s: Unsupported command in request queue\n", drive->name);
                ide_end_request (0,HWGROUP (drive));                    /* Let the common code handle it */
                return;
        }
@@ -2980,131 +2278,81 @@ void idetape_do_request (ide_drive_t *drive, struct request *rq, unsigned long b
         *
         *      The potential fragmentation inefficiency was pointed to me
         *      by Mark Lord.
+        *
+        *      Uhuh.. the following "fix" is actually not entirely correct.
+        *      Some day we should probably move to a per device request
+        *      queue, rather than per interface.
         */
-        
        if (rq->next != NULL && rq->rq_dev != rq->next->rq_dev)
                bdev->current_request=rq->next;
 
-       /* Retry a failed packet command */
-
+       /*
+        *      Retry a failed packet command
+        */
        if (tape->failed_pc != NULL && tape->pc->c[0] == IDETAPE_REQUEST_SENSE_CMD) {
-               idetape_issue_packet_command (drive,tape->failed_pc,&idetape_pc_intr);
+               idetape_issue_packet_command (drive, tape->failed_pc);
                return;
        }
-
-       /* Check if we have a postponed request */
-       
-       if (tape->postponed_rq != NULL) {
 #if IDETAPE_DEBUG_BUGS
-               if (tape->postponed_rq->rq_status != RQ_ACTIVE || rq != tape->postponed_rq) {
-                       printk ("ide-tape: ide-tape.c bug - Two DSC requests were queued\n");
+       if (postponed_rq != NULL)
+               if (postponed_rq->rq_status != RQ_ACTIVE || rq != postponed_rq) {
+                       printk (KERN_ERR "ide-tape: ide-tape.c bug - Two DSC requests were queued\n");
                        idetape_end_request (0,HWGROUP (drive));
                        return;
                }
 #endif /* IDETAPE_DEBUG_BUGS */
-               if (rq->cmd == IDETAPE_PACKET_COMMAND_REQUEST_TYPE1) {
-       
-                       /* Media access command */
-                       
-                       tape->postponed_rq = NULL;
-                       idetape_media_access_finished (drive);
-                       return;
-               }
-               
-               /*
-                * Read / Write command - DSC polling was done before the
-                * actual command - Continue normally so that the command
-                * will be performed below.
-                */
-                
-                tape->postponed_rq = NULL;
-       }       
 
-       status.all=IN_BYTE (IDETAPE_STATUS_REG);
+       tape->postponed_rq = NULL;
 
        /*
-        *      After a software reset, the status register is locked. We
-        *      will ignore the DSC value for our very first packet command,
-        *      which will restore DSC operation.
+        *      If the tape is still busy, postpone our request and service
+        *      the other device meanwhile.
         */
-
-       if (tape->reset_issued) {
-               status.b.dsc=1;
-               tape->reset_issued=0;
+       status.all = GET_STAT();
+       if (!clear_bit (IDETAPE_IGNORE_DSC, &tape->flags) && !status.b.dsc) {
+               if (postponed_rq == NULL) {
+                       tape->dsc_polling_start = jiffies;
+                       tape->dsc_polling_frequency = tape->best_dsc_rw_frequency;
+                       tape->dsc_timeout = jiffies + IDETAPE_DSC_RW_TIMEOUT;
+               } else if ((signed long) (jiffies - tape->dsc_timeout) > 0) {
+                       printk (KERN_ERR "ide-tape: %s: DSC timeout\n", tape->name);
+                       if (rq->cmd == IDETAPE_PC_RQ)
+                               idetape_media_access_finished (drive);
+                       else
+                               ide_do_reset (drive);
+                       return;
+               } else if (jiffies - tape->dsc_polling_start > IDETAPE_DSC_MA_THRESHOLD)
+                       tape->dsc_polling_frequency = IDETAPE_DSC_MA_SLOW;
+               idetape_postpone_request (drive);
+               return;
        }
-       
        switch (rq->cmd) {
-               case IDETAPE_READ_REQUEST:
-                       if (!status.b.dsc) {                            /* Tape buffer not ready to accept r/w command */
-#if IDETAPE_DEBUG_LOG
-                               printk ("ide-tape: DSC != 1 - Postponing read request\n");
-#endif /* IDETAPE_DEBUG_LOG */ 
-                               tape->dsc_polling_frequency=tape->best_dsc_rw_frequency;
-                               idetape_postpone_request (drive);       /* Allow ide.c to process requests from */
-                               return;
-                       }                       
-
+               case IDETAPE_READ_RQ:
                        pc=idetape_next_pc_storage (drive);
-
-                       idetape_create_read_cmd (pc,rq->current_nr_sectors);
-                       
-                       pc->buffer=rq->buffer;
-                       pc->buffer_size=rq->current_nr_sectors*tape->tape_block_size;
-                       pc->current_position=rq->buffer;
-                       pc->request_transfer=rq->current_nr_sectors*tape->tape_block_size;
-
-                       idetape_issue_packet_command (drive,pc,&idetape_pc_intr);
-                       return;
-               
-               case IDETAPE_WRITE_REQUEST:
-                       if (!status.b.dsc) {                            /* Tape buffer not ready to accept r/w command */
-#if IDETAPE_DEBUG_LOG
-                               printk ("ide-tape: DSC != 1 - Postponing write request\n");
-#endif /* IDETAPE_DEBUG_LOG */ 
-                               tape->dsc_polling_frequency=tape->best_dsc_rw_frequency;
-                               idetape_postpone_request (drive);       /* Allow ide.c to process requests from */
-                               return;
-                       }                       
-
+                       idetape_create_read_cmd (tape, pc, rq->current_nr_sectors, rq->bh);
+                       break;
+               case IDETAPE_WRITE_RQ:
                        pc=idetape_next_pc_storage (drive);
-
-                       idetape_create_write_cmd (pc,rq->current_nr_sectors);
-                       
-                       pc->buffer=rq->buffer;
-                       pc->buffer_size=rq->current_nr_sectors*tape->tape_block_size;
-                       pc->current_position=rq->buffer;
-                       pc->request_transfer=rq->current_nr_sectors*tape->tape_block_size;
-
-                       idetape_issue_packet_command (drive,pc,&idetape_pc_intr);
+                       idetape_create_write_cmd (tape, pc, rq->current_nr_sectors, rq->bh);
+                       break;
+               case IDETAPE_ABORTED_WRITE_RQ:
+                       rq->cmd = IDETAPE_WRITE_RQ;
+                       rq->errors = IDETAPE_ERROR_EOD;
+                       idetape_end_request (1, HWGROUP(drive));
                        return;
-                                       
-               case IDETAPE_PACKET_COMMAND_REQUEST_TYPE1:
-               case IDETAPE_PACKET_COMMAND_REQUEST_TYPE2:
-/*
- *     This should be unnecessary (postponing of a general packet command),
- *     but I have occasionally missed DSC on a media access command otherwise.
- *     ??? Still have to figure it out ...
- */
-                       if (!status.b.dsc) {                            /* Tape buffers are still not ready */
-#if IDETAPE_DEBUG_LOG
-                               printk ("ide-tape: DSC != 1 - Postponing packet command request\n");
-#endif /* IDETAPE_DEBUG_LOG */
-                               rq->cmd=IDETAPE_PACKET_COMMAND_REQUEST_TYPE2;   /* Note that we are waiting for DSC *before* we */
-                                                                               /* even issued the command */
-                               tape->dsc_polling_frequency=IDETAPE_DSC_READ_WRITE_FALLBACK_FREQUENCY;
-                               idetape_postpone_request (drive);       /* Allow ide.c to process requests from */
+               case IDETAPE_PC_RQ:
+                       if (postponed_rq != NULL) {
+                               idetape_media_access_finished (drive);
                                return;
                        }
-                       rq->cmd=IDETAPE_PACKET_COMMAND_REQUEST_TYPE1;
-                       pc=(idetape_packet_command_t *) rq->buffer;
-                       idetape_issue_packet_command (drive,pc,&idetape_pc_intr);
-                       return;
-#if IDETAPE_DEBUG_BUGS
+                       pc=(idetape_pc_t *) rq->buffer;
+                       break;
                default:
-                       printk ("ide-tape: bug in IDETAPE_REQUEST_CMD macro\n");
+                       printk (KERN_ERR "ide-tape: bug in IDETAPE_RQ_CMD macro\n");
                        idetape_end_request (0,HWGROUP (drive));
-#endif /* IDETAPE_DEBUG_BUGS */
-       }       
+                       return;
+       }
+       idetape_issue_packet_command (drive, pc);
 }
 
 /*
@@ -3127,51 +2375,16 @@ void idetape_do_request (ide_drive_t *drive, struct request *rq, unsigned long b
  *     the request to the request list without waiting for it to be serviced !
  *     In that case, we usually use idetape_queue_pc_head.
  */
-
-int idetape_queue_pc_tail (ide_drive_t *drive,idetape_packet_command_t *pc)
+static int idetape_queue_pc_tail (ide_drive_t *drive,idetape_pc_t *pc)
 {
        struct request rq;
 
        ide_init_drive_cmd (&rq);
        rq.buffer = (char *) pc;
-       rq.cmd = IDETAPE_PACKET_COMMAND_REQUEST_TYPE1;
+       rq.cmd = IDETAPE_PC_RQ;
        return ide_do_drive_cmd (drive, &rq, ide_wait);
 }
 
-/*
- *     idetape_queue_pc_head generates a new packet command request in front
- *     of the request queue, before the current request, so that it will be
- *     processed immediately, on the next pass through the driver.
- *
- *     idetape_queue_pc_head is called from the request handling part of
- *     the driver (the "bottom" part). Safe storage for the request should
- *     be allocated with idetape_next_pc_storage and idetape_next_rq_storage
- *     before calling idetape_queue_pc_head.
- *
- *     Memory for those requests is pre-allocated at initialization time, and
- *     is limited to IDETAPE_PC_STACK requests. We assume that we have enough
- *     space for the maximum possible number of inter-dependent packet commands.
- *
- *     The higher level of the driver - The ioctl handler and the character
- *     device handling functions should queue request to the lower level part
- *     and wait for their completion using idetape_queue_pc_tail or
- *     idetape_queue_rw_tail.
- */
-void idetape_queue_pc_head (ide_drive_t *drive,idetape_packet_command_t *pc,struct request *rq)
-
-{
-       unsigned int major = HWIF(drive)->major;
-       struct blk_dev_struct *bdev = &blk_dev[major];
-
-       bdev->current_request=HWGROUP (drive)->rq;              /* Since we may have taken it out */
-
-       ide_init_drive_cmd (rq);
-       rq->buffer = (char *) pc;
-       rq->cmd = IDETAPE_PACKET_COMMAND_REQUEST_TYPE1;
-       (void) ide_do_drive_cmd (drive, rq, ide_preempt);
-}
-
 /*
  *     idetape_wait_for_request installs a semaphore in a pending request
  *     and sleeps until it is serviced.
@@ -3179,20 +2392,17 @@ void idetape_queue_pc_head (ide_drive_t *drive,idetape_packet_command_t *pc,stru
  *     The caller should ensure that the request will not be serviced
  *     before we install the semaphore (usually by disabling interrupts).
  */
-void idetape_wait_for_request (struct request *rq)
-
+static void idetape_wait_for_request (struct request *rq)
 {
        struct semaphore sem = MUTEX_LOCKED;
 
 #if IDETAPE_DEBUG_BUGS
-       if (rq == NULL || !IDETAPE_REQUEST_CMD (rq->cmd)) {
-               printk ("ide-tape: bug: Trying to sleep on non-valid request\n");
-               return;         
+       if (rq == NULL || !IDETAPE_RQ_CMD (rq->cmd)) {
+               printk (KERN_ERR "ide-tape: bug: Trying to sleep on non-valid request\n");
+               return;
        }
 #endif /* IDETAPE_DEBUG_BUGS */
-
-       rq->sem=&sem;
+       rq->sem = &sem;
        down (&sem);
 }
 
@@ -3200,93 +2410,93 @@ void idetape_wait_for_request (struct request *rq)
  *     idetape_queue_rw_tail generates a read/write request for the block
  *     device interface and wait for it to be serviced.
  */
-
-int idetape_queue_rw_tail (ide_drive_t *drive,int cmd,int blocks,char *buffer)
-
+static int idetape_queue_rw_tail (ide_drive_t *drive, int cmd, int blocks, struct buffer_head *bh)
 {
-       idetape_tape_t *tape = &(drive->tape);
+       idetape_tape_t *tape = drive->driver_data;
        struct request rq;
 
 #if IDETAPE_DEBUG_LOG
-       printk ("idetape_queue_rw_tail: cmd=%d\n",cmd);
+       printk (KERN_INFO "idetape_queue_rw_tail: cmd=%d\n",cmd);
 #endif /* IDETAPE_DEBUG_LOG */
 #if IDETAPE_DEBUG_BUGS
-       if (tape->active_data_request != NULL) {
-               printk ("ide-tape: bug: the pipeline is active in idetape_queue_rw_tail\n");
+       if (idetape_pipeline_active (tape)) {
+               printk (KERN_ERR "ide-tape: bug: the pipeline is active in idetape_queue_rw_tail\n");
                return (0);
        }
 #endif /* IDETAPE_DEBUG_BUGS */        
 
        ide_init_drive_cmd (&rq);
-       rq.buffer = buffer;
+       rq.bh = bh;
        rq.cmd = cmd;
        rq.sector = tape->block_address;
        rq.nr_sectors = rq.current_nr_sectors = blocks;
        (void) ide_do_drive_cmd (drive, &rq, ide_wait);
 
-       return (tape->tape_block_size*(blocks-rq.current_nr_sectors));
+       idetape_init_merge_stage (tape);
+       if (rq.errors == IDETAPE_ERROR_GENERAL)
+               return -EIO;
+       return (tape->tape_block_size * (blocks-rq.current_nr_sectors));
 }
 
 /*
- *     idetape_add_chrdev_read_request handles character device read requests
- *     when operating in the pipelined operation mode.
+ *     idetape_add_chrdev_read_request is called from idetape_chrdev_read
+ *     to service a character device read request and add read-ahead
+ *     requests to our pipeline.
  */
-int idetape_add_chrdev_read_request (ide_drive_t *drive,int blocks,char *buffer)
-
+static int idetape_add_chrdev_read_request (ide_drive_t *drive,int blocks)
 {
-       idetape_tape_t *tape = &(drive->tape);
-       idetape_pipeline_stage_t *new_stage;
+       idetape_tape_t *tape = drive->driver_data;
+       idetape_stage_t *new_stage;
        unsigned long flags;
        struct request rq,*rq_ptr;
        int bytes_read;
        
 #if IDETAPE_DEBUG_LOG
-       printk ("Reached idetape_add_chrdev_read_request\n");
+       printk (KERN_INFO "Reached idetape_add_chrdev_read_request\n");
 #endif /* IDETAPE_DEBUG_LOG */
 
        ide_init_drive_cmd (&rq);
-       rq.cmd = IDETAPE_READ_REQUEST;
+       rq.cmd = IDETAPE_READ_RQ;
        rq.sector = tape->block_address;
        rq.nr_sectors = rq.current_nr_sectors = blocks;
 
-       if (tape->active_data_request != NULL || tape->current_number_of_stages <= tape->max_number_of_stages / 4) {
-               new_stage=idetape_kmalloc_stage (drive);
+       if (idetape_pipeline_active (tape) || tape->nr_stages <= tape->max_stages / 4) {
+               new_stage=idetape_kmalloc_stage (tape);
                while (new_stage != NULL) {
                        new_stage->rq=rq;
-                       save_flags (flags);cli ();
                        idetape_add_stage_tail (drive,new_stage);
-                       restore_flags (flags);
-                       new_stage=idetape_kmalloc_stage (drive);
+                       new_stage=idetape_kmalloc_stage (tape);
                }
-               if (tape->active_data_request == NULL)
+               if (!idetape_pipeline_active (tape))
                        idetape_insert_pipeline_into_queue (drive);
        }
-
        if (tape->first_stage == NULL) {
-
                /*
                 *      Linux is short on memory. Revert to non-pipelined
                 *      operation mode for this request.
                 */
-                
-               return (idetape_queue_rw_tail (drive,IDETAPE_READ_REQUEST,blocks,buffer));
-       }               
-       
-       save_flags (flags);cli ();
-       if (tape->active_data_request == &(tape->first_stage->rq))
+               return (idetape_queue_rw_tail (drive, IDETAPE_READ_RQ, blocks, tape->merge_stage->bh));
+       }
+       save_flags (flags);
+       cli ();
+       if (tape->active_stage == tape->first_stage)
                idetape_wait_for_request (tape->active_data_request);
        restore_flags (flags);
 
-       rq_ptr=&(tape->first_stage->rq);
-       bytes_read=tape->tape_block_size*(rq_ptr->nr_sectors-rq_ptr->current_nr_sectors);
-       rq_ptr->nr_sectors=rq_ptr->current_nr_sectors=0;
-       idetape_copy_buffer_from_stage (tape->first_stage,buffer);
-       if (rq_ptr->errors != IDETAPE_RQ_ERROR_FILEMARK)
+       rq_ptr = &tape->first_stage->rq;
+       bytes_read = tape->tape_block_size * (rq_ptr->nr_sectors - rq_ptr->current_nr_sectors);
+       rq_ptr->nr_sectors = rq_ptr->current_nr_sectors = 0;
+
+       idetape_switch_buffers (tape, tape->first_stage);
+
+       if (rq_ptr->errors != IDETAPE_ERROR_FILEMARK) {
+               clear_bit (IDETAPE_FILEMARK, &tape->flags);
                idetape_remove_stage_head (drive);
+       } else
+               set_bit (IDETAPE_FILEMARK, &tape->flags);
 #if IDETAPE_DEBUG_BUGS
        if (bytes_read > blocks*tape->tape_block_size) {
-               printk ("ide-tape: bug: trying to return more bytes than requested\n");
+               printk (KERN_ERR "ide-tape: bug: trying to return more bytes than requested\n");
                bytes_read=blocks*tape->tape_block_size;
        }
 #endif /* IDETAPE_DEBUG_BUGS */
@@ -3304,60 +2514,47 @@ int idetape_add_chrdev_read_request (ide_drive_t *drive,int blocks,char *buffer)
  *     3.      If we still can't allocate a stage, fallback to
  *             non-pipelined operation mode for this request.
  */
-
-int idetape_add_chrdev_write_request (ide_drive_t *drive,int blocks,char *buffer)
-
+static int idetape_add_chrdev_write_request (ide_drive_t *drive, int blocks)
 {
-       idetape_tape_t *tape = &(drive->tape);
-       idetape_pipeline_stage_t *new_stage;
+       idetape_tape_t *tape = drive->driver_data;
+       idetape_stage_t *new_stage;
        unsigned long flags;
        struct request *rq;
 
 #if IDETAPE_DEBUG_LOG
-       printk ("Reached idetape_add_chrdev_write_request\n");
+       printk (KERN_INFO "Reached idetape_add_chrdev_write_request\n");
 #endif /* IDETAPE_DEBUG_LOG */
-       
-       
-       new_stage=idetape_kmalloc_stage (drive);
 
-       /*
-        *      If we don't have a new stage, wait for more and more requests
-        *      to finish, and try to allocate after each one.
-        *
+       /*
+        *      Attempt to allocate a new stage.
         *      Pay special attention to possible race conditions.
         */
-
-       while (new_stage == NULL) {
-               save_flags (flags);cli ();
-               if (tape->active_data_request != NULL) {
+       while ((new_stage = idetape_kmalloc_stage (tape)) == NULL) {
+               save_flags (flags);
+               cli ();
+               if (idetape_pipeline_active (tape)) {
                        idetape_wait_for_request (tape->active_data_request);
                        restore_flags (flags);
-                       new_stage=idetape_kmalloc_stage (drive);
-               }
-               else {
+               } else {
+                       restore_flags (flags);
+                       idetape_insert_pipeline_into_queue (drive);
+                       if (idetape_pipeline_active (tape))
+                               continue;
                        /*
                         *      Linux is short on memory. Fallback to
                         *      non-pipelined operation mode for this request.
                         */
-                       
-                       restore_flags (flags);
-                       return (idetape_queue_rw_tail (drive,IDETAPE_WRITE_REQUEST,blocks,buffer));
+                       return idetape_queue_rw_tail (drive, IDETAPE_WRITE_RQ, blocks, tape->merge_stage->bh);
                }
        }
-
-       rq=&(new_stage->rq);
-
+       rq = &new_stage->rq;
        ide_init_drive_cmd (rq);
-       rq->cmd = IDETAPE_WRITE_REQUEST;
+       rq->cmd = IDETAPE_WRITE_RQ;
        rq->sector = tape->block_address;       /* Doesn't actually matter - We always assume sequential access */
-       rq->nr_sectors = blocks;
-       rq->current_nr_sectors = blocks;
+       rq->nr_sectors = rq->current_nr_sectors = blocks;
 
-       idetape_copy_buffer_to_stage (new_stage,buffer);
-
-       save_flags (flags);cli ();
+       idetape_switch_buffers (tape, new_stage);
        idetape_add_stage_tail (drive,new_stage);
-       restore_flags (flags);
 
        /*
         *      Check if we are currently servicing requests in the bottom
@@ -3367,111 +2564,127 @@ int idetape_add_chrdev_write_request (ide_drive_t *drive,int blocks,char *buffer
         *      starting to service requests, so that we will be able to
         *      keep up with the higher speeds of the tape.
         */
+       if (!idetape_pipeline_active (tape) && tape->nr_stages >= (3 * tape->max_stages) / 4)
+               idetape_insert_pipeline_into_queue (drive);
 
-       if (tape->active_data_request == NULL && tape->current_number_of_stages >= (3 * tape->max_number_of_stages) / 4)
-               idetape_insert_pipeline_into_queue (drive);             
-
-       if (tape->error_in_pipeline_stage) {            /* Return a deferred error */
-               tape->error_in_pipeline_stage=0;
-               return (-EIO);
-       }
-       
-       return (blocks);
+       if (clear_bit (IDETAPE_PIPELINE_ERROR, &tape->flags))           /* Return a deferred error */
+               return -EIO;
+       return blocks;
 }
 
-void idetape_discard_read_pipeline (ide_drive_t *drive)
-
+static void idetape_discard_read_pipeline (ide_drive_t *drive)
 {
-       idetape_tape_t *tape = &(drive->tape);
+       idetape_tape_t *tape = drive->driver_data;
        unsigned long flags;
 
 #if IDETAPE_DEBUG_BUGS
        if (tape->chrdev_direction != idetape_direction_read) {
-               printk ("ide-tape: bug: Trying to discard read pipeline, but we are not reading.\n");
+               printk (KERN_ERR "ide-tape: bug: Trying to discard read pipeline, but we are not reading.\n");
                return;
        }
 #endif /* IDETAPE_DEBUG_BUGS */
-
-       tape->merge_buffer_size=tape->merge_buffer_offset=0;
-       tape->chrdev_direction=idetape_direction_none;
+       tape->merge_stage_size = 0;
+       if (tape->merge_stage != NULL) {
+               __idetape_kfree_stage (tape->merge_stage);
+               tape->merge_stage = NULL;
+       }
+       tape->chrdev_direction = idetape_direction_none;
        
        if (tape->first_stage == NULL)
                return;
                
-       save_flags (flags);cli ();
-       tape->next_stage=NULL;
-       if (tape->active_data_request != NULL)
+       save_flags (flags);
+       cli ();
+       tape->next_stage = NULL;
+       if (idetape_pipeline_active (tape))
                idetape_wait_for_request (tape->active_data_request);
        restore_flags (flags);
 
        while (tape->first_stage != NULL)
                idetape_remove_stage_head (drive);
-
-#if IDETAPE_PIPELINE
-       tape->max_number_of_stages=IDETAPE_MIN_PIPELINE_STAGES;
-#else
-       tape->max_number_of_stages=0;
-#endif /* IDETAPE_PIPELINE */
+       tape->nr_pending_stages = 0;
+       tape->max_stages = IDETAPE_MIN_PIPELINE_STAGES;
 }
 
 /*
  *     idetape_wait_for_pipeline will wait until all pending pipeline
  *     requests are serviced. Typically called on device close.
  */
-void idetape_wait_for_pipeline (ide_drive_t *drive)
-
+static void idetape_wait_for_pipeline (ide_drive_t *drive)
 {
-       idetape_tape_t *tape = &(drive->tape);
+       idetape_tape_t *tape = drive->driver_data;
        unsigned long flags;
 
-       if (tape->active_data_request == NULL)
-               idetape_insert_pipeline_into_queue (drive);             
-
-       save_flags (flags);cli ();
-       if (tape->active_data_request == NULL) {
-               restore_flags (flags);
-               return;
-       }
-       
-       if (tape->last_stage != NULL)
-               idetape_wait_for_request (&(tape->last_stage->rq));
+       if (!idetape_pipeline_active (tape))
+               idetape_insert_pipeline_into_queue (drive);
 
-       else if (tape->active_data_request != NULL)
-               idetape_wait_for_request (tape->active_data_request);
+       save_flags (flags);
+       cli ();
+       if (!idetape_pipeline_active (tape))
+               goto abort;
+#if IDETAPE_DEBUG_BUGS
+       if (tape->last_stage == NULL)
+               printk ("ide-tape: tape->last_stage == NULL\n");
+       else
+#endif /* IDETAPE_DEBUG_BUGS */
+       idetape_wait_for_request (&tape->last_stage->rq);
+abort:
        restore_flags (flags);
 }
 
-void idetape_empty_write_pipeline (ide_drive_t *drive)
+static void idetape_pad_zeros (ide_drive_t *drive, int bcount)
+{
+       idetape_tape_t *tape = drive->driver_data;
+       struct buffer_head *bh;
+       int count, blocks;
+       
+       while (bcount) {
+               bh = tape->merge_stage->bh;
+               count = IDETAPE_MIN (tape->stage_size, bcount);
+               bcount -= count;
+               blocks = count / tape->tape_block_size;
+               while (count) {
+                       bh->b_count = IDETAPE_MIN (count, bh->b_size);
+                       memset (bh->b_data, 0, bh->b_count);
+                       count -= bh->b_count;
+                       bh = bh->b_reqnext;
+               }
+               idetape_queue_rw_tail (drive, IDETAPE_WRITE_RQ, blocks, tape->merge_stage->bh);
+       }
+}
 
+static void idetape_empty_write_pipeline (ide_drive_t *drive)
 {
-       idetape_tape_t *tape = &(drive->tape);
-       int blocks;
+       idetape_tape_t *tape = drive->driver_data;
+       int blocks, i;
        
 #if IDETAPE_DEBUG_BUGS
        if (tape->chrdev_direction != idetape_direction_write) {
-               printk ("ide-tape: bug: Trying to empty write pipeline, but we are not writing.\n");
+               printk (KERN_ERR "ide-tape: bug: Trying to empty write pipeline, but we are not writing.\n");
                return;
        }
-       if (tape->merge_buffer_size > tape->data_buffer_size) {
-               printk ("ide-tape: bug: merge_buffer too big\n");
-               tape->merge_buffer_size = tape->data_buffer_size;
+       if (tape->merge_stage_size > tape->stage_size) {
+               printk (KERN_ERR "ide-tape: bug: merge_buffer too big\n");
+               tape->merge_stage_size = tape->stage_size;
        }
 #endif /* IDETAPE_DEBUG_BUGS */
-
-       if (tape->merge_buffer_size) {
-               blocks=tape->merge_buffer_size/tape->tape_block_size;
-               if (tape->merge_buffer_size % tape->tape_block_size) {
+       if (tape->merge_stage_size) {
+               blocks=tape->merge_stage_size/tape->tape_block_size;
+               if (tape->merge_stage_size % tape->tape_block_size) {
                        blocks++;
-                       memset (tape->merge_buffer+tape->merge_buffer_size,0,tape->data_buffer_size-tape->merge_buffer_size);
+                       i = tape->tape_block_size - tape->merge_stage_size % tape->tape_block_size;
+                       memset (tape->merge_stage->bh->b_data + tape->merge_stage->bh->b_count, 0, i);
+                       tape->merge_stage->bh->b_count += i;
                }
-               (void) idetape_add_chrdev_write_request (drive,blocks,tape->merge_buffer);
-               tape->merge_buffer_size=0;
+               (void) idetape_add_chrdev_write_request (drive, blocks);
+               tape->merge_stage_size = 0;
        }
-       
        idetape_wait_for_pipeline (drive);
-
-       tape->error_in_pipeline_stage=0;
+       if (tape->merge_stage != NULL) {
+               __idetape_kfree_stage (tape->merge_stage);
+               tape->merge_stage = NULL;
+       }
+       clear_bit (IDETAPE_PIPELINE_ERROR, &tape->flags);
        tape->chrdev_direction=idetape_direction_none;
 
        /*
@@ -3480,185 +2693,241 @@ void idetape_empty_write_pipeline (ide_drive_t *drive)
         *       as some systems are constantly on, and the system load
         *       can be totally different on the next backup).
         */
-
-#if IDETAPE_PIPELINE
-       tape->max_number_of_stages=IDETAPE_MIN_PIPELINE_STAGES;
-#else
-       tape->max_number_of_stages=0;
-#endif /* IDETAPE_PIPELINE */
+       tape->max_stages = IDETAPE_MIN_PIPELINE_STAGES;
 #if IDETAPE_DEBUG_BUGS
-       if (tape->first_stage != NULL || tape->next_stage != NULL || tape->last_stage != NULL || tape->current_number_of_stages != 0) {
-               printk ("ide-tape: ide-tape pipeline bug\n");           
+       if (tape->first_stage != NULL || tape->next_stage != NULL || tape->last_stage != NULL || tape->nr_stages != 0) {
+               printk (KERN_ERR "ide-tape: ide-tape pipeline bug\n");          
+       }
+#endif /* IDETAPE_DEBUG_BUGS */
+}
+
+static int idetape_pipeline_size (ide_drive_t *drive)
+{
+       idetape_tape_t *tape = drive->driver_data;
+       idetape_stage_t *stage;
+       struct request *rq;
+       int size = 0;
+
+       idetape_wait_for_pipeline (drive);
+       stage = tape->first_stage;
+       while (stage != NULL) {
+               rq = &stage->rq;
+               size += tape->tape_block_size * (rq->nr_sectors-rq->current_nr_sectors);
+               if (rq->errors == IDETAPE_ERROR_FILEMARK)
+                       size += tape->tape_block_size;
+               stage = stage->next;
        }
-#endif /* IDETAPE_DEBUG_BUGS */
+       size += tape->merge_stage_size;
+       return size;
 }
 
 /*
- *     idetape_zero_packet_command just zeros a packet command and
- *     sets the number of retries to 0, as we haven't retried it yet.
+ *     idetape_position_tape positions the tape to the requested block
+ *     using the LOCATE packet command. A READ POSITION command is then
+ *     issued to check where we are positioned.
+ *
+ *     Like all higher level operations, we queue the commands at the tail
+ *     of the request queue and wait for their completion.
+ *     
  */
-void idetape_zero_packet_command (idetape_packet_command_t *pc)
+static int idetape_position_tape (ide_drive_t *drive, unsigned int block, byte partition)
+{
+       int retval;
+       idetape_pc_t pc;
+
+       idetape_create_locate_cmd (&pc, block, partition);
+       retval=idetape_queue_pc_tail (drive,&pc);
+       if (retval) return (retval);
+
+       idetape_create_read_position_cmd (&pc);
+       return (idetape_queue_pc_tail (drive,&pc));
+}
 
+/*
+ *     Rewinds the tape to the Beginning Of the current Partition (BOP).
+ *
+ *     We currently support only one partition.
+ */ 
+static int idetape_rewind_tape (ide_drive_t *drive)
 {
-       int i;
+       int retval;
+       idetape_pc_t pc;
+#if IDETAPE_DEBUG_LOG
+       printk (KERN_INFO "Reached idetape_rewind_tape\n");
+#endif /* IDETAPE_DEBUG_LOG */ 
        
-       for (i=0;i<12;i++)
-               pc->c[i]=0;
-       pc->retries=0;
-       pc->abort=0;
-       pc->dma_recommended=0;
-       pc->dma_error=0;
+       idetape_create_rewind_cmd (&pc);
+       retval=idetape_queue_pc_tail (drive,&pc);
+       if (retval) return (retval);
+
+       idetape_create_read_position_cmd (&pc);
+       return (idetape_queue_pc_tail (drive,&pc));
+}
+
+static int idetape_flush_tape_buffers (ide_drive_t *drive)
+{
+       idetape_pc_t pc;
+
+       idetape_create_write_filemark_cmd (&pc,0);
+       return (idetape_queue_pc_tail (drive,&pc));
 }
 
 /*
- *     idetape_swap_shorts converts a 16 bit number from little endian
- *     to big endian format.
+ *     Our special ide-tape ioctl's.
+ *
+ *     Currently there aren't any ioctl's.
+ *     mtio.h compatible commands should be issued to the character device
+ *     interface.
  */
-unsigned short idetape_swap_short (unsigned short temp)
-
+static int idetape_blkdev_ioctl (ide_drive_t *drive, struct inode *inode, struct file *file,
+                                unsigned int cmd, unsigned long arg)
 {
-       union convert {
-               unsigned all    :16;
-               struct {
-                       unsigned b1     :8;
-                       unsigned b2     :8;
-               } b;
-       } original,converted;
-       
-       original.all=temp;
-       converted.b.b1=original.b.b2;
-       converted.b.b2=original.b.b1;
-       return (converted.all);
+       idetape_tape_t *tape = drive->driver_data;
+       idetape_config_t config;
+
+#if IDETAPE_DEBUG_LOG  
+       printk (KERN_INFO "ide-tape: Reached idetape_blkdev_ioctl\n");
+#endif /* IDETAPE_DEBUG_LOG */
+       switch (cmd) {
+               case 0x0340:
+                       if (copy_from_user ((char *) &config, (char *) arg, sizeof (idetape_config_t)))
+                               return -EFAULT;
+                       tape->best_dsc_rw_frequency = config.dsc_rw_frequency;
+                       tape->max_stages = config.nr_stages;
+                       break;
+               case 0x0350:
+                       config.dsc_rw_frequency = (int) tape->best_dsc_rw_frequency;
+                       config.nr_stages = tape->max_stages; 
+                       if (copy_to_user ((char *) arg, (char *) &config, sizeof (idetape_config_t)))
+                               return -EFAULT;
+                       break;
+               default:
+                       return -EIO;
+       }
+       return 0;
 }
 
 /*
- *     idetape_swap_long converts from little endian to big endian format.
+ *     The block device interface should not be used for data transfers.
+ *     However, we still allow opening it so that we can issue general
+ *     ide driver configuration ioctl's, such as the interrupt unmask feature.
  */
-unsigned long idetape_swap_long (unsigned long temp)
-
+static int idetape_blkdev_open (struct inode *inode, struct file *filp, ide_drive_t *drive)
 {
-       union convert {
-               unsigned all    :32;
-               struct {
-                       unsigned b1     :8;
-                       unsigned b2     :8;
-                       unsigned b3     :8;
-                       unsigned b4     :8;
-               } b;
-       } original,converted;
-       
-       original.all=temp;
-       converted.b.b1=original.b.b4;
-       converted.b.b2=original.b.b3;
-       converted.b.b3=original.b.b2;
-       converted.b.b4=original.b.b1;
-       return (converted.all);
+       MOD_INC_USE_COUNT;
+       return 0;
 }
 
+static void idetape_blkdev_release (struct inode *inode, struct file *filp, ide_drive_t *drive)
+{
+       MOD_DEC_USE_COUNT;
+}
 
 /*
- *     idetape_next_pc_storage returns a pointer to a place in which we can
- *     safely store a packet command, even though we intend to leave the
- *     driver. A storage space for a maximum of IDETAPE_PC_STACK packet
- *     commands is allocated at initialization time.
+ *     idetape_pre_reset is called before an ATAPI/ATA software reset.
  */
-idetape_packet_command_t *idetape_next_pc_storage (ide_drive_t *drive)
-
+static void idetape_pre_reset (ide_drive_t *drive)
 {
-       idetape_tape_t *tape;
-       
-       tape=&(drive->tape);
-#if IDETAPE_DEBUG_LOG
-       printk ("ide-tape: pc_stack_index=%d\n",tape->pc_stack_index);
-#endif /* IDETAPE_DEBUG_LOG */
-       if (tape->pc_stack_index==IDETAPE_PC_STACK)
-               tape->pc_stack_index=0;
-       return (&(tape->pc_stack [tape->pc_stack_index++]));
+       idetape_tape_t *tape = drive->driver_data;
+       if (tape != NULL)
+               set_bit (IDETAPE_IGNORE_DSC, &tape->flags);
 }
 
 /*
- *     idetape_next_rq_storage is used along with idetape_next_pc_storage.
- *     Since we queue packet commands in the request queue, we need to
- *     allocate a request, along with the allocation of a packet command.
+ *     Character device interface functions
  */
-/**************************************************************
- *                                                            *
- *  This should get fixed to use kmalloc(GFP_ATOMIC, ..)      *
- *  followed later on by kfree().   -ml                       *
- *                                                            *
- **************************************************************/
-struct request *idetape_next_rq_storage (ide_drive_t *drive)
-
+static ide_drive_t *get_drive_ptr (kdev_t i_rdev)
 {
-       idetape_tape_t *tape;
+       unsigned int i = MINOR(i_rdev) & ~0x80;
        
-       tape=&(drive->tape);
-
-#if IDETAPE_DEBUG_LOG
-       printk ("ide-tape: rq_stack_index=%d\n",tape->rq_stack_index);
-#endif /* IDETAPE_DEBUG_LOG */
-       if (tape->rq_stack_index==IDETAPE_PC_STACK)
-               tape->rq_stack_index=0;
-       return (&(tape->rq_stack [tape->rq_stack_index++]));
+       if (i >= MAX_HWIFS * MAX_DRIVES)
+               return NULL;
+       return (idetape_chrdevs[i].drive);
 }
 
 /*
- *     Block device interface functions
+ *     idetape_space_over_filemarks is now a bit more complicated than just
+ *     passing the command to the tape since we may have crossed some
+ *     filemarks during our pipelined read-ahead mode.
  *
- *     The block device interface should not be used for data transfers.
- *     However, we still allow opening it so that we can issue general
- *     ide driver configuration ioctl's, such as the interrupt unmask feature.
+ *     As a minor side effect, the pipeline enables us to support MTFSFM when
+ *     the filemark is in our internal pipeline even if the tape doesn't
+ *     support spacing over filemarks in the reverse direction.
  */
-
-int idetape_blkdev_open (struct inode *inode, struct file *filp, ide_drive_t *drive)
-
+static int idetape_space_over_filemarks (ide_drive_t *drive,short mt_op,int mt_count)
 {
-       idetape_tape_t *tape=&(drive->tape);
+       idetape_tape_t *tape = drive->driver_data;
+       idetape_pc_t pc;
        unsigned long flags;
-                       
-       save_flags (flags);cli ();
-
-#if IDETAPE_DEBUG_LOG
-       printk ("Reached idetape_blkdev_open\n");
-#endif /* IDETAPE_DEBUG_LOG */
-
-       if (tape->busy) {
-               restore_flags (flags);          /* Allowing access only through one */
-               return (-EBUSY);                /* one file descriptor */
-       }
-
-       tape->busy=1;
-       restore_flags (flags);
-
-       return (0);
-}
+       int retval,count=0;
 
-void idetape_blkdev_release (struct inode *inode, struct file *filp, ide_drive_t *drive)
+       if (tape->chrdev_direction == idetape_direction_read) {
 
-{
-       idetape_tape_t *tape=&(drive->tape);
-       unsigned long flags;
-                       
-#if IDETAPE_DEBUG_LOG
-       printk ("Reached idetape_blkdev_release\n");
-#endif /* IDETAPE_DEBUG_LOG */
+               /*
+                *      We have a read-ahead buffer. Scan it for crossed
+                *      filemarks.
+                */
+               tape->merge_stage_size = 0;
+               clear_bit (IDETAPE_FILEMARK, &tape->flags);
+               while (tape->first_stage != NULL) {
+                       /*
+                        *      Wait until the first read-ahead request
+                        *      is serviced.
+                        */
+                       save_flags (flags);
+                       cli ();
+                       if (tape->active_stage == tape->first_stage)
+                               idetape_wait_for_request (tape->active_data_request);
+                       restore_flags (flags);
 
-       save_flags (flags);cli ();
-       tape->busy=0;
-       restore_flags (flags);
+                       if (tape->first_stage->rq.errors == IDETAPE_ERROR_FILEMARK)
+                               count++;
+                       if (count == mt_count) {
+                               switch (mt_op) {
+                                       case MTFSF:
+                                               idetape_remove_stage_head (drive);
+                                       case MTFSFM:
+                                               return (0);
+                                       default:
+                                               break;
+                               }
+                       }
+                       idetape_remove_stage_head (drive);
+               }
+               idetape_discard_read_pipeline (drive);
+       }
 
-       return;
+       /*
+        *      The filemark was not found in our internal pipeline.
+        *      Now we can issue the space command.
+        */
+       switch (mt_op) {
+               case MTFSF:
+                       idetape_create_space_cmd (&pc,mt_count-count,IDETAPE_SPACE_OVER_FILEMARK);
+                       return (idetape_queue_pc_tail (drive,&pc));
+               case MTFSFM:
+                       if (!tape->capabilities.sprev)
+                               return (-EIO);
+                       retval = idetape_space_over_filemarks (drive, MTFSF, mt_count-count);
+                       if (retval) return (retval);
+                       return (idetape_space_over_filemarks (drive, MTBSF, 1));
+               case MTBSF:
+                       if (!tape->capabilities.sprev)
+                               return (-EIO);
+                       idetape_create_space_cmd (&pc,-(mt_count+count),IDETAPE_SPACE_OVER_FILEMARK);
+                       return (idetape_queue_pc_tail (drive,&pc));
+               case MTBSFM:
+                       if (!tape->capabilities.sprev)
+                               return (-EIO);
+                       retval = idetape_space_over_filemarks (drive, MTBSF, mt_count+count);
+                       if (retval) return (retval);
+                       return (idetape_space_over_filemarks (drive, MTFSF, 1));
+               default:
+                       printk (KERN_ERR "ide-tape: MTIO operation %d not supported\n",mt_op);
+                       return (-EIO);
+       }
 }
 
-/*
- *     Character device interface functions
- */
 
 /*
  *     Our character device read / write functions.
@@ -3667,10 +2936,6 @@ void idetape_blkdev_release (struct inode *inode, struct file *filp, ide_drive_t
  *     an integral number of the "continuous transfer limit", which is
  *     a parameter of the specific tape (26 KB on my particular tape).
  *
- *     For best results use an integral number of the tape's parameter
- *     (which is displayed in the driver installation stage and is returned
- *      by the MTIOCGET ioctl).
- *
  *     As of version 1.3 of the driver, the character device provides an
  *     abstract continuous view of the media - any mix of block sizes (even 1
  *     byte) on the same backup/restore procedure is supported. The driver
@@ -3679,226 +2944,150 @@ void idetape_blkdev_release (struct inode *inode, struct file *filp, ide_drive_t
  *     size will only result in a (slightly) increased driver overhead, but
  *     will no longer hit performance.
  */
-
-int idetape_chrdev_read (struct inode *inode, struct file *file, char *buf, int count)
-
+static long idetape_chrdev_read (struct inode *inode, struct file *file, char *buf, unsigned long count)
 {
-       ide_drive_t *drive=idetape_chrdev.drive;
-       idetape_tape_t *tape=&(drive->tape);
-       char *buf_ptr=buf;
-       int bytes_read,temp,actually_read=0;
+       ide_drive_t *drive = get_drive_ptr (inode->i_rdev);
+       idetape_tape_t *tape = drive->driver_data;
+       int bytes_read,temp,actually_read=0, original_count = count;
 
 #if IDETAPE_DEBUG_LOG
-       printk ("Reached idetape_chrdev_read\n");
+       printk (KERN_INFO "Reached idetape_chrdev_read\n");
 #endif /* IDETAPE_DEBUG_LOG */
-
+       
        if (tape->chrdev_direction != idetape_direction_read) {         /* Initialize read operation */
                if (tape->chrdev_direction == idetape_direction_write) {
                        idetape_empty_write_pipeline (drive);
                        idetape_flush_tape_buffers (drive);
                }
-               
+#if IDETAPE_DEBUG_BUGS
+               if (tape->merge_stage || tape->merge_stage_size) {
+                       printk (KERN_ERR "ide-tape: merge_stage_size should be 0 now\n");
+                       tape->merge_stage_size = 0;
+               }
+#endif /* IDETAPE_DEBUG_BUGS */
+               if ((tape->merge_stage = __idetape_kmalloc_stage (tape)) == NULL)
+                       return -ENOMEM;
+               tape->chrdev_direction = idetape_direction_read;
+
                /*
                 *      Issue a read 0 command to ensure that DSC handshake
                 *      is switched from completion mode to buffer available
                 *      mode.
                 */
-                
-               bytes_read=idetape_queue_rw_tail (drive,IDETAPE_READ_REQUEST,0,tape->merge_buffer);
-               if (bytes_read < 0)
-                       return (bytes_read);
-
-               tape->chrdev_direction=idetape_direction_read;
+               bytes_read = idetape_queue_rw_tail (drive, IDETAPE_READ_RQ, 0, tape->merge_stage->bh);
+               if (bytes_read < 0) {
+                       kfree (tape->merge_stage);
+                       tape->merge_stage = NULL;
+                       tape->chrdev_direction = idetape_direction_none;
+                       return bytes_read;
+               }
+               if (test_bit (IDETAPE_DETECT_BS, &tape->flags))
+                       if (count > tape->tape_block_size && (count % tape->tape_block_size) == 0)
+                               tape->user_bs_factor = count / tape->tape_block_size;
        }
-       
        if (count==0)
                return (0);
-
-       if (tape->merge_buffer_size) {
-#if IDETAPE_DEBUG_BUGS
-               if (tape->merge_buffer_offset+tape->merge_buffer_size > tape->data_buffer_size) {
-                       printk ("ide-tape: bug: merge buffer too big\n");
-                       tape->merge_buffer_offset=0;tape->merge_buffer_size=tape->data_buffer_size-1;
-               }
-#endif /* IDETAPE_DEBUG_BUGS */
-               actually_read=IDETAPE_MIN (tape->merge_buffer_size,count);
-               copy_to_user (buf_ptr,tape->merge_buffer+tape->merge_buffer_offset,actually_read);
-               buf_ptr+=actually_read;tape->merge_buffer_size-=actually_read;
-               count-=actually_read;tape->merge_buffer_offset+=actually_read;
+       if (tape->merge_stage_size) {
+               actually_read=IDETAPE_MIN (tape->merge_stage_size,count);
+               idetape_copy_stage_to_user (tape, buf, tape->merge_stage, actually_read);
+               buf += actually_read; tape->merge_stage_size -= actually_read; count-=actually_read;
        }
-
-       while (count >= tape->data_buffer_size) {
-               bytes_read=idetape_add_chrdev_read_request (drive,tape->capabilities.ctl,tape->merge_buffer);
+       while (count >= tape->stage_size) {
+               bytes_read=idetape_add_chrdev_read_request (drive, tape->capabilities.ctl);
                if (bytes_read <= 0)
-                       return (actually_read);
-               copy_to_user (buf_ptr,tape->merge_buffer,bytes_read);
-               buf_ptr+=bytes_read;count-=bytes_read;actually_read+=bytes_read;
+                       goto finish;
+               idetape_copy_stage_to_user (tape, buf, tape->merge_stage, bytes_read);
+               buf += bytes_read; count -= bytes_read; actually_read += bytes_read;
        }
-
        if (count) {
-               bytes_read=idetape_add_chrdev_read_request (drive,tape->capabilities.ctl,tape->merge_buffer);
+               bytes_read=idetape_add_chrdev_read_request (drive, tape->capabilities.ctl);
                if (bytes_read <= 0)
-                       return (actually_read);
+                       goto finish;
                temp=IDETAPE_MIN (count,bytes_read);
-               copy_to_user (buf_ptr,tape->merge_buffer,temp);
+               idetape_copy_stage_to_user (tape, buf, tape->merge_stage, temp);
                actually_read+=temp;
-               tape->merge_buffer_offset=temp;
-               tape->merge_buffer_size=bytes_read-temp;
+               tape->merge_stage_size=bytes_read-temp;
        }
+finish:
+       if (actually_read < original_count && test_bit (IDETAPE_FILEMARK, &tape->flags))
+               idetape_space_over_filemarks (drive, MTFSF, 1);
        return (actually_read);
 }
  
-int idetape_chrdev_write (struct inode *inode, struct file *file, const char *buf, int count)
-
+static long idetape_chrdev_write (struct inode *inode, struct file *file, const char *buf, unsigned long count)
 {
-       ide_drive_t *drive=idetape_chrdev.drive;
-       idetape_tape_t *tape=&(drive->tape);
-       const char *buf_ptr=buf;
+       ide_drive_t *drive = get_drive_ptr (inode->i_rdev);
+       idetape_tape_t *tape = drive->driver_data;
        int retval,actually_written=0;
 
 #if IDETAPE_DEBUG_LOG
-       printk ("Reached idetape_chrdev_write\n");
+       printk (KERN_INFO "Reached idetape_chrdev_write\n");
 #endif /* IDETAPE_DEBUG_LOG */
 
        if (tape->chrdev_direction != idetape_direction_write) {        /* Initialize write operation */
                if (tape->chrdev_direction == idetape_direction_read)
                        idetape_discard_read_pipeline (drive);
+#if IDETAPE_DEBUG_BUGS
+               if (tape->merge_stage || tape->merge_stage_size) {
+                       printk (KERN_ERR "ide-tape: merge_stage_size should be 0 now\n");
+                       tape->merge_stage_size = 0;
+               }
+#endif /* IDETAPE_DEBUG_BUGS */
+               if ((tape->merge_stage = __idetape_kmalloc_stage (tape)) == NULL)
+                       return -ENOMEM;
+               tape->chrdev_direction = idetape_direction_write;
+               idetape_init_merge_stage (tape);
 
                /*
                 *      Issue a write 0 command to ensure that DSC handshake
                 *      is switched from completion mode to buffer available
                 *      mode.
                 */
-
-               retval=idetape_queue_rw_tail (drive,IDETAPE_WRITE_REQUEST,0,tape->merge_buffer);
-               if (retval < 0)
-                       return (retval);                
-
-               tape->chrdev_direction=idetape_direction_write;
-       }
-
-       if (count==0)
-               return (0);
-
-       if (tape->merge_buffer_size) {
-#if IDETAPE_DEBUG_BUGS
-               if (tape->merge_buffer_size >= tape->data_buffer_size) {
-                       printk ("ide-tape: bug: merge buffer too big\n");
-                       tape->merge_buffer_size=0;
-               }
-#endif /* IDETAPE_DEBUG_BUGS */
-
-               actually_written=IDETAPE_MIN (tape->data_buffer_size-tape->merge_buffer_size,count);
-               copy_from_user (tape->merge_buffer+tape->merge_buffer_size,buf_ptr,actually_written);
-               buf_ptr+=actually_written;tape->merge_buffer_size+=actually_written;count-=actually_written;
-
-               if (tape->merge_buffer_size == tape->data_buffer_size) {
-                       tape->merge_buffer_size=0;
-                       retval=idetape_add_chrdev_write_request (drive,tape->capabilities.ctl,tape->merge_buffer);
-                       if (retval <= 0)
-                               return (retval);
+               retval = idetape_queue_rw_tail (drive, IDETAPE_WRITE_RQ, 0, tape->merge_stage->bh);
+               if (retval < 0) {
+                       kfree (tape->merge_stage);
+                       tape->merge_stage = NULL;
+                       tape->chrdev_direction = idetape_direction_none;
+                       return retval;
                }
+               if (test_bit (IDETAPE_DETECT_BS, &tape->flags))
+                       if (count > tape->tape_block_size && (count % tape->tape_block_size) == 0)
+                               tape->user_bs_factor = count / tape->tape_block_size;
        }
+       if (count==0)
+               return (0);
+       if (tape->merge_stage_size) {
+#if IDETAPE_DEBUG_BUGS
+               if (tape->merge_stage_size >= tape->stage_size) {
+                       printk (KERN_ERR "ide-tape: bug: merge buffer too big\n");
+                       tape->merge_stage_size=0;
+               }
+#endif /* IDETAPE_DEBUG_BUGS */
+               actually_written=IDETAPE_MIN (tape->stage_size-tape->merge_stage_size,count);
+               idetape_copy_stage_from_user (tape, tape->merge_stage, buf, actually_written);
+               buf+=actually_written;tape->merge_stage_size+=actually_written;count-=actually_written;
 
-       while (count >= tape->data_buffer_size) {
-               copy_from_user (tape->merge_buffer,buf_ptr,tape->data_buffer_size);
-               buf_ptr+=tape->data_buffer_size;count-=tape->data_buffer_size;
-               retval=idetape_add_chrdev_write_request (drive,tape->capabilities.ctl,tape->merge_buffer);
-               actually_written+=tape->data_buffer_size;
-               if (retval <= 0)
-                       return (retval);
-       }
-
-       if (count) {
-               actually_written+=count;
-               copy_from_user (tape->merge_buffer,buf_ptr,count);
-               tape->merge_buffer_size+=count;
-       }
-       return (actually_written);
-}
-
-/*
- *     Our character device ioctls.
- *
- *     General mtio.h magnetic io commands are supported here, and not in
- *     the corresponding block interface.
- *
- *     The following ioctls are supported:
- *
- *     MTIOCTOP -      Refer to idetape_mtioctop for detailed description.
- *
- *     MTIOCGET -      The mt_dsreg field in the returned mtget structure
- *                     will be set to (recommended block size <<
- *                     MT_ST_BLKSIZE_SHIFT) & MT_ST_BLKSIZE_MASK, which
- *                     is currently equal to the size itself.
- *                     The other mtget fields are not supported.
- *
- *                     Note that we do not actually return the tape's
- *                     block size. Rather, we provide the recommended
- *                     number of bytes which should be used as a "user
- *                     block size" with the character device read/write
- *                     functions to maximize throughput.
- *
- *     MTIOCPOS -      The current tape "position" is returned.
- *                     (A unique number which can be used with the MTSEEK
- *                      operation to return to this position in some
- *                      future time, provided this place was not overwritten
- *                      meanwhile).
- *
- *     Our own ide-tape ioctls are supported on both interfaces.
- */
-
-int idetape_chrdev_ioctl (struct inode *inode, struct file *file, unsigned int cmd, unsigned long arg)
-
-{
-       ide_drive_t *drive=idetape_chrdev.drive;
-       idetape_tape_t *tape=&(drive->tape);
-       idetape_packet_command_t pc;
-       struct mtop mtop;
-       struct mtget mtget;
-       struct mtpos mtpos;
-       int retval;
-
-#if IDETAPE_DEBUG_LOG
-       printk ("Reached idetape_chrdev_ioctl, cmd=%u\n",cmd);
-#endif /* IDETAPE_DEBUG_LOG */
-
-       if (tape->chrdev_direction == idetape_direction_write) {
-               idetape_empty_write_pipeline (drive);
-               idetape_flush_tape_buffers (drive);
-       }
-
-       if (tape->chrdev_direction == idetape_direction_read && cmd != MTIOCTOP)
-               idetape_discard_read_pipeline (drive);
-       
-       pc.buffer=pc.temp_buffer;
-       pc.buffer_size=IDETAPE_TEMP_BUFFER_SIZE;
-       pc.current_position=pc.temp_buffer;
-
-       switch (cmd) {
-               case MTIOCTOP:
-                       retval=verify_area (VERIFY_READ,(char *) arg,sizeof (struct mtop));
-                       if (retval) return (retval);
-                       copy_from_user ((char *) &mtop, (char *) arg, sizeof (struct mtop));
-                       return (idetape_mtioctop (drive,mtop.mt_op,mtop.mt_count));
-               case MTIOCGET:
-                       mtget.mt_dsreg=(tape->data_buffer_size << MT_ST_BLKSIZE_SHIFT) & MT_ST_BLKSIZE_MASK;
-                       retval=verify_area (VERIFY_WRITE,(char *) arg,sizeof (struct mtget));
-                       if (retval) return (retval);
-                       copy_to_user ((char *) arg,(char *) &mtget, sizeof (struct mtget));
-                       return (0);
-               case MTIOCPOS:
-                       idetape_create_read_position_cmd (&pc);
-                       retval=idetape_queue_pc_tail (drive,&pc);
-                       if (retval) return (retval);
-                       mtpos.mt_blkno=tape->block_address;
-                       retval=verify_area (VERIFY_WRITE,(char *) arg,sizeof (struct mtpos));
-                       if (retval) return (retval);
-                       copy_to_user ((char *) arg,(char *) &mtpos, sizeof (struct mtpos));
-                       return (0);
-               default:
-                       return (idetape_blkdev_ioctl (drive,inode,file,cmd,arg));
+               if (tape->merge_stage_size == tape->stage_size) {
+                       tape->merge_stage_size = 0;
+                       retval=idetape_add_chrdev_write_request (drive, tape->capabilities.ctl);
+                       if (retval <= 0)
+                               return (retval);
+               }
+       }
+       while (count >= tape->stage_size) {
+               idetape_copy_stage_from_user (tape, tape->merge_stage, buf, tape->stage_size);
+               buf+=tape->stage_size;count-=tape->stage_size;
+               retval=idetape_add_chrdev_write_request (drive, tape->capabilities.ctl);
+               actually_written+=tape->stage_size;
+               if (retval <= 0)
+                       return (retval);
        }
+       if (count) {
+               actually_written+=count;
+               idetape_copy_stage_from_user (tape, tape->merge_stage, buf, count);
+               tape->merge_stage_size+=count;
+       }
+       return (actually_written);
 }
 
 /*
@@ -3918,7 +3107,6 @@ int idetape_chrdev_ioctl (struct inode *inode, struct file *file, unsigned int c
  *
  *     MTBSFM  -       Like MTBSF, only tape is positioned after the last filemark.
  *
- *
  *     Note:
  *
  *             MTBSF and MTBSFM are not supported when the tape doesn't
@@ -3932,8 +3120,10 @@ int idetape_chrdev_ioctl (struct inode *inode, struct file *file, unsigned int c
  *
  *     MTREW   -       Rewinds tape.
  *
+ *     MTLOAD  -       Loads the tape.
+ *
  *     MTOFFL  -       Puts the tape drive "Offline": Rewinds the tape and
- *                     prevents further access until the media is replaced.
+ *     MTUNLOAD        prevents further access until the media is replaced.
  *
  *     MTNOP   -       Flushes tape buffers.
  *
@@ -3944,35 +3134,33 @@ int idetape_chrdev_ioctl (struct inode *inode, struct file *file, unsigned int c
  *
  *     MTERASE -       Erases tape.
  *
- *     MTSEEK  -       Positions the tape in a specific block number, which
- *                     was previously received using the MTIOCPOS ioctl,
- *                     assuming this place was not overwritten meanwhile.
+ *     MTSETBLK -      Sets the user block size to mt_count bytes. If
+ *                     mt_count is 0, we will attempt to autodetect
+ *                     the block size.
+ *
+ *     MTSEEK  -       Positions the tape in a specific block number, where
+ *                     each block is assumed to contain which user_block_size
+ *                     bytes.
+ *
+ *     MTSETPART -     Switches to another tape partition.
  *
  *     The following commands are currently not supported:
  *
- *     MTFSR, MTBSR, MTFSS, MTBSS, MTWSM, MTSETBLK, MTSETDENSITY,
+ *     MTFSR, MTBSR, MTFSS, MTBSS, MTWSM, MTSETDENSITY,
  *     MTSETDRVBUFFER, MT_ST_BOOLEANS, MT_ST_WRITE_THRESHOLD.
  */
-int idetape_mtioctop (ide_drive_t *drive,short mt_op,int mt_count)
-
+static int idetape_mtioctop (ide_drive_t *drive,short mt_op,int mt_count)
 {
-       idetape_tape_t *tape=&(drive->tape);
-       idetape_packet_command_t pc;
+       idetape_tape_t *tape = drive->driver_data;
+       idetape_pc_t pc;
        int i,retval;
 
-       pc.buffer=pc.temp_buffer;
-       pc.buffer_size=IDETAPE_TEMP_BUFFER_SIZE;
-       pc.current_position=pc.temp_buffer;
-
 #if IDETAPE_DEBUG_LOG
-       printk ("Handling MTIOCTOP ioctl: mt_op=%d, mt_count=%d\n",mt_op,mt_count);
+       printk (KERN_INFO "Handling MTIOCTOP ioctl: mt_op=%d, mt_count=%d\n",mt_op,mt_count);
 #endif /* IDETAPE_DEBUG_LOG */
-
        /*
         *      Commands which need our pipelined read-ahead stages.
         */
-
        switch (mt_op) {
                case MTFSF:
                case MTFSFM:
@@ -3988,7 +3176,6 @@ int idetape_mtioctop (ide_drive_t *drive,short mt_op,int mt_count)
        /*
         *      Empty the pipeline.
         */
-
        if (tape->chrdev_direction == idetape_direction_read)
                idetape_discard_read_pipeline (drive);
 
@@ -4002,6 +3189,10 @@ int idetape_mtioctop (ide_drive_t *drive,short mt_op,int mt_count)
                        return (0);
                case MTREW:
                        return (idetape_rewind_tape (drive));
+               case MTLOAD:
+                       idetape_create_load_unload_cmd (&pc, IDETAPE_LU_LOAD_MASK);
+                       return (idetape_queue_pc_tail (drive,&pc));
+               case MTUNLOAD:
                case MTOFFL:
                        idetape_create_load_unload_cmd (&pc,!IDETAPE_LU_LOAD_MASK);
                        return (idetape_queue_pc_tail (drive,&pc));
@@ -4014,607 +3205,582 @@ int idetape_mtioctop (ide_drive_t *drive,short mt_op,int mt_count)
                        idetape_create_space_cmd (&pc,0,IDETAPE_SPACE_TO_EOD);
                        return (idetape_queue_pc_tail (drive,&pc));
                case MTERASE:
-                       retval=idetape_rewind_tape (drive);
-                       if (retval) return (retval);
+                       (void) idetape_rewind_tape (drive);
                        idetape_create_erase_cmd (&pc);
                        return (idetape_queue_pc_tail (drive,&pc));
+               case MTSETBLK:
+                       if (mt_count) {
+                               if (mt_count < tape->tape_block_size || mt_count % tape->tape_block_size)
+                                       return -EIO;
+                               tape->user_bs_factor = mt_count / tape->tape_block_size;
+                               clear_bit (IDETAPE_DETECT_BS, &tape->flags);
+                       } else
+                               set_bit (IDETAPE_DETECT_BS, &tape->flags);
+                       return 0;
                case MTSEEK:
-                       return (idetape_position_tape (drive,mt_count));
+                       return (idetape_position_tape (drive, mt_count * tape->user_bs_factor, tape->partition));
+               case MTSETPART:
+                       return (idetape_position_tape (drive, 0, mt_count));
                default:
-                       printk ("ide-tape: MTIO operation %d not supported\n",mt_op);
+                       printk (KERN_ERR "ide-tape: MTIO operation %d not supported\n",mt_op);
                        return (-EIO);
        }
 }
 
 /*
- *     idetape_space_over_filemarks is now a bit more complicated than just
- *     passing the command to the tape since we may have crossed some
- *     filemarks during our pipelined read-ahead mode.
+ *     Our character device ioctls.
  *
- *     As a minor side effect, the pipeline enables us to support MTFSFM when
- *     the filemark is in our internal pipeline even if the tape doesn't
- *     support spacing over filemarks in the reverse direction.
- */
-int idetape_space_over_filemarks (ide_drive_t *drive,short mt_op,int mt_count)
-
-{
-       idetape_tape_t *tape=&(drive->tape);
-       idetape_packet_command_t pc;
-       unsigned long flags;
-       int retval,count=0,errors;
-
-       if (tape->chrdev_direction == idetape_direction_read) {
-
-               /*
-                *      We have a read-ahead buffer. Scan it for crossed
-                *      filemarks.
-                */
-
-               tape->merge_buffer_size=tape->merge_buffer_offset=0;
-               while (tape->first_stage != NULL) {
-                       
-                       /*
-                        *      Wait until the first read-ahead request
-                        *      is serviced.
-                        */
-               
-                       save_flags (flags);cli ();
-                       if (tape->active_data_request == &(tape->first_stage->rq))
-                               idetape_wait_for_request (tape->active_data_request);
-                       restore_flags (flags);
-
-                       errors=tape->first_stage->rq.errors;
-                       if (errors == IDETAPE_RQ_ERROR_FILEMARK)
-                               count++;
-
-                       if (count == mt_count) {
-                               switch (mt_op) {
-                                       case MTFSF:
-                                               idetape_remove_stage_head (drive);
-                                       case MTFSFM:
-                                               return (0);
-                               }
-                       }
-                       idetape_remove_stage_head (drive);
-               }
-               idetape_discard_read_pipeline (drive);
-       }
-
-       /*
-        *      The filemark was not found in our internal pipeline.
-        *      Now we can issue the space command.
-        */
-
-       pc.buffer=pc.temp_buffer;
-       pc.buffer_size=IDETAPE_TEMP_BUFFER_SIZE;
-       pc.current_position=pc.temp_buffer;
-
-       switch (mt_op) {
-               case MTFSF:
-                       idetape_create_space_cmd (&pc,mt_count-count,IDETAPE_SPACE_OVER_FILEMARK);
-                       return (idetape_queue_pc_tail (drive,&pc));
-               case MTFSFM:
-                       if (!tape->capabilities.sprev)
-                               return (-EIO);
-                       retval=idetape_mtioctop (drive,MTFSF,mt_count-count);
-                       if (retval) return (retval);
-                       return (idetape_mtioctop (drive,MTBSF,1));
-               case MTBSF:
-                       if (!tape->capabilities.sprev)
-                               return (-EIO);
-                       idetape_create_space_cmd (&pc,-(mt_count+count),IDETAPE_SPACE_OVER_FILEMARK);
-                       return (idetape_queue_pc_tail (drive,&pc));
-               case MTBSFM:
-                       if (!tape->capabilities.sprev)
-                               return (-EIO);
-                       retval=idetape_mtioctop (drive,MTBSF,mt_count+count);
-                       if (retval) return (retval);
-                       return (idetape_mtioctop (drive,MTFSF,1));
-               default:
-                       printk ("ide-tape: MTIO operation %d not supported\n",mt_op);
-                       return (-EIO);
-       }
-}
-
-/*
- *     Our character device open function.
- */
-
-int idetape_chrdev_open (struct inode *inode, struct file *filp)
-
-{
-       ide_drive_t *drive=idetape_chrdev.drive;
-       idetape_tape_t *tape=&(drive->tape);
-       unsigned long flags;
-       unsigned int minor=MINOR (inode->i_rdev),allocation_length;
-                       
-       save_flags (flags);cli ();
-
-#if IDETAPE_DEBUG_LOG
-       printk ("Reached idetape_chrdev_open\n");
-#endif /* IDETAPE_DEBUG_LOG */
-
-       if (minor!=0 && minor!=128) {           /* Currently supporting only one */
-               restore_flags (flags);          /* tape drive */
-               return (-ENXIO);
-       }
-
-       if (tape->busy) {
-               restore_flags (flags);          /* Allowing access only through one */
-               return (-EBUSY);                /* one file descriptor */
-       }
-
-       tape->busy=1;
-       restore_flags (flags);
-
-       allocation_length=tape->data_buffer_size;
-       if (tape->data_buffer_size % IDETAPE_ALLOCATION_BLOCK)
-               allocation_length+=IDETAPE_ALLOCATION_BLOCK;
-
-#if IDETAPE_MINIMIZE_IDLE_MEMORY_USAGE
-       if (tape->data_buffer == NULL)
-               tape->data_buffer=kmalloc (allocation_length,GFP_KERNEL);
-       if (tape->data_buffer == NULL)
-               goto sorry;
-       if (tape->merge_buffer == NULL)
-               tape->merge_buffer=kmalloc (allocation_length,GFP_KERNEL);
-       if (tape->merge_buffer == NULL) {
-               kfree (tape->data_buffer);
-       sorry:
-               printk ("ide-tape: FATAL - Can not allocate continuous buffer of %d bytes\n",allocation_length);
-               tape->busy=0;
-               return (-EIO);
-       }
-#endif /* IDETAPE_MINIMIZE_IDLE_MEMORY_USAGE */
-
-       if (!tape->block_address_valid) {
-               if (idetape_rewind_tape (drive)) {
-                       printk ("ide-tape: Rewinding tape failed\n");
-                       tape->busy=0;
-                       return (-EIO);
-               }
-       }
-
-       return (0);
-}
-
-/*
- *     Our character device release function.
- */
-
-void idetape_chrdev_release (struct inode *inode, struct file *filp)
-
-{
-       ide_drive_t *drive=idetape_chrdev.drive;
-       idetape_tape_t *tape=&(drive->tape);
-       unsigned int minor=MINOR (inode->i_rdev);
-       idetape_packet_command_t pc;
-       unsigned long flags;
-                       
-#if IDETAPE_DEBUG_LOG
-       printk ("Reached idetape_chrdev_release\n");
-#endif /* IDETAPE_DEBUG_LOG */
-
-       if (tape->chrdev_direction == idetape_direction_write) {
-               idetape_empty_write_pipeline (drive);
-               idetape_create_write_filemark_cmd (&pc,1);      /* Write a filemark */
-               if (idetape_queue_pc_tail (drive,&pc))
-                       printk ("ide-tape: Couldn't write a filemark\n");
-       }
-       
-       if (tape->chrdev_direction == idetape_direction_read) {
-               if (minor < 128)
-                       idetape_discard_read_pipeline (drive);
-               else
-                       idetape_wait_for_pipeline (drive);
-       }
-       
-       if (minor < 128)
-               if (idetape_rewind_tape (drive))
-                       printk ("ide-tape: Rewinding tape failed\n");
-
-#if IDETAPE_MINIMIZE_IDLE_MEMORY_USAGE
-       kfree (tape->data_buffer);
-       tape->data_buffer=NULL;
-       if (!tape->merge_buffer_size) {
-               kfree (tape->merge_buffer);
-               tape->merge_buffer=NULL;
-       }
-#endif /* IDETAPE_MINIMIZE_IDLE_MEMORY_USAGE */
-
-       save_flags (flags);cli ();
-       tape->busy=0;
-       restore_flags (flags);
-
-       return;
-}
-
-/*
- *     idetape_position_tape positions the tape to the requested block
- *     using the LOCATE packet command. A READ POSITION command is then
- *     issued to check where we are positioned.
+ *     General mtio.h magnetic io commands are supported here, and not in
+ *     the corresponding block interface.
  *
- *     Like all higher level operations, we queue the commands at the tail
- *     of the request queue and wait for their completion.
- *     
- */
-int idetape_position_tape (ide_drive_t *drive,unsigned long block)
-
-{
-       int retval;
-       idetape_packet_command_t pc;
-
-       idetape_create_locate_cmd (&pc,block,0);
-       retval=idetape_queue_pc_tail (drive,&pc);
-       if (retval!=0) return (retval);
-                       
-       idetape_create_read_position_cmd (&pc);
-       pc.buffer=pc.temp_buffer;
-       pc.buffer_size=IDETAPE_TEMP_BUFFER_SIZE;
-       pc.current_position=pc.temp_buffer;
-       return (idetape_queue_pc_tail (drive,&pc));
-}
-
-/*
- *     Rewinds the tape to the Beginning Of the current Partition (BOP).
+ *     The following ioctls are supported:
  *
- *     We currently support only one partition.
- */ 
-
-int idetape_rewind_tape (ide_drive_t *drive)
-
-{
-       int retval;
-       idetape_packet_command_t pc;
-#if IDETAPE_DEBUG_LOG
-       printk ("Reached idetape_rewind_tape\n");
-#endif /* IDETAPE_DEBUG_LOG */ 
-       
-       idetape_create_rewind_cmd (&pc);
-       retval=idetape_queue_pc_tail (drive,&pc);
-       if (retval) return (retval);
-                       
-       idetape_create_read_position_cmd (&pc);
-       pc.buffer=pc.temp_buffer;
-       pc.buffer_size=IDETAPE_TEMP_BUFFER_SIZE;
-       pc.current_position=pc.temp_buffer;
-       return (idetape_queue_pc_tail (drive,&pc));
-}
-
-int idetape_flush_tape_buffers (ide_drive_t *drive)
-
-{
-       idetape_packet_command_t pc;
-
-       idetape_create_write_filemark_cmd (&pc,0);
-       return (idetape_queue_pc_tail (drive,&pc));
-}
-
-/*
- *     Pipeline related functions
- */
-
-/*
- *     idetape_kmalloc_stage uses kmalloc to allocate a pipeline stage,
- *     along with all the necessary small buffers which together make
- *     a buffer of size tape->data_buffer_size or a bit more, in case
- *     it is not a multiply of IDETAPE_ALLOCATION_BLOCK (it isn't ...).
+ *     MTIOCTOP -      Refer to idetape_mtioctop for detailed description.
  *
- *     Returns a pointer to the new allocated stage, or NULL if we
- *     can't (or don't want to, in case we already have too many stages)
- *     allocate a stage.
+ *     MTIOCGET -      The mt_dsreg field in the returned mtget structure
+ *                     will be set to (user block size in bytes <<
+ *                     MT_ST_BLKSIZE_SHIFT) & MT_ST_BLKSIZE_MASK.
  *
- *     Pipeline stages are optional and are used to increase performance.
- *     If we can't allocate them, we'll manage without them.
- */
-idetape_pipeline_stage_t *idetape_kmalloc_stage (ide_drive_t *drive)
-
-{
-       idetape_tape_t *tape=&(drive->tape);
-       idetape_pipeline_stage_t *new_stage;
-       idetape_buffer_head_t *prev_bh,*bh;
-       int buffers_num,i;
-       
-#if IDETAPE_DEBUG_LOG
-       printk ("Reached idetape_kmalloc_stage\n");
-#endif /* IDETAPE_DEBUG_LOG */
-
-       if (tape->current_number_of_stages>=tape->max_number_of_stages) {
-               return (NULL);
-       }
-               
-       new_stage=(idetape_pipeline_stage_t *) kmalloc (sizeof (idetape_pipeline_stage_t),GFP_KERNEL);
-       if (new_stage==NULL)
-               return (NULL);
-               
-       new_stage->next=new_stage->prev=NULL;
-
-       buffers_num=tape->data_buffer_size / IDETAPE_ALLOCATION_BLOCK;
-       if (tape->data_buffer_size % IDETAPE_ALLOCATION_BLOCK)
-               buffers_num++;
-
-       prev_bh=new_stage->bh=(idetape_buffer_head_t *) kmalloc (sizeof (idetape_buffer_head_t),GFP_KERNEL);
-       if (new_stage->bh==NULL) {
-               idetape_kfree_stage (new_stage);
-               return (NULL);
-       }
-       new_stage->bh->next=NULL;
-
-       new_stage->bh->data=kmalloc (IDETAPE_ALLOCATION_BLOCK,GFP_KERNEL);
-       if (new_stage->bh->data==NULL) {
-               idetape_kfree_stage (new_stage);
-               return (NULL);
-       }
-       
-       for (i=1;i<buffers_num;i++) {
-               bh=(idetape_buffer_head_t *) kmalloc (sizeof (idetape_buffer_head_t),GFP_KERNEL);
-               if (bh==NULL) {
-                       idetape_kfree_stage (new_stage);
-                       return (NULL);
-               }
-               bh->next=NULL;
-               prev_bh->next=bh;
-               bh->data=kmalloc (IDETAPE_ALLOCATION_BLOCK,GFP_KERNEL);
-               if (bh->data == NULL) {
-                       idetape_kfree_stage (new_stage);
-                       return (NULL);
-               }
-               prev_bh=bh;
-       }
-       return (new_stage);
-}
-
-/*
- *     idetape_kfree_stage calls kfree to completely free a stage, along with
- *     its related buffers.
+ *                     The mt_blkno is set to the current user block number.
+ *                     The other mtget fields are not supported.
+ *
+ *     MTIOCPOS -      The current tape "block position" is returned. We
+ *                     assume that each block contains user_block_size
+ *                     bytes.
+ *
+ *     Our own ide-tape ioctls are supported on both interfaces.
  */
-void idetape_kfree_stage (idetape_pipeline_stage_t *stage)
-
+static int idetape_chrdev_ioctl (struct inode *inode, struct file *file, unsigned int cmd, unsigned long arg)
 {
-       idetape_buffer_head_t *prev_bh,*bh;
-       
-       if (stage == NULL)
-               return;
+       ide_drive_t *drive = get_drive_ptr (inode->i_rdev);
+       idetape_tape_t *tape = drive->driver_data;
+       idetape_pc_t pc;
+       struct mtop mtop;
+       struct mtget mtget;
+       struct mtpos mtpos;
+       int retval, block_offset = 0;
 
 #if IDETAPE_DEBUG_LOG
-       printk ("Reached idetape_kfree_stage\n");
+       printk (KERN_INFO "Reached idetape_chrdev_ioctl, cmd=%u\n",cmd);
 #endif /* IDETAPE_DEBUG_LOG */
-       
-       bh=stage->bh;
-       
-       while (bh != NULL) {
-               prev_bh=bh;
-               if (bh->data != NULL)
-                       kfree (bh->data);
-               bh=bh->next;
-               kfree (prev_bh);
+
+       if (tape->chrdev_direction == idetape_direction_write) {
+               idetape_empty_write_pipeline (drive);
+               idetape_flush_tape_buffers (drive);
+       }
+       if (cmd == MTIOCGET || cmd == MTIOCPOS) {
+               block_offset = idetape_pipeline_size (drive) / (tape->tape_block_size * tape->user_bs_factor);
+               idetape_create_read_position_cmd (&pc);
+               retval=idetape_queue_pc_tail (drive,&pc);
+               if (retval) return (retval);
+       }
+       switch (cmd) {
+               case MTIOCTOP:
+                       if (copy_from_user ((char *) &mtop, (char *) arg, sizeof (struct mtop)))
+                               return -EFAULT;
+                       return (idetape_mtioctop (drive,mtop.mt_op,mtop.mt_count));
+               case MTIOCGET:
+                       memset (&mtget, 0, sizeof (struct mtget));
+                       mtget.mt_blkno = tape->block_address / tape->user_bs_factor - block_offset;
+                       mtget.mt_dsreg = ((tape->tape_block_size * tape->user_bs_factor) << MT_ST_BLKSIZE_SHIFT) & MT_ST_BLKSIZE_MASK;
+                       if (copy_to_user ((char *) arg,(char *) &mtget, sizeof (struct mtget)))
+                               return -EFAULT;
+                       return 0;
+               case MTIOCPOS:
+                       mtpos.mt_blkno = tape->block_address / tape->user_bs_factor - block_offset;
+                       if (copy_to_user ((char *) arg,(char *) &mtpos, sizeof (struct mtpos)))
+                               return -EFAULT;
+                       return 0;
+               default:
+                       if (tape->chrdev_direction == idetape_direction_read)
+                               idetape_discard_read_pipeline (drive);
+                       return (idetape_blkdev_ioctl (drive,inode,file,cmd,arg));
        }
-       
-       kfree (stage);
-       return;
 }
 
 /*
- *     idetape_copy_buffer_from_stage and idetape_copy_buffer_to_stage
- *     copy data from/to the small buffers into/from a continuous buffer.
+ *     Our character device open function.
  */
-  
-void idetape_copy_buffer_from_stage (idetape_pipeline_stage_t *stage,char *buffer)
-
+static int idetape_chrdev_open (struct inode *inode, struct file *filp)
 {
-       idetape_buffer_head_t *bh;
-       char *ptr;
-
+       ide_drive_t *drive;
+       idetape_tape_t *tape;
+       idetape_pc_t pc;
+                       
 #if IDETAPE_DEBUG_LOG
-       printk ("Reached idetape_copy_buffer_from_stage\n");
+       printk (KERN_INFO "Reached idetape_chrdev_open\n");
 #endif /* IDETAPE_DEBUG_LOG */
-#if IDETAPE_DEBUG_BUGS
-       if (buffer == NULL) {
-               printk ("ide-tape: bug: buffer is null in copy_buffer_from_stage\n");
-               return;
-       }
-#endif /* IDETAPE_DEBUG_BUGS */
        
-       ptr=buffer;
-       bh=stage->bh;
-       
-       while (bh != NULL) {
-#if IDETAPE_DEBUG_BUGS
-               if (bh->data == NULL) {
-                       printk ("ide-tape: bug: bh->data is null\n");
-                       return;
-               }
-#endif /* IDETAPE_DEBUG_BUGS */
-               memcpy (ptr,bh->data,IDETAPE_ALLOCATION_BLOCK);
-               bh=bh->next;
-               ptr=ptr+IDETAPE_ALLOCATION_BLOCK;
-       }
-       return;
+       if ((drive = get_drive_ptr (inode->i_rdev)) == NULL)
+               return -ENXIO;
+       tape = drive->driver_data;
+
+       if (set_bit (IDETAPE_BUSY, &tape->flags))
+               return -EBUSY;
+       MOD_INC_USE_COUNT;
+       idetape_create_read_position_cmd (&pc);
+       (void) idetape_queue_pc_tail (drive,&pc);
+       if (!test_bit (IDETAPE_ADDRESS_VALID, &tape->flags))
+               (void) idetape_rewind_tape (drive);
+       MOD_DEC_USE_COUNT;
+
+       if (tape->chrdev_direction == idetape_direction_none)
+               MOD_INC_USE_COUNT;
+       return 0;
 }
 
 /*
- *     Here we copy a continuous data buffer to the various small buffers
- *     in the pipeline stage.
+ *     Our character device release function.
  */
-void idetape_copy_buffer_to_stage (idetape_pipeline_stage_t *stage,char *buffer)
-
+static void idetape_chrdev_release (struct inode *inode, struct file *filp)
 {
-       idetape_buffer_head_t *bh;
-       char *ptr;
-
+       ide_drive_t *drive = get_drive_ptr (inode->i_rdev);
+       idetape_tape_t *tape = drive->driver_data;
+       unsigned int minor=MINOR (inode->i_rdev);
+       idetape_pc_t pc;
+                       
 #if IDETAPE_DEBUG_LOG
-       printk ("Reached idetape_copy_buffer_to_stage\n");
+       printk (KERN_INFO "Reached idetape_chrdev_release\n");
 #endif /* IDETAPE_DEBUG_LOG */
-#if IDETAPE_DEBUG_BUGS
-       if (buffer == NULL) {
-               printk ("ide-tape: bug: buffer is null in copy_buffer_to_stage\n");
-               return;
-       }
-#endif /* IDETAPE_DEBUG_BUGS */
 
-       ptr=buffer;
-       bh=stage->bh;
-       
-       while (bh != NULL) {
-#if IDETAPE_DEBUG_BUGS
-               if (bh->data == NULL) {
-                       printk ("ide-tape: bug: bh->data is null\n");
-                       return;
+       if (tape->chrdev_direction == idetape_direction_write) {
+               idetape_empty_write_pipeline (drive);
+               tape->merge_stage = __idetape_kmalloc_stage (tape);
+               if (tape->merge_stage != NULL) {
+                       idetape_pad_zeros (drive, tape->tape_block_size * (tape->user_bs_factor - 1));
+                       __idetape_kfree_stage (tape->merge_stage);
+                       tape->merge_stage = NULL;
                }
-#endif /* IDETAPE_DEBUG_BUGS */
-               memcpy (bh->data,ptr,IDETAPE_ALLOCATION_BLOCK);
-               bh=bh->next;
-               ptr=ptr+IDETAPE_ALLOCATION_BLOCK;
+               idetape_create_write_filemark_cmd (&pc,1);      /* Write a filemark */
+               if (idetape_queue_pc_tail (drive,&pc))
+                       printk (KERN_ERR "ide-tape: Couldn't write a filemark\n");
+       }
+       if (tape->chrdev_direction == idetape_direction_read) {
+               if (minor < 128)
+                       idetape_discard_read_pipeline (drive);
+               else
+                       idetape_wait_for_pipeline (drive);
+       }
+       if (tape->cache_stage != NULL) {
+               __idetape_kfree_stage (tape->cache_stage);
+               tape->cache_stage = NULL;
        }
-       return;
+       if (minor < 128)
+               (void) idetape_rewind_tape (drive);
+
+       clear_bit (IDETAPE_BUSY, &tape->flags);
+       if (tape->chrdev_direction == idetape_direction_none)
+               MOD_DEC_USE_COUNT;
 }
 
 /*
- *     idetape_increase_max_pipeline_stages is a part of the feedback
- *     loop which tries to find the optimum number of stages. In the
- *     feedback loop, we are starting from a minimum maximum number of
- *     stages, and if we sense that the pipeline is empty, we try to
- *     increase it, until we reach the user compile time memory limit.
+ *     idetape_identify_device is called to check the contents of the
+ *     ATAPI IDENTIFY command results. We return:
+ *
+ *     1       If the tape can be supported by us, based on the information
+ *             we have so far.
+ *
+ *     0       If this tape driver is not currently supported by us.
  */
-
-void idetape_increase_max_pipeline_stages (ide_drive_t *drive)
-
+static int idetape_identify_device (ide_drive_t *drive,struct hd_driveid *id)
 {
-       idetape_tape_t *tape=&(drive->tape);
-       
+       struct idetape_id_gcw gcw;
 #if IDETAPE_DEBUG_LOG
-       printk ("Reached idetape_increase_max_pipeline_stages\n");
+       unsigned short mask,i;
 #endif /* IDETAPE_DEBUG_LOG */
 
-       tape->max_number_of_stages+=IDETAPE_INCREASE_STAGES_RATE;
-
-       if (tape->max_number_of_stages >= IDETAPE_MAX_PIPELINE_STAGES)
-               tape->max_number_of_stages = IDETAPE_MAX_PIPELINE_STAGES;
+       *((unsigned short *) &gcw) = id->config;
 
 #if IDETAPE_DEBUG_LOG
-       printk ("Maximum number of stages: %d\n",tape->max_number_of_stages);
+       printk (KERN_INFO "Dumping ATAPI Identify Device tape parameters\n");
+       printk (KERN_INFO "Protocol Type: ");
+       switch (gcw.protocol) {
+               case 0: case 1: printk (KERN_INFO "ATA\n");break;
+               case 2: printk (KERN_INFO "ATAPI\n");break;
+               case 3: printk (KERN_INFO "Reserved (Unknown to ide-tape)\n");break;
+       }
+       printk (KERN_INFO "Device Type: %x - ",gcw.device_type);        
+       switch (gcw.device_type) {
+               case 0: printk (KERN_INFO "Direct-access Device\n");break;
+               case 1: printk (KERN_INFO "Streaming Tape Device\n");break;
+               case 2: case 3: case 4: printk (KERN_INFO "Reserved\n");break;
+               case 5: printk (KERN_INFO "CD-ROM Device\n");break;
+               case 6: printk (KERN_INFO "Reserved\n");
+               case 7: printk (KERN_INFO "Optical memory Device\n");break;
+               case 0x1f: printk (KERN_INFO "Unknown or no Device type\n");break;
+               default: printk (KERN_INFO "Reserved\n");
+       }
+       printk (KERN_INFO "Removable: %s",gcw.removable ? "Yes\n":"No\n");      
+       printk (KERN_INFO "Command Packet DRQ Type: ");
+       switch (gcw.drq_type) {
+               case 0: printk (KERN_INFO "Microprocessor DRQ\n");break;
+               case 1: printk (KERN_INFO "Interrupt DRQ\n");break;
+               case 2: printk (KERN_INFO "Accelerated DRQ\n");break;
+               case 3: printk (KERN_INFO "Reserved\n");break;
+       }
+       printk (KERN_INFO "Command Packet Size: ");
+       switch (gcw.packet_size) {
+               case 0: printk (KERN_INFO "12 bytes\n");break;
+               case 1: printk (KERN_INFO "16 bytes\n");break;
+               default: printk (KERN_INFO "Reserved\n");break;
+       }
+       printk (KERN_INFO "Model: %s\n",id->model);
+       printk (KERN_INFO "Firmware Revision: %s\n",id->fw_rev);
+       printk (KERN_INFO "Serial Number: %s\n",id->serial_no);
+       printk (KERN_INFO "Write buffer size: %d bytes\n",id->buf_size*512);
+       printk (KERN_INFO "DMA: %s",id->capability & 0x01 ? "Yes\n":"No\n");
+       printk (KERN_INFO "LBA: %s",id->capability & 0x02 ? "Yes\n":"No\n");
+       printk (KERN_INFO "IORDY can be disabled: %s",id->capability & 0x04 ? "Yes\n":"No\n");
+       printk (KERN_INFO "IORDY supported: %s",id->capability & 0x08 ? "Yes\n":"Unknown\n");
+       printk (KERN_INFO "ATAPI overlap supported: %s",id->capability & 0x20 ? "Yes\n":"No\n");
+       printk (KERN_INFO "PIO Cycle Timing Category: %d\n",id->tPIO);
+       printk (KERN_INFO "DMA Cycle Timing Category: %d\n",id->tDMA);
+       printk (KERN_INFO "Single Word DMA supported modes: ");
+       for (i=0,mask=1;i<8;i++,mask=mask << 1) {
+               if (id->dma_1word & mask)
+                       printk (KERN_INFO "%d ",i);
+               if (id->dma_1word & (mask << 8))
+                       printk (KERN_INFO "(active) ");
+       }
+       printk (KERN_INFO "\n");
+       printk (KERN_INFO "Multi Word DMA supported modes: ");
+       for (i=0,mask=1;i<8;i++,mask=mask << 1) {
+               if (id->dma_mword & mask)
+                       printk (KERN_INFO "%d ",i);
+               if (id->dma_mword & (mask << 8))
+                       printk (KERN_INFO "(active) ");
+       }
+       printk (KERN_INFO "\n");
+       if (id->field_valid & 0x0002) {
+               printk (KERN_INFO "Enhanced PIO Modes: %s\n",id->eide_pio_modes & 1 ? "Mode 3":"None");
+               printk (KERN_INFO "Minimum Multi-word DMA cycle per word: ");
+               if (id->eide_dma_min == 0)
+                       printk (KERN_INFO "Not supported\n");
+               else
+                       printk (KERN_INFO "%d ns\n",id->eide_dma_min);
+
+               printk (KERN_INFO "Manufacturer\'s Recommended Multi-word cycle: ");
+               if (id->eide_dma_time == 0)
+                       printk (KERN_INFO "Not supported\n");
+               else
+                       printk (KERN_INFO "%d ns\n",id->eide_dma_time);
+
+               printk (KERN_INFO "Minimum PIO cycle without IORDY: ");
+               if (id->eide_pio == 0)
+                       printk (KERN_INFO "Not supported\n");
+               else
+                       printk (KERN_INFO "%d ns\n",id->eide_pio);
+
+               printk (KERN_INFO "Minimum PIO cycle with IORDY: ");
+               if (id->eide_pio_iordy == 0)
+                       printk (KERN_INFO "Not supported\n");
+               else
+                       printk (KERN_INFO "%d ns\n",id->eide_pio_iordy);
+               
+       } else
+               printk (KERN_INFO "According to the device, fields 64-70 are not valid.\n");
 #endif /* IDETAPE_DEBUG_LOG */
 
-       return;
+       /* Check that we can support this device */
+
+       if (gcw.protocol !=2 )
+               printk (KERN_ERR "ide-tape: Protocol is not ATAPI\n");
+       else if (gcw.device_type != 1)
+               printk (KERN_ERR "ide-tape: Device type is not set to tape\n");
+       else if (!gcw.removable)
+               printk (KERN_ERR "ide-tape: The removable flag is not set\n");
+       else if (gcw.drq_type != 2) {
+               printk (KERN_ERR "ide-tape: Sorry, DRQ types other than Accelerated DRQ\n");
+               printk (KERN_ERR "ide-tape: are still not supported by the driver\n");
+       } else if (gcw.packet_size != 0) {
+               printk (KERN_ERR "ide-tape: Packet size is not 12 bytes long\n");
+               if (gcw.packet_size == 1)
+                       printk (KERN_ERR "ide-tape: Sorry, padding to 16 bytes is still not supported\n");
+       } else
+               return 1;
+       return 0;
 }
 
 /*
- *     idetape_add_stage_tail adds a new stage at the end of the pipeline.
- *
- *     Caller should disable interrupts, if necessary.
+ *     idetape_get_mode_sense_results asks the tape about its various
+ *     parameters. In particular, we will adjust our data transfer buffer
+ *     size to the recommended value as returned by the tape.
  */
-void idetape_add_stage_tail (ide_drive_t *drive,idetape_pipeline_stage_t *stage)
-
+static void idetape_get_mode_sense_results (ide_drive_t *drive)
 {
-       idetape_tape_t *tape=&(drive->tape);
+       idetape_tape_t *tape = drive->driver_data;
+       idetape_pc_t pc;
+       idetape_mode_parameter_header_t *header;
+       idetape_capabilities_page_t *capabilities;
        
-#if IDETAPE_DEBUG_LOG
-               printk ("Reached idetape_add_stage_tail\n");
+       idetape_create_mode_sense_cmd (&pc,IDETAPE_CAPABILITIES_PAGE);
+       if (idetape_queue_pc_tail (drive,&pc)) {
+               printk (KERN_ERR "ide-tape: Can't get tape parameters - assuming some default values\n");
+               tape->tape_block_size = 512; tape->capabilities.ctl = 52;
+               tape->capabilities.speed = 450; tape->capabilities.buffer_size = 6 * 52;
+               return;
+       }
+       header = (idetape_mode_parameter_header_t *) pc.buffer;
+       capabilities = (idetape_capabilities_page_t *) (header + 1);
+
+       capabilities->max_speed = ntohs (capabilities->max_speed);
+       capabilities->ctl = ntohs (capabilities->ctl);
+       capabilities->speed = ntohs (capabilities->speed);
+       capabilities->buffer_size = ntohs (capabilities->buffer_size);
+
+       tape->capabilities = *capabilities;             /* Save us a copy */
+       tape->tape_block_size = capabilities->blk512 ? 512:1024;
+#if IDETAPE_DEBUG_LOG
+       printk (KERN_INFO "Dumping the results of the MODE SENSE packet command\n");
+       printk (KERN_INFO "Mode Parameter Header:\n");
+       printk (KERN_INFO "Mode Data Length - %d\n",header->mode_data_length);
+       printk (KERN_INFO "Medium Type - %d\n",header->medium_type);
+       printk (KERN_INFO "Device Specific Parameter - %d\n",header->dsp);
+       printk (KERN_INFO "Block Descriptor Length - %d\n",header->bdl);
+       
+       printk (KERN_INFO "Capabilities and Mechanical Status Page:\n");
+       printk (KERN_INFO "Page code - %d\n",capabilities->page_code);
+       printk (KERN_INFO "Page length - %d\n",capabilities->page_length);
+       printk (KERN_INFO "Read only - %s\n",capabilities->ro ? "Yes":"No");
+       printk (KERN_INFO "Supports reverse space - %s\n",capabilities->sprev ? "Yes":"No");
+       printk (KERN_INFO "Supports erase initiated formatting - %s\n",capabilities->efmt ? "Yes":"No");
+       printk (KERN_INFO "Supports QFA two Partition format - %s\n",capabilities->qfa ? "Yes":"No");
+       printk (KERN_INFO "Supports locking the medium - %s\n",capabilities->lock ? "Yes":"No");
+       printk (KERN_INFO "The volume is currently locked - %s\n",capabilities->locked ? "Yes":"No");
+       printk (KERN_INFO "The device defaults in the prevent state - %s\n",capabilities->prevent ? "Yes":"No");
+       printk (KERN_INFO "Supports ejecting the medium - %s\n",capabilities->eject ? "Yes":"No");
+       printk (KERN_INFO "Supports error correction - %s\n",capabilities->ecc ? "Yes":"No");
+       printk (KERN_INFO "Supports data compression - %s\n",capabilities->cmprs ? "Yes":"No");
+       printk (KERN_INFO "Supports 512 bytes block size - %s\n",capabilities->blk512 ? "Yes":"No");
+       printk (KERN_INFO "Supports 1024 bytes block size - %s\n",capabilities->blk1024 ? "Yes":"No");
+       printk (KERN_INFO "Restricted byte count for PIO transfers - %s\n",capabilities->slowb ? "Yes":"No");
+       printk (KERN_INFO "Maximum supported speed in KBps - %d\n",capabilities->max_speed);
+       printk (KERN_INFO "Continuous transfer limits in blocks - %d\n",capabilities->ctl);
+       printk (KERN_INFO "Current speed in KBps - %d\n",capabilities->speed);  
+       printk (KERN_INFO "Buffer size - %d\n",capabilities->buffer_size*512);
 #endif /* IDETAPE_DEBUG_LOG */
-
-       stage->next=NULL;
-       stage->prev=tape->last_stage;
-       if (tape->last_stage != NULL)
-               tape->last_stage->next=stage;
-       else
-               tape->first_stage=tape->next_stage=stage;
-       tape->last_stage=stage;
-       if (tape->next_stage == NULL)
-               tape->next_stage=tape->last_stage;
-       tape->current_number_of_stages++;
 }
 
 /*
- *     idetape_remove_stage_head removes tape->first_stage from the pipeline.
+ *     ide_setup is called to:
+ *
+ *             1.      Initialize our various state variables.
+ *             2.      Ask the tape for its capabilities.
+ *             3.      Allocate a buffer which will be used for data
+ *                     transfer. The buffer size is chosen based on
+ *                     the recommendation which we received in step (2).
  *
- *     Again, caller should avoid race conditions.
+ *     Note that at this point ide.c already assigned us an irq, so that
+ *     we can queue requests here and wait for their completion.
  */
-void idetape_remove_stage_head (ide_drive_t *drive)
-
+static void idetape_setup (ide_drive_t *drive, idetape_tape_t *tape, int minor)
 {
-       idetape_tape_t *tape=&(drive->tape);
-       idetape_pipeline_stage_t *stage;
-       
-#if IDETAPE_DEBUG_LOG
-               printk ("Reached idetape_remove_stage_head\n");
-#endif /* IDETAPE_DEBUG_LOG */
-#if IDETAPE_DEBUG_BUGS
-       if (tape->first_stage == NULL) {
-               printk ("ide-tape: bug: tape->first_stage is NULL\n");
-               return;         
+       ide_hwif_t *hwif = HWIF(drive);
+       unsigned long t1, tmid, tn, t;
+
+       drive->driver_data = tape;
+       drive->ready_stat = 0;                  /* An ATAPI device ignores DRDY */
+       memset (tape, 0, sizeof (idetape_tape_t));
+       tape->drive = drive;
+       tape->minor = minor;
+       tape->name[0] = 'h'; tape->name[1] = 't'; tape->name[2] = '0' + minor;
+       tape->chrdev_direction = idetape_direction_none;
+       tape->pc = tape->pc_stack;
+       tape->max_stages = IDETAPE_MIN_PIPELINE_STAGES;
+
+       idetape_get_mode_sense_results (drive);
+
+       tape->user_bs_factor = 1;
+       tape->stage_size = tape->capabilities.ctl * tape->tape_block_size;
+       while (tape->stage_size > 0xffff) {
+               printk (KERN_NOTICE "ide-tape: decreasing stage size\n");
+               tape->capabilities.ctl /= 2;
+               tape->stage_size = tape->capabilities.ctl * tape->tape_block_size;
        }
-       if (tape->active_stage == tape->first_stage) {
-               printk ("ide-tape: bug: Trying to free our active pipeline stage\n");
-               return;
+       tape->pages_per_stage = tape->stage_size / PAGE_SIZE;
+       if (tape->stage_size % PAGE_SIZE) {
+               tape->pages_per_stage++;
+               tape->excess_bh_size = PAGE_SIZE - tape->stage_size % PAGE_SIZE;
        }
-#endif /* IDETAPE_DEBUG_BUGS */
-       stage=tape->first_stage;
-       tape->first_stage=stage->next;
-       idetape_kfree_stage (stage);
-       tape->current_number_of_stages--;
-       if (tape->first_stage == NULL) {
-               tape->last_stage=NULL;
-#if IDETAPE_DEBUG_BUGS
-               if (tape->next_stage != NULL)
-                       printk ("ide-tape: bug: tape->next_stage != NULL\n");
-               if (tape->current_number_of_stages)
-                       printk ("ide-tape: bug: current_number_of_stages should be 0 now\n");
-#endif /* IDETAPE_DEBUG_BUGS */
+
+       /*
+        *      Select the "best" DSC read/write polling frequency.
+        *      The following algorithm attempts to find a balance between
+        *      good latency and good system throughput. It will be nice to
+        *      have all this configurable in run time at some point.
+        */
+       t1 = (tape->stage_size * HZ) / (tape->capabilities.speed * 1000);
+       tmid = (tape->capabilities.buffer_size * 32 * HZ) / (tape->capabilities.speed * 125);
+       tn = (IDETAPE_FIFO_THRESHOLD * tape->stage_size * HZ) / (tape->capabilities.speed * 1000);
+
+       if (tape->max_stages) {
+               if (drive->using_dma)
+                       t = tmid;
+               else {
+                       if (hwif->drives[drive->select.b.unit ^ 1].present || hwif->next != hwif)
+                               t = (tn + tmid) / 2;
+                       else
+                               t = tn;
+               }
+       } else
+               t = t1;
+       t = IDETAPE_MIN (t, tmid);
+
+       /*
+        *      Ensure that the number we got makes sense.
+        */
+       tape->best_dsc_rw_frequency = IDETAPE_MAX (IDETAPE_MIN (t, IDETAPE_DSC_RW_MAX), IDETAPE_DSC_RW_MIN);
+       if (tape->best_dsc_rw_frequency != t) {
+               printk (KERN_NOTICE "ide-tape: Although the recommended polling period is %lu jiffies\n", t);
+               printk (KERN_NOTICE "ide-tape: we will use %lu jiffies\n", tape->best_dsc_rw_frequency);
        }
+       printk (KERN_INFO "ide-tape: %s <-> %s, %dKBps, %d*%dkB buffer, %dkB pipeline, %lums tDSC%s\n",
+               drive->name, tape->name, tape->capabilities.speed, (tape->capabilities.buffer_size * 512) / tape->stage_size,
+               tape->stage_size / 1024, tape->max_stages * tape->stage_size / 1024,
+               tape->best_dsc_rw_frequency * 1000 / HZ, drive->using_dma ? ", DMA":"");
 }
 
-/*
- *     idetape_insert_pipeline_into_queue is used to start servicing the
- *     pipeline stages, starting from tape->next_stage.
- */
-void idetape_insert_pipeline_into_queue (ide_drive_t *drive)
-
+static int idetape_cleanup (ide_drive_t *drive)
 {
-       idetape_tape_t *tape=&(drive->tape);
-
-       if (tape->next_stage == NULL)
-               return;
+       idetape_tape_t *tape = drive->driver_data;
+       int minor = tape->minor;
+       unsigned long flags;
 
-       if (tape->active_data_request == NULL) {
-               idetape_active_next_stage (drive);
-               (void) (ide_do_drive_cmd (drive,tape->active_data_request,ide_end));
-               return;
+       save_flags (flags);
+       cli ();
+       if (test_bit (IDETAPE_BUSY, &tape->flags) || tape->first_stage != NULL || tape->merge_stage_size || drive->usage) {
+               restore_flags(flags);
+               return 1;
        }
-}
+       idetape_chrdevs[minor].drive = NULL;
+       restore_flags (flags);
+       DRIVER(drive)->busy = 0;
+       (void) ide_unregister_subdriver (drive);
+       drive->driver_data = NULL;
+       kfree (tape);
+       for (minor = 0; minor < MAX_HWIFS * MAX_DRIVES; minor++)
+               if (idetape_chrdevs[minor].drive != NULL)
+                       return 0;
+       unregister_chrdev (IDETAPE_MAJOR, "ht");
+       idetape_chrdev_present = 0;
+       return 0;
+}
+
+int idetape_init (void);
+
+static ide_module_t idetape_module = {
+       IDE_DRIVER_MODULE,
+       idetape_init,
+       NULL
+};
 
 /*
- *     idetape_active_next_stage will declare the next stage as "active".
+ *     IDE subdriver functions, registered with ide.c
+ */
+static ide_driver_t idetape_driver = {
+       ide_tape,               /* media */
+       1,                      /* busy */
+       1,                      /* supports_dma */
+       idetape_cleanup,        /* cleanup */
+       idetape_do_request,     /* do_request */
+       idetape_end_request,    /* end_request */
+       idetape_blkdev_ioctl,   /* ioctl */
+       idetape_blkdev_open,    /* open */
+       idetape_blkdev_release, /* release */
+       NULL,                   /* media_change */
+       idetape_pre_reset,      /* pre_reset */
+       NULL,                   /* capacity */
+       NULL                    /* special */
+};
+
+/*
+ *     Our character device supporting functions, passed to register_chrdev.
  */
-void idetape_active_next_stage (ide_drive_t *drive)
+static struct file_operations idetape_fops = {
+       NULL,                   /* lseek - default */
+       idetape_chrdev_read,    /* read  */
+       idetape_chrdev_write,   /* write */
+       NULL,                   /* readdir - bad */
+       NULL,                   /* select */
+       idetape_chrdev_ioctl,   /* ioctl */
+       NULL,                   /* mmap */
+       idetape_chrdev_open,    /* open */
+       idetape_chrdev_release, /* release */
+       NULL,                   /* fsync */
+       NULL,                   /* fasync */
+       NULL,                   /* check_media_change */
+       NULL                    /* revalidate */
+};
+
+/*
+ *     idetape_init will register the driver for each tape.
+ */
+int idetape_init (void)
+{
+       ide_drive_t *drive;
+       idetape_tape_t *tape;
+       int minor, failed = 0, supported = 0;
+
+       MOD_INC_USE_COUNT;
+       if (!idetape_chrdev_present)
+               for (minor = 0; minor < MAX_HWIFS * MAX_DRIVES; minor++ )
+                       idetape_chrdevs[minor].drive = NULL;
+
+       if ((drive = ide_scan_devices (ide_tape, NULL, failed++)) == NULL) {
+               ide_register_module (&idetape_module);
+               MOD_DEC_USE_COUNT;
+               return 0;
+       }
+       if (!idetape_chrdev_present && register_chrdev (IDETAPE_MAJOR, "ht", &idetape_fops)) {
+               printk (KERN_ERR "ide-tape: Failed to register character device interface\n");
+               MOD_DEC_USE_COUNT;
+               return -EBUSY;
+       }
+       do {
+               if (!idetape_identify_device (drive, drive->id)) {
+                       printk (KERN_ERR "ide-tape: %s: not supported by this version of ide-tape\n", drive->name);
+                       continue;
+               }
+               tape = (idetape_tape_t *) kmalloc (sizeof (idetape_tape_t), GFP_KERNEL);
+               if (tape == NULL) {
+                       printk (KERN_ERR "ide-tape: %s: Can't allocate a tape structure\n", drive->name);
+                       continue;
+               }
+               if (ide_register_subdriver (drive, &idetape_driver, IDE_SUBDRIVER_VERSION)) {
+                       printk (KERN_ERR "ide-tape: %s: Failed to register the driver with ide.c\n", drive->name);
+                       kfree (tape);
+                       continue;
+               }
+               for (minor = 0; idetape_chrdevs[minor].drive != NULL; minor++);
+               idetape_setup (drive, tape, minor);
+               idetape_chrdevs[minor].drive = drive;
+               supported++; failed--;
+       } while ((drive = ide_scan_devices (ide_tape, NULL, failed++)) != NULL);
+       if (!idetape_chrdev_present && !supported) {
+               unregister_chrdev (IDETAPE_MAJOR, "ht");
+       } else
+               idetape_chrdev_present = 1;
+       ide_register_module (&idetape_module);
+       MOD_DEC_USE_COUNT;
+       return 0;
+}
 
+#ifdef MODULE
+int init_module (void)
 {
-       idetape_tape_t *tape=&(drive->tape);
-       idetape_pipeline_stage_t *stage=tape->next_stage;
-       struct request *rq=&(stage->rq);
+       return idetape_init ();
+}
 
-#if IDETAPE_DEBUG_LOG
-       printk ("Reached idetape_active_next_stage\n");
-#endif /* IDETAPE_DEBUG_LOG */
-#if IDETAPE_DEBUG_BUGS
-       if (stage == NULL) {
-               printk ("ide-tape: bug: Trying to activate a non existing stage\n");
-               return;
+void cleanup_module (void)
+{
+       ide_drive_t *drive;
+       int minor;
+
+       for (minor = 0; minor < MAX_HWIFS * MAX_DRIVES; minor++) {
+               drive = idetape_chrdevs[minor].drive;
+               if (drive != NULL && idetape_cleanup (drive))
+                       printk (KERN_ERR "ide-tape: %s: cleanup_module() called while still busy\n", drive->name);
        }
-#endif /* IDETAPE_DEBUG_BUGS */        
-       if (rq->cmd == IDETAPE_WRITE_REQUEST)
-               idetape_copy_buffer_from_stage (stage,tape->data_buffer);
-       
-       rq->buffer=tape->data_buffer;
-       tape->active_data_request=rq;
-       tape->active_stage=stage;
-       tape->next_stage=stage->next;
+       ide_unregister_module(&idetape_module);
 }
+#endif /* MODULE */
diff --git a/drivers/block/ide-tape.h b/drivers/block/ide-tape.h
deleted file mode 100644 (file)
index 971c63e..0000000
+++ /dev/null
@@ -1,529 +0,0 @@
-/*
- * linux/drivers/block/ide-tape.h      Version 1.8 - ALPHA     Sep  26, 1996
- *
- * Copyright (C) 1995, 1996 Gadi Oxman <gadio@netvision.net.il>
- */
-
-/*
- * Include file for the IDE ATAPI streaming tape driver.
- *
- * This file contains various ide-tape related structures and function
- * prototypes which are already used in ide.h.
- *
- * The various compile time options are described below.
- */
-
-#ifndef IDETAPE_H
-#define IDETAPE_H 
-
-/**************************** Tunable parameters *****************************/
-
-/*
- *     This is probably the most important configuration option.
- *
- *     Pipelined operation mode has the potential to maximize the
- *     performance of the driver and thus to saturate the throughput
- *     to the maximum value supported by the tape.
- *
- *     In pipelined mode we are servicing requests without blocking the
- *     user backup program. For example, on a write request, we will add it
- *     to the pipeline and return without waiting for it to complete. The
- *     user program will then have enough time to prepare the next blocks
- *     while the tape is still busy working on the previous requests.
- *
- *     Pipelined operation mode is enabled by default, but since it has a
- *     few downfalls as well, you may wish to disable it.
- *     Further explanation of pipelined mode is available in ide-tape.c .
- */
-
-#define        IDETAPE_PIPELINE        1
-
-/*
- *     Pipelined mode parameters.
- *
- *     We try to use the minimum number of stages which is enough to
- *     keep the tape constantly streaming. To accomplish that, we implement
- *     a feedback loop around the maximum number of stages:
- *
- *     We start from MIN maximum stages (we will not even use MIN stages
- *      if we don't need them), increment it by RATE*(MAX-MIN)
- *     whenever we sense that the pipeline is empty, until we reach
- *     the optimum value or until we reach MAX.
- */
-#define        IDETAPE_MIN_PIPELINE_STAGES             100
-#define        IDETAPE_MAX_PIPELINE_STAGES             200
-#define        IDETAPE_INCREASE_STAGES_RATE            20
-
-/*
- *     Assuming the tape shares an interface with another device, the default
- *     behavior is to service our pending pipeline requests as soon as
- *     possible, but to gracefully postpone them in favor of the other device
- *     when the tape is busy. This has the potential to maximize our
- *     throughput and in the same time, to make efficient use of the IDE bus.
- *
- *     Note that when we transfer data to / from the tape, we co-operate with
- *     the relatively fast tape buffers and the tape will perform the
- *     actual media access in the background, without blocking the IDE
- *     bus. This means that as long as the maximum IDE bus throughput is much
- *     higher than the sum of our maximum throughput and the maximum
- *     throughput of the other device, we should probably leave the default
- *     behavior.
- *
- *     However, if it is still desired to give the other device a share even
- *     in our own (small) bus bandwidth, you can set IDETAPE_LOW_TAPE_PRIORITY
- *     to 1. This will let the other device finish *all* its pending requests
- *     before we even check if we can service our next pending request.
- */
-                
-#define IDETAPE_LOW_TAPE_PRIORITY              0
-
-/*
- *     It seems that dynamically allocating buffers of about 32KB
- *     each is doomed to fail, unless we are in or very near the
- *     initialization stage. Take care when changing this value, as it
- *     is now optimized with the design of kmalloc, so that we will not
- *     allocate parts of a page. Setting the size to 512 bytes, for example,
- *     would cause kmalloc to allocate for us 1024 bytes, and to
- *     unnecessarily waste double amount of memory.
- */
-
-#if PAGE_SIZE == 4096
-       #define IDETAPE_ALLOCATION_BLOCK                500
-#elif PAGE_SIZE == 8192
-       #define IDETAPE_ALLOCATION_BLOCK                496
-#else /* ??? Not defined by linux/mm/kmalloc.c */
-       #define IDETAPE_ALLOCATION_BLOCK                512
-#endif
-
-/*
- *     ide-tape currently uses two continuous buffers, each of the size of
- *     one stage. By default, those buffers are allocated at initialization
- *     time and never released, since dynamic allocation of pages bigger
- *     than PAGE_SIZE may fail as memory becomes fragmented.
- *
- *     This results in about 100 KB memory usage when the tape is idle.
- *     Setting IDETAPE_MINIMIZE_IDLE_MEMORY_USAGE to 1 will let ide-tape
- *     to dynamically allocate those buffers, resulting in about 20 KB idle
- *     memory usage.
- */
-#define        IDETAPE_MINIMIZE_IDLE_MEMORY_USAGE      0
-
-/*
- *     The following are used to debug the driver:
- *
- *     Setting IDETAPE_DEBUG_LOG to 1 will log driver flow control.
- *     Setting IDETAPE_DEBUG_BUGS to 1 will enable self-sanity checks in
- *     some places.
- *
- *     Setting them to 0 will restore normal operation mode:
- *
- *             1.      Disable logging normal successful operations.
- *             2.      Disable self-sanity checks.
- *             3.      Errors will still be logged, of course.
- *
- *     All the #if DEBUG code will be removed some day, when the driver
- *     is verified to be stable enough. This will make it much more
- *     esthetic.
- */
-#define        IDETAPE_DEBUG_LOG               0
-#define        IDETAPE_DEBUG_BUGS              1
-
-/*
- *     After each failed packet command we issue a request sense command
- *     and retry the packet command IDETAPE_MAX_PC_RETRIES times.
- *
- *     Setting IDETAPE_MAX_PC_RETRIES to 0 will disable retries.
- */
-
-#define        IDETAPE_MAX_PC_RETRIES  3
-
-/*
- *     With each packet command, we allocate a buffer of
- *     IDETAPE_TEMP_BUFFER_SIZE bytes. This is used for several packet
- *     commands (Not for READ/WRITE commands).
- *
- *     The default below is too high - We should be using around 100 bytes
- *     typically, but I didn't check all the cases, so I rather be on the
- *     safe size.
- */
-#define        IDETAPE_TEMP_BUFFER_SIZE 256
-
-/*
- *     In various places in the driver, we need to allocate storage
- *     for packet commands and requests, which will remain valid while
- *     we leave the driver to wait for an interrupt or a timeout event.
- *
- *     In the corresponding ide_drive_t structure, we pre-allocate storage
- *     for IDETAPE_PC_STACK packet commands and requests. This storage is
- *     used as a circular array - Each time we reach the last entry, we
- *     warp around to the first.
- *
- *     It is crucial that we have enough entries for the maximum number
- *     of packet commands / sub-requests which we need to allocate during
- *     the handling of a specific request.
- *
- *     Follows a worse case calculation of the required storage, with a
- *     large safety margin.
- */
-
-#define        IDETAPE_PC_STACK        20+IDETAPE_MAX_PC_RETRIES
-
-/*
- *     DSC polling parameters.
- *
- *     Polling for DSC (a single bit in the status register) is a very
- *     important function in ide-tape. There are two cases in which we
- *     poll for DSC:
- *
- *     1.      Before a read/write packet command, to ensure that we
- *             can transfer data from/to the tape's data buffers, without
- *             causing an actual media access. In case the tape is not
- *             ready yet, we take out our request from the device
- *             request queue, so that ide.c will service requests from
- *             the other device on the same interface meanwhile.
- *
- *             We can now automatically select the "best" polling frequency.
- *             Have a look at IDETAPE_ANTICIPATE_READ_WRITE_DSC below.
- *
- *             In case you don't want to use the automatic selection,
- *             choose it to be relatively fast. The default fallback
- *             frequency is 1/50 msec.
- *
- *     2.      After the successful initialization of a "media access
- *             packet command", which is a command which can take a long
- *             time to complete (it can be several seconds or even an hour).
- *
- *             Again, we postpone our request in the middle to free the bus
- *             for the other device. The polling frequency here should be
- *             lower than the read/write frequency since those media access
- *             commands are slow. We start from a "fast" frequency -
- *             IDETAPE_DSC_FAST_MEDIA_ACCESS_FREQUENCY (one second), and
- *             if we don't receive DSC after IDETAPE_FAST_SLOW_THRESHOLD
- *             (5 minutes), we switch it to a lower frequency -
- *             IDETAPE_DSC_SLOW_MEDIA_ACCESS_FREQUENCY (1 minute).
- *             
- *     We also set a timeout for the timer, in case something goes wrong.
- *     The timeout should be longer then the maximum execution time of a
- *     tape operation. I still have to measure exactly how much time does
- *     it take to space over a far filemark, etc. It seemed that 15 minutes
- *     was way too low, so I am meanwhile setting it to a rather large
- *     timeout - 2 Hours ...
- *
- */
-
-/*
- *     Setting IDETAPE_ANTICIPATE_READ_WRITE_DSC to 1 will allow ide-tape
- *     to cleverly select the lowest possible frequency which will
- *     not affect performance, based on the tape parameters and our operation
- *     mode. This has potential to dramatically decrease our polling load
- *     on Linux.
- *
- *     However, for the cases in which our calculation fails, setting
- *     the following option to 0 will force the use of the "fallback"
- *     polling period defined below (defaults to 50 msec).
- *
- *     In any case, the frequency will be between the "lowest" value
- *     to the "fallback" value, to ensure that our selected "best" frequency
- *     is reasonable.
- */
-
-#define IDETAPE_ANTICIPATE_READ_WRITE_DSC      1
-
-/*
- *     The following parameter is used to select the point in the internal
- *     tape fifo in which we will start to refill the buffer. Decreasing
- *     the following parameter will improve the system's latency and
- *     interactive response, while using a high value might improve sytem
- *     throughput.
- */
-#define        IDETAPE_FIFO_THRESHOLD                  2
-
-/*
- *     DSC timings.
- */
-#define        IDETAPE_DSC_READ_WRITE_FALLBACK_FREQUENCY   5*HZ/100    /* 50 msec */
-#define IDETAPE_DSC_READ_WRITE_LOWEST_FREQUENCY        40*HZ/100       /* 400 msec */
-#define        IDETAPE_DSC_FAST_MEDIA_ACCESS_FREQUENCY 1*HZ            /* 1 second */
-#define        IDETAPE_FAST_SLOW_THRESHOLD             5*60*HZ         /* 5 minutes */
-#define IDETAPE_DSC_SLOW_MEDIA_ACCESS_FREQUENCY        60*HZ           /* 1 minute */
-#define        IDETAPE_DSC_TIMEOUT                     2*60*60*HZ      /* 2 hours */
-
-/*************************** End of tunable parameters ***********************/
-
-/*
- *     Definitions which are already needed in ide.h
- */
-
-/*
- *     Current character device data transfer direction.
- */
-  
-typedef enum {idetape_direction_none,idetape_direction_read,idetape_direction_write} chrdev_direction_t;
-
-struct ide_drive_s;                            /* Forward declaration - Will be defined later in ide.h */
-typedef void (idetape_pc_completed_t)(struct ide_drive_s *);
-
-/*
- *     Our view of a packet command.
- */
-
-typedef struct idetape_packet_command_s {
-       byte c [12];                            /* Actual packet bytes */
-       
-       byte retries;                           /* On each retry, we increment retries */
-       byte error;                             /* Error code */
-       byte abort;                             /* Set when an error is considered normal - We won't retry */
-       byte wait_for_dsc;                      /* 1 When polling for DSC on a media access command */
-       byte dma_recommended;                   /* 1 when we prefer to use DMA if possible */
-       byte dma_in_progress;                   /* 1 while DMA in progress */
-       byte dma_error;                         /* 1 when encountered problem during DMA */
-       unsigned long request_transfer;         /* Bytes to transfer */
-       unsigned long actually_transferred;     /* Bytes actually transferred */
-       unsigned long buffer_size;              /* Size of our data buffer */
-       byte *buffer;                           /* Data buffer */
-       byte *current_position;                 /* Pointer into the above buffer */
-       byte writing;                           /* Data direction */            
-       idetape_pc_completed_t *callback;       /* Called when this packet command is completed */
-       byte temp_buffer [IDETAPE_TEMP_BUFFER_SIZE];    /* Temporary buffer */
-} idetape_packet_command_t;
-
-/*
- *     Capabilities and Mechanical Status Page
- */
-
-typedef struct {
-       unsigned page_code      :6;     /* Page code - Should be 0x2a */
-       unsigned reserved1_67   :2;
-       byte page_length;               /* Page Length - Should be 0x12 */
-       byte reserved2; 
-       byte reserved3; 
-       unsigned ro             :1;     /* Read Only Mode */
-       unsigned reserved4_1234 :4;
-       unsigned sprev          :1;     /* Supports SPACE in the reverse direction */
-       unsigned reserved4_67   :2;
-       unsigned reserved5_012  :3;
-       unsigned efmt           :1;     /* Supports ERASE command initiated formatting */
-       unsigned reserved5_4    :1;
-       unsigned qfa            :1;     /* Supports the QFA two partition formats */
-       unsigned reserved5_67   :2;
-       unsigned lock           :1;     /* Supports locking the volume */
-       unsigned locked         :1;     /* The volume is locked */
-       unsigned prevent        :1;     /* The device defaults in the prevent state after power up */   
-       unsigned eject          :1;     /* The device can eject the volume */
-       unsigned reserved6_45   :2;     /* Reserved */  
-       unsigned ecc            :1;     /* Supports error correction */
-       unsigned cmprs          :1;     /* Supports data compression */
-       unsigned reserved7_0    :1;
-       unsigned blk512         :1;     /* Supports 512 bytes block size */
-       unsigned blk1024        :1;     /* Supports 1024 bytes block size */
-       unsigned reserved7_3_6  :4;
-       unsigned slowb          :1;     /* The device restricts the byte count for PIO */
-                                       /* transfers for slow buffer memory ??? */
-       unsigned short max_speed;       /* Maximum speed supported in KBps */
-       byte reserved10;
-       byte reserved11;
-       unsigned short ctl;             /* Continuous Transfer Limit in blocks */
-       unsigned short speed;           /* Current Speed, in KBps */
-       unsigned short buffer_size;     /* Buffer Size, in 512 bytes */
-       byte reserved18;
-       byte reserved19;
-} idetape_capabilities_page_t;
-
-/*
- *     A pipeline stage contains several small buffers of type
- *     idetape_buffer_head_t. This is necessary since dynamical allocation
- *     of large (32 KB or so) continuous memory blocks will usually fail.
- */
-typedef struct idetape_buffer_head_s {
-       char *data;                                     /* Pointer to data (512 bytes by default) */
-       struct idetape_buffer_head_s *next;
-} idetape_buffer_head_t;
-
-/*
- *     A pipeline stage.
- *
- *     In a pipeline stage we have a request, pointer to a list of small
- *     buffers, and pointers to the near stages.
- */
-
-typedef struct idetape_pipeline_stage_s {
-       struct request rq;                              /* The corresponding request */
-       idetape_buffer_head_t *bh;                      /* The data buffers */
-       struct idetape_pipeline_stage_s *next,*prev;    /* Pointers to the next and previous stages */
-} idetape_pipeline_stage_t;
-
-/*
- *     Most of our global data which we need to save even as we leave the
- *     driver due to an interrupt or a timer event is stored in a variable
- *     of type tape_info, defined below.
- *
- *     Additional global variables which provide the link between the
- *     character device interface to this structure are defined in
- *     ide-tape.c
- */
-typedef struct {       
-
-       /*
-        *      Since a typical character device operation requires more
-        *      than one packet command, we provide here enough memory
-        *      for the maximum of interconnected packet commands.
-        *      The packet commands are stored in the circular array pc_stack.
-        *      pc_stack_index points to the last used entry, and warps around
-        *      to the start when we get to the last array entry.
-        *
-        *      pc points to the current processed packet command.
-        *
-        *      failed_pc points to the last failed packet command, or contains
-        *      NULL if we do not need to retry any packet command. This is
-        *      required since an additional packet command is needed before the
-        *      retry, to get detailed information on what went wrong.
-        */
-
-       idetape_packet_command_t *pc;           /* Current packet command */
-       idetape_packet_command_t *failed_pc;    /* Last failed packet command */
-       idetape_packet_command_t pc_stack [IDETAPE_PC_STACK]; /* Packet command stack */
-       byte pc_stack_index;                    /* Next free packet command storage space */
-
-       /* 
-        *      The Linux ide driver basically traverses the request lists
-        *      of the ide block devices, finds the next request, completes
-        *      it, and passes to the next one. This is done in ide_do_request.
-        *
-        *      In this regard, ide-tape.c is fully compatible with the rest of
-        *      the ide driver - From the point of view of ide.c, we are just
-        *      another ide block device which receives requests and completes
-        *      them.
-        *
-        *      However, our requests don't originate in the buffer cache but
-        *      rather in ide-tape.c itself. Here we provide safe storage for
-        *      such requests.
-        */
-
-       struct request rq_stack [IDETAPE_PC_STACK];
-       byte rq_stack_index;                    /* We implement a circular array */
-
-       /*
-        *      While polling for DSC we use postponed_rq to postpone the
-        *      current request so that ide.c will be able to service
-        *      pending requests on the other device. Note that at most
-        *      we will have only one DSC (usually data transfer) request
-        *      in the device request queue. Additional request can be
-        *      queued in our internal pipeline, but they will be visible
-        *      to ide.c only one at a time.
-        */
-
-       struct request *postponed_rq;
-       
-       /*
-        *      DSC polling variables.
-        */
-        
-       byte dsc_count;                         /* We received DSC dsc_count times in a row */
-       unsigned long dsc_polling_start;        /* The time in which we started polling for DSC */
-       struct timer_list dsc_timer;            /* Timer used to poll for dsc */
-
-       /*
-        *      We can now be much more clever in our selection of the
-        *      read/write polling frequency. This is used along with
-        *      the compile time option IDETAPE_ANTICIPATE_DSC.
-        */
-       unsigned long best_dsc_rw_frequency;    /* Read/Write dsc polling frequency */
-
-       unsigned long dsc_polling_frequency;    /* The current polling frequency */
-       unsigned long dsc_timeout;              /* Maximum waiting time */
-       byte dsc_received;                      /* Set when we receive DSC */
-
-       byte request_status;
-       byte last_status;                       /* Contents of the tape status register */
-                                               /* before the current request (saved for us */
-                                               /* by ide.c) */
-       /*
-        *      After an ATAPI software reset, the status register will be
-        *      locked, and thus we need to ignore it when checking DSC for
-        *      the first time.
-        */
-        
-       byte reset_issued;
-
-       /* Position information */
-       
-       byte partition_num;                     /* Currently not used */
-       unsigned long block_address;            /* Current block */
-       byte block_address_valid;               /* 0 When the tape position is unknown */
-                                               /* (To the tape or to us) */
-       /* Last error information */
-       
-       byte sense_key,asc,ascq;
-
-       /* Character device operation */
-
-       chrdev_direction_t chrdev_direction;    /* Current character device data transfer direction */
-       byte busy;                              /* Device already opened */
-
-       /* Device information */
-       
-       unsigned short tape_block_size;                 /* Usually 512 or 1024 bytes */
-       idetape_capabilities_page_t capabilities;       /* Copy of the tape's Capabilities and Mechanical Page */
-
-       /*
-        *      Active data transfer request parameters.
-        *
-        *      At most, there is only one ide-tape originated data transfer
-        *      request in the device request queue. This allows ide.c to
-        *      easily service requests from the other device when we
-        *      postpone our active request. In the pipelined operation
-        *      mode, we use our internal pipeline structure to hold
-        *      more data requests.
-        *
-        *      The data buffer size is chosen based on the tape's
-        *      recommendation.
-        */
-       
-       struct request *active_data_request;    /* Pointer to the request which is waiting in the device request queue */
-       char *data_buffer;                      /* The corresponding data buffer (for read/write requests) */
-       int data_buffer_size;                   /* Data buffer size (chosen based on the tape's recommendation */
-
-       char *merge_buffer;                     /* Temporary buffer for user <-> kernel space data transfer */
-       int merge_buffer_offset;
-       int merge_buffer_size;
-       
-       /*
-        *      Pipeline parameters.
-        *
-        *      To accomplish non-pipelined mode, we simply set the following
-        *      variables to zero (or NULL, where appropriate).
-        */
-               
-       int current_number_of_stages;           /* Number of currently used stages */
-       int max_number_of_stages;               /* We will not allocate more than this number of stages */
-       idetape_pipeline_stage_t *first_stage;  /* The first stage which will be removed from the pipeline */
-       idetape_pipeline_stage_t *active_stage; /* The currently active stage */
-       idetape_pipeline_stage_t *next_stage;   /* Will be serviced after the currently active request */
-       idetape_pipeline_stage_t *last_stage;   /* New requests will be added to the pipeline here */
-       int error_in_pipeline_stage;            /* Set when an error was detected in one of the pipeline stages */      
-       
-} idetape_tape_t;
-
-/*
- *     The following is used to have a quick look at the tape's status
- *     register between requests of the other device.
- */
-#define POLL_HWIF_TAPE_DRIVE                                                   \
-       if (hwif->tape_drive != NULL) {                                         \
-               if (hwif->tape_drive->tape.request_status) {                    \
-                       SELECT_DRIVE(hwif,hwif->tape_drive);                    \
-                       hwif->tape_drive->tape.last_status=GET_STAT();          \
-                       hwif->tape_drive->tape.request_status=0;                \
-               }                                                               \
-       }
-
-#endif /* IDETAPE_H */
index c00e3d7a79824e031f0571063b133ced8b86593f..d5193c2eefc3ee8a12b24169a9b632bab4172717 100644 (file)
@@ -1,5 +1,5 @@
 /*
- *  linux/drivers/block/ide.c  Version 5.52  Sep  24, 1996
+ *  linux/drivers/block/ide.c  Version 5.60  Nov   5, 1996
  *
  *  Copyright (C) 1994-1996  Linus Torvalds & authors (see below)
  */
  *                     change delay_10ms() to delay_50ms() to fix problems
  * Version 5.52                fix incorrect invalidation of removable devices
  *                     add "hdx=slow" command line option
+ * Version 5.60                start to modularize the driver; the disk and ATAPI
+ *                      drivers can be compiled as loadable modules.
+ *                     move IDE probe code to ide-probe.c
+ *                     move IDE disk code to ide-disk.c
+ *                     add support for generic IDE device subdrivers
+ *                     add m68k code from Geert Uytterhoeven
+ *                     probe all interfaces by default
+ *                     add ioctl to (re)probe an interface
  *
  *  Some additional driver compile-time options are in ide.h
  *
 #undef REALLY_SLOW_IO          /* most systems can safely undef this */
 
 #include <linux/config.h>
+#include <linux/module.h>
 #include <linux/types.h>
 #include <linux/string.h>
 #include <linux/kernel.h>
 #include <asm/irq.h>
 #include <asm/uaccess.h>
 #include <asm/io.h>
+#include <asm/bitops.h>
 
 #ifdef CONFIG_PCI
 #include <linux/bios32.h>
 #include "ide.h"
 #include "ide_modes.h"
 
-#ifdef CONFIG_BLK_DEV_PROMISE
-#include "promise.h"
-#define IS_PROMISE_DRIVE (HWIF(drive)->chipset == ide_promise)
-#else
-#define IS_PROMISE_DRIVE (0)   /* auto-NULLs out Promise code */
-#endif /* CONFIG_BLK_DEV_PROMISE */
+#ifdef CONFIG_KERNELD
+#include <linux/kerneld.h>
+#endif /* CONFIG_KERNELD */
+
+static const byte      ide_hwif_to_major[] = {IDE0_MAJOR, IDE1_MAJOR, IDE2_MAJOR, IDE3_MAJOR};
 
-static const byte      ide_hwif_to_major[MAX_HWIFS] = {IDE0_MAJOR, IDE1_MAJOR, IDE2_MAJOR, IDE3_MAJOR};
-static const unsigned short default_io_base[MAX_HWIFS] = {0x1f0, 0x170, 0x1e8, 0x168};
-static const byte      default_irqs[MAX_HWIFS]     = {14, 15, 11, 10};
 static int     idebus_parameter; /* holds the "idebus=" parameter */
 static int     system_bus_speed; /* holds what we think is VESA/PCI bus speed */
+static int     initializing;     /* set while initializing built-in drivers */
+
+/*
+ * ide_lock is used by the Atari code to obtain access to the IDE interrupt,
+ * which is shared between several drivers.
+ */
+static int     ide_lock = 0;
+
+/*
+ * ide_modules keeps track of the available IDE chipset/probe/driver modules.
+ */
+static ide_module_t *ide_modules = NULL;
 
 /*
  * This is declared extern in ide.h, for access by other IDE modules:
@@ -349,7 +367,6 @@ static void set_recovery_timer (ide_hwif_t *hwif)
 
 #endif /* DISK_RECOVERY_TIME */
 
-
 /*
  * Do not even *think* about calling this!
  */
@@ -367,11 +384,10 @@ static void init_hwif_data (unsigned int index)
 
        /* fill in any non-zero initial values */
        hwif->index     = index;
-       hwif->noprobe   = (index > 1);
-       hwif->io_base   = default_io_base[index];
-       hwif->ctl_port  = hwif->io_base ? hwif->io_base+0x206 : 0x000;
+       ide_init_hwif_ports(hwif->io_ports, ide_default_io_base(index), &hwif->irq);
+       hwif->noprobe   = !hwif->io_ports[IDE_DATA_OFFSET];
 #ifdef CONFIG_BLK_DEV_HD
-       if (hwif->io_base == HD_DATA)
+       if (hwif->io_ports[IDE_DATA_OFFSET] == HD_DATA)
                hwif->noprobe = 1; /* may be overridden by ide_setup() */
 #endif /* CONFIG_BLK_DEV_HD */
        hwif->major     = ide_hwif_to_major[index];
@@ -379,12 +395,10 @@ static void init_hwif_data (unsigned int index)
        hwif->name[1]   = 'd';
        hwif->name[2]   = 'e';
        hwif->name[3]   = '0' + index;
-#ifdef CONFIG_BLK_DEV_IDETAPE
-       hwif->tape_drive = NULL;
-#endif /* CONFIG_BLK_DEV_IDETAPE */
        for (unit = 0; unit < MAX_DRIVES; ++unit) {
                ide_drive_t *drive = &hwif->drives[unit];
 
+               drive->media                    = ide_disk;
                drive->select.all               = (unit<<4)|0xa0;
                drive->hwif                     = hwif;
                drive->ctl                      = 0x08;
@@ -431,7 +445,7 @@ static void init_ide_data (void)
 
 /*
  * ide_system_bus_speed() returns what we think is the system VESA/PCI
- * bus speed (in Mhz).  This is used for calculating interface PIO timings.
+ * bus speed (in MHz).  This is used for calculating interface PIO timings.
  * The default is 40 for known PCI systems, 50 otherwise.
  * The "idebus=xx" parameter can be used to override this value.
  * The actual value to be used is computed/displayed the first time through.
@@ -447,7 +461,8 @@ int ide_system_bus_speed (void)
 #endif /* CONFIG_PCI */
                else
                        system_bus_speed = 50;  /* safe default value for VESA and PCI */
-               printk("ide: Assuming %dMhz system bus speed for PIO modes; override with idebus=xx\n", system_bus_speed);
+               printk("ide: Assuming %dMHz system bus speed for PIO modes%s\n", system_bus_speed,
+                       idebus_parameter ? "" : "; override with idebus=xx");
        }
        return system_bus_speed;
 }
@@ -460,7 +475,7 @@ int ide_system_bus_speed (void)
  * of the sector count register location, with interrupts disabled
  * to ensure that the reads all happen together.
  */
-static inline void do_vlb_sync (unsigned short port) {
+static inline void do_vlb_sync (ide_ioreg_t port) {
        (void) inb (port);
        (void) inb (port);
        (void) inb (port);
@@ -472,32 +487,30 @@ static inline void do_vlb_sync (unsigned short port) {
  */
 void ide_input_data (ide_drive_t *drive, void *buffer, unsigned int wcount)
 {
-       unsigned short io_base  = HWIF(drive)->io_base;
-       unsigned short data_reg = io_base+IDE_DATA_OFFSET;
        byte io_32bit = drive->io_32bit;
 
        if (io_32bit) {
 #if SUPPORT_VLB_SYNC
                if (io_32bit & 2) {
                        cli();
-                       do_vlb_sync(io_base+IDE_NSECTOR_OFFSET);
-                       insl(data_reg, buffer, wcount);
+                       do_vlb_sync(IDE_NSECTOR_REG);
+                       insl(IDE_DATA_REG, buffer, wcount);
                        if (drive->unmask)
                                sti();
                } else
 #endif /* SUPPORT_VLB_SYNC */
-                       insl(data_reg, buffer, wcount);
+                       insl(IDE_DATA_REG, buffer, wcount);
        } else {
 #if SUPPORT_SLOW_DATA_PORTS
                if (drive->slow) {
                        unsigned short *ptr = (unsigned short *) buffer;
                        while (wcount--) {
-                               *ptr++ = inw_p(data_reg);
-                               *ptr++ = inw_p(data_reg);
+                               *ptr++ = inw_p(IDE_DATA_REG);
+                               *ptr++ = inw_p(IDE_DATA_REG);
                        }
                } else
 #endif /* SUPPORT_SLOW_DATA_PORTS */
-                       insw(data_reg, buffer, wcount<<1);
+                       insw(IDE_DATA_REG, buffer, wcount<<1);
        }
 }
 
@@ -506,33 +519,68 @@ void ide_input_data (ide_drive_t *drive, void *buffer, unsigned int wcount)
  */
 void ide_output_data (ide_drive_t *drive, void *buffer, unsigned int wcount)
 {
-       unsigned short io_base  = HWIF(drive)->io_base;
-       unsigned short data_reg = io_base+IDE_DATA_OFFSET;
        byte io_32bit = drive->io_32bit;
 
        if (io_32bit) {
 #if SUPPORT_VLB_SYNC
                if (io_32bit & 2) {
                        cli();
-                       do_vlb_sync(io_base+IDE_NSECTOR_OFFSET);
-                       outsl(data_reg, buffer, wcount);
+                       do_vlb_sync(IDE_NSECTOR_REG);
+                       outsl(IDE_DATA_REG, buffer, wcount);
                        if (drive->unmask)
                                sti();
                } else
 #endif /* SUPPORT_VLB_SYNC */
-                       outsl(data_reg, buffer, wcount);
+                       outsl(IDE_DATA_REG, buffer, wcount);
        } else {
 #if SUPPORT_SLOW_DATA_PORTS
                if (drive->slow) {
                        unsigned short *ptr = (unsigned short *) buffer;
                        while (wcount--) {
-                               outw_p(*ptr++, data_reg);
-                               outw_p(*ptr++, data_reg);
+                               outw_p(*ptr++, IDE_DATA_REG);
+                               outw_p(*ptr++, IDE_DATA_REG);
                        }
                } else
 #endif /* SUPPORT_SLOW_DATA_PORTS */
-                       outsw(data_reg, buffer, wcount<<1);
+                       outsw(IDE_DATA_REG, buffer, wcount<<1);
+       }
+}
+
+/*
+ * The following routines are mainly used by the ATAPI drivers.
+ *
+ * These routines will round up any request for an odd number of bytes,
+ * so if an odd bytecount is specified, be sure that there's at least one
+ * extra byte allocated for the buffer.
+ */
+void atapi_input_bytes (ide_drive_t *drive, void *buffer, unsigned int bytecount)
+{
+       ++bytecount;
+#ifdef CONFIG_ATARI
+       if (MACH_IS_ATARI) {
+               /* Atari has a byte-swapped IDE interface */
+               insw_swapw(IDE_DATA_REG, buffer, bytecount / 2);
+               return;
+       }
+#endif /* CONFIG_ATARI */
+       ide_input_data (drive, buffer, bytecount / 4);
+       if ((bytecount & 0x03) >= 2)
+               insw (IDE_DATA_REG, ((byte *)buffer) + (bytecount & ~0x03), 1);
+}
+
+void atapi_output_bytes (ide_drive_t *drive, void *buffer, unsigned int bytecount)
+{
+       ++bytecount;
+#ifdef CONFIG_ATARI
+       if (MACH_IS_ATARI) {
+               /* Atari has a byte-swapped IDE interface */
+               outsw_swapw(IDE_DATA_REG, buffer, bytecount / 2);
+               return;
        }
+#endif /* CONFIG_ATARI */
+       ide_output_data (drive, buffer, bytecount / 4);
+       if ((bytecount & 0x03) >= 2)
+               outsw (IDE_DATA_REG, ((byte *)buffer) + (bytecount & ~0x03), 1);
 }
 
 /*
@@ -540,7 +588,7 @@ void ide_output_data (ide_drive_t *drive, void *buffer, unsigned int wcount)
  * wait for an interrupt response from a drive.  handler() points
  * at the appropriate code to handle the next interrupt, and a
  * timer is started to prevent us from waiting forever in case
- * something goes wrong (see the timer_expiry() handler later on).
+ * something goes wrong (see the ide_timer_expiry() handler later on).
  */
 void ide_set_handler (ide_drive_t *drive, ide_handler_t *handler, unsigned int timeout)
 {
@@ -556,132 +604,39 @@ void ide_set_handler (ide_drive_t *drive, ide_handler_t *handler, unsigned int t
        add_timer(&(hwgroup->timer));
 }
 
-/*
- * lba_capacity_is_ok() performs a sanity check on the claimed "lba_capacity"
- * value for this drive (from its reported identification information).
- *
- * Returns:    1 if lba_capacity looks sensible
- *             0 otherwise
- */
-static int lba_capacity_is_ok (struct hd_driveid *id)
-{
-       unsigned long lba_sects   = id->lba_capacity;
-       unsigned long chs_sects   = id->cyls * id->heads * id->sectors;
-       unsigned long _10_percent = chs_sects / 10;
-
-       /* perform a rough sanity check on lba_sects:  within 10% is "okay" */
-       if ((lba_sects - chs_sects) < _10_percent)
-               return 1;       /* lba_capacity is good */
-
-       /* some drives have the word order reversed */
-       lba_sects = (lba_sects << 16) | (lba_sects >> 16);
-       if ((lba_sects - chs_sects) < _10_percent) {
-               id->lba_capacity = lba_sects;   /* fix it */
-               return 1;       /* lba_capacity is (now) good */
-       }
-       return 0;       /* lba_capacity value is bad */
-}
-
 /*
  * current_capacity() returns the capacity (in sectors) of a drive
  * according to its current geometry/LBA settings.
  */
-static unsigned long current_capacity (ide_drive_t  *drive)
+static unsigned long current_capacity (ide_drive_t *drive)
 {
-       struct hd_driveid *id = drive->id;
-       unsigned long capacity = drive->cyl * drive->head * drive->sect;
-
        if (!drive->present)
                return 0;
-       if (drive->media != ide_disk)
-               return 0x7fffffff;      /* cdrom or tape */
-       drive->select.b.lba = 0;
-       /* Determine capacity, and use LBA if the drive properly supports it */
-       if (id != NULL && (id->capability & 2) && lba_capacity_is_ok(id)) {
-               if (id->lba_capacity >= capacity) {
-                       capacity = id->lba_capacity;
-                       drive->select.b.lba = 1;
-               }
-       }
-       return (capacity - drive->sect0);
+       if (drive->driver != NULL)
+               return DRIVER(drive)->capacity(drive);
+       return 0;
 }
 
 /*
  * ide_geninit() is called exactly *once* for each major, from genhd.c,
  * at the beginning of the initial partition check for the drives.
  */
-static void ide_geninit (struct gendisk *gd)
+void ide_geninit (struct gendisk *gd)
 {
        unsigned int unit;
        ide_hwif_t *hwif = gd->real_devices;
 
        for (unit = 0; unit < gd->nr_real; ++unit) {
                ide_drive_t *drive = &hwif->drives[unit];
-#ifdef CONFIG_BLK_DEV_IDECD
-               if (drive->present && drive->media == ide_cdrom)
-                       ide_cdrom_setup(drive);
-#endif /* CONFIG_BLK_DEV_IDECD */
-#ifdef CONFIG_BLK_DEV_IDETAPE
-               if (drive->present && drive->media == ide_tape)
-                       idetape_setup(drive);
-#endif /* CONFIG_BLK_DEV_IDETAPE */
+
                drive->part[0].nr_sects = current_capacity(drive);
-               if (!drive->present || drive->media != ide_disk) {
+               if (!drive->present || drive->media != ide_disk || drive->driver == NULL)
                        drive->part[0].start_sect = -1; /* skip partition check */
-               }
-       }
-}
-
-/*
- * init_gendisk() (as opposed to ide_geninit) is called for each major device,
- * after probing for drives, to allocate partition tables and other data
- * structures needed for the routines in genhd.c.  ide_geninit() gets called
- * somewhat later, during the partition check.
- */
-static void init_gendisk (ide_hwif_t *hwif)
-{
-       struct gendisk *gd, **gdp;
-       unsigned int unit, units, minors;
-       int *bs;
-
-       /* figure out maximum drive number on the interface */
-       for (units = MAX_DRIVES; units > 0; --units) {
-               if (hwif->drives[units-1].present)
-                       break;
        }
-       minors    = units * (1<<PARTN_BITS);
-       gd        = kmalloc (sizeof(struct gendisk), GFP_KERNEL);
-       gd->sizes = kmalloc (minors * sizeof(int), GFP_KERNEL);
-       gd->part  = kmalloc (minors * sizeof(struct hd_struct), GFP_KERNEL);
-       bs        = kmalloc (minors*sizeof(int), GFP_KERNEL);
-
-       memset(gd->part, 0, minors * sizeof(struct hd_struct));
-
-       /* cdroms and msdos f/s are examples of non-1024 blocksizes */
-       blksize_size[hwif->major] = bs;
-       for (unit = 0; unit < minors; ++unit)
-               *bs++ = BLOCK_SIZE;
-
-       for (unit = 0; unit < units; ++unit)
-               hwif->drives[unit].part = &gd->part[unit << PARTN_BITS];
-
-       gd->major       = hwif->major;          /* our major device number */
-       gd->major_name  = IDE_MAJOR_NAME;       /* treated special in genhd.c */
-       gd->minor_shift = PARTN_BITS;           /* num bits for partitions */
-       gd->max_p       = 1<<PARTN_BITS;        /* 1 + max partitions / drive */
-       gd->max_nr      = units;                /* max num real drives */
-       gd->nr_real     = units;                /* current num real drives */
-       gd->init        = ide_geninit;          /* initialization function */
-       gd->real_devices= hwif;                 /* ptr to internal data */
-       gd->next        = NULL;                 /* linked list of major devs */
-
-       for (gdp = &gendisk_head; *gdp; gdp = &((*gdp)->next)) ;
-       hwif->gd = *gdp = gd;                   /* link onto tail of list */
 }
 
 static void do_reset1 (ide_drive_t *, int);            /* needed below */
 
-#ifdef CONFIG_BLK_DEV_IDEATAPI
 /*
  * atapi_reset_pollfunc() gets invoked to poll the interface for completion every 50ms
  * during an atapi drive reset operation. If the drive has not yet responded,
@@ -710,7 +665,6 @@ static void atapi_reset_pollfunc (ide_drive_t *drive)
        }
        hwgroup->poll_timeout = 0;      /* done polling */
 }
-#endif /* CONFIG_BLK_DEV_IDEATAPI */
 
 /*
  * reset_pollfunc() gets invoked to poll the interface for completion every 50ms
@@ -761,6 +715,20 @@ static void reset_pollfunc (ide_drive_t *drive)
        hwgroup->poll_timeout = 0;      /* done polling */
 }
 
+static void pre_reset (ide_drive_t *drive)
+{
+       if (!drive->keep_settings) {
+               drive->unmask = 0;
+               drive->io_32bit = 0;
+               if (drive->using_dma) {
+                       drive->using_dma = 0;
+                       printk("%s: disabled DMA\n", drive->name);
+               }
+       }
+       if (drive->driver != NULL)
+               DRIVER(drive)->pre_reset(drive);
+}
+
 /*
  * do_reset1() attempts to recover a confused drive by resetting it.
  * Unfortunately, resetting a disk drive actually resets all devices on
@@ -786,48 +754,24 @@ static void do_reset1 (ide_drive_t *drive, int  do_not_try_atapi)
        save_flags(flags);
        cli();          /* Why ? */
 
-#ifdef CONFIG_BLK_DEV_IDEATAPI
        /* For an ATAPI device, first try an ATAPI SRST. */
-       if (drive->media != ide_disk) {
-               if (!do_not_try_atapi) {
-                       if (!drive->keep_settings) {
-                               drive->unmask = 0;
-                               drive->io_32bit = 0;
-                       }
-                       OUT_BYTE (drive->select.all, IDE_SELECT_REG);
-                       udelay (20);
-                       OUT_BYTE (WIN_SRST, IDE_COMMAND_REG);
-                       hwgroup->poll_timeout = jiffies + WAIT_WORSTCASE;
-                       ide_set_handler (drive, &atapi_reset_pollfunc, HZ/20);
-                       restore_flags (flags);
-                       return;
-               }
+       if (drive->media != ide_disk && !do_not_try_atapi) {
+               pre_reset(drive);
+               OUT_BYTE (drive->select.all, IDE_SELECT_REG);
+               udelay (20);
+               OUT_BYTE (WIN_SRST, IDE_COMMAND_REG);
+               hwgroup->poll_timeout = jiffies + WAIT_WORSTCASE;
+               ide_set_handler (drive, &atapi_reset_pollfunc, HZ/20);
+               restore_flags (flags);
+               return;
        }
-#endif /* CONFIG_BLK_DEV_IDEATAPI */
 
        /*
         * First, reset any device state data we were maintaining
         * for any of the drives on this interface.
         */
-       for (unit = 0; unit < MAX_DRIVES; ++unit) {
-               ide_drive_t *rdrive = &hwif->drives[unit];
-               rdrive->special.all = 0;
-               rdrive->special.b.set_geometry = 1;
-               rdrive->special.b.recalibrate  = 1;
-               if (OK_TO_RESET_CONTROLLER)
-                       rdrive->mult_count = 0;
-               if (!rdrive->keep_settings) {
-                       rdrive->mult_req = 0;
-                       rdrive->unmask = 0;
-                       rdrive->io_32bit = 0;
-                       if (rdrive->using_dma) {
-                               rdrive->using_dma = 0;
-                               printk("%s: disabled DMA\n", rdrive->name);
-                       }
-               }
-               if (rdrive->mult_req != rdrive->mult_count)
-                       rdrive->special.b.set_multmode = 1;
-       }
+       for (unit = 0; unit < MAX_DRIVES; ++unit)
+               pre_reset(&hwif->drives[unit]);
 
 #if OK_TO_RESET_CONTROLLER
        /*
@@ -855,10 +799,6 @@ static void do_reset1 (ide_drive_t *drive, int  do_not_try_atapi)
 void ide_do_reset (ide_drive_t *drive)
 {
        do_reset1 (drive, 0);
-#ifdef CONFIG_BLK_DEV_IDETAPE
-       if (drive->media == ide_tape)
-               drive->tape.reset_issued=1;
-#endif /* CONFIG_BLK_DEV_IDETAPE */
 }
 
 /*
@@ -897,7 +837,7 @@ byte ide_dump_status (ide_drive_t *drive, const char *msg, byte stat)
        byte err = 0;
 
        save_flags (flags);
-       sti();
+       ide_sti();
        printk("%s: %s: status=0x%02x", drive->name, msg, stat);
 #if FANCY_STATUS_DUMPS
        if (drive->media == ide_disk) {
@@ -1009,16 +949,10 @@ void ide_error (ide_drive_t *drive, const char *msg, byte stat)
                rq->errors |= ERROR_RESET;      /* Mmmm.. timing problem */
 
        if (rq->errors >= ERROR_MAX) {
-#ifdef CONFIG_BLK_DEV_IDETAPE
-               if (drive->media == ide_tape) {
-                       rq->errors = 0;
-                       idetape_end_request(0, HWGROUP(drive));
-               }
-               else
-#endif /* CONFIG_BLK_DEV_IDETAPE */
+               if (drive->driver != NULL)
+                       DRIVER(drive)->end_request(0, HWGROUP(drive));
                ide_end_request(0, HWGROUP(drive));
-       }
-       else {
+       } else {
                if ((rq->errors & ERROR_RESET) == ERROR_RESET) {
                        ++rq->errors;
                        ide_do_reset(drive);
@@ -1029,154 +963,11 @@ void ide_error (ide_drive_t *drive, const char *msg, byte stat)
        }
 }
 
-/*
- * read_intr() is the handler for disk read/multread interrupts
- */
-static void read_intr (ide_drive_t *drive)
-{
-       byte stat;
-       int i;
-       unsigned int msect, nsect;
-       struct request *rq;
-
-       if (!OK_STAT(stat=GET_STAT(),DATA_READY,BAD_R_STAT)) {
-               ide_error(drive, "read_intr", stat);
-               return;
-       }
-       msect = drive->mult_count;
-read_next:
-       rq = HWGROUP(drive)->rq;
-       if (msect) {
-               if ((nsect = rq->current_nr_sectors) > msect)
-                       nsect = msect;
-               msect -= nsect;
-       } else
-               nsect = 1;
-       ide_input_data(drive, rq->buffer, nsect * SECTOR_WORDS);
-#ifdef DEBUG
-       printk("%s:  read: sectors(%ld-%ld), buffer=0x%08lx, remaining=%ld\n",
-               drive->name, rq->sector, rq->sector+nsect-1,
-               (unsigned long) rq->buffer+(nsect<<9), rq->nr_sectors-nsect);
-#endif
-       rq->sector += nsect;
-       rq->buffer += nsect<<9;
-       rq->errors = 0;
-       i = (rq->nr_sectors -= nsect);
-       if ((rq->current_nr_sectors -= nsect) <= 0)
-               ide_end_request(1, HWGROUP(drive));
-       if (i > 0) {
-               if (msect)
-                       goto read_next;
-               ide_set_handler (drive, &read_intr, WAIT_CMD);
-       }
-}
-
-/*
- * write_intr() is the handler for disk write interrupts
- */
-static void write_intr (ide_drive_t *drive)
-{
-       byte stat;
-       int i;
-       ide_hwgroup_t *hwgroup = HWGROUP(drive);
-       struct request *rq = hwgroup->rq;
-
-       if (OK_STAT(stat=GET_STAT(),DRIVE_READY,drive->bad_wstat)) {
-#ifdef DEBUG
-               printk("%s: write: sector %ld, buffer=0x%08lx, remaining=%ld\n",
-                       drive->name, rq->sector, (unsigned long) rq->buffer,
-                       rq->nr_sectors-1);
-#endif
-               if ((rq->nr_sectors == 1) ^ ((stat & DRQ_STAT) != 0)) {
-                       rq->sector++;
-                       rq->buffer += 512;
-                       rq->errors = 0;
-                       i = --rq->nr_sectors;
-                       --rq->current_nr_sectors;
-                       if (rq->current_nr_sectors <= 0)
-                               ide_end_request(1, hwgroup);
-                       if (i > 0) {
-                               ide_output_data (drive, rq->buffer, SECTOR_WORDS);
-                               ide_set_handler (drive, &write_intr, WAIT_CMD);
-                       }
-                       return;
-               }
-       }
-       ide_error(drive, "write_intr", stat);
-}
-
-/*
- * ide_multwrite() transfers a block of up to mcount sectors of data
- * to a drive as part of a disk multiple-sector write operation.
- */
-void ide_multwrite (ide_drive_t *drive, unsigned int mcount)
-{
-       struct request *rq = &HWGROUP(drive)->wrq;
-
-       do {
-               unsigned int nsect = rq->current_nr_sectors;
-               if (nsect > mcount)
-                       nsect = mcount;
-               mcount -= nsect;
-
-               ide_output_data(drive, rq->buffer, nsect<<7);
-#ifdef DEBUG
-               printk("%s: multwrite: sector %ld, buffer=0x%08lx, count=%d, remaining=%ld\n",
-                       drive->name, rq->sector, (unsigned long) rq->buffer,
-                       nsect, rq->nr_sectors - nsect);
-#endif
-               if ((rq->nr_sectors -= nsect) <= 0)
-                       break;
-               if ((rq->current_nr_sectors -= nsect) == 0) {
-                       if ((rq->bh = rq->bh->b_reqnext) != NULL) {
-                               rq->current_nr_sectors = rq->bh->b_size>>9;
-                               rq->buffer             = rq->bh->b_data;
-                       } else {
-                               panic("%s: buffer list corrupted\n", drive->name);
-                               break;
-                       }
-               } else {
-                       rq->buffer += nsect << 9;
-               }
-       } while (mcount);
-}
-
-/*
- * multwrite_intr() is the handler for disk multwrite interrupts
- */
-static void multwrite_intr (ide_drive_t *drive)
-{
-       byte stat;
-       int i;
-       ide_hwgroup_t *hwgroup = HWGROUP(drive);
-       struct request *rq = &hwgroup->wrq;
-
-       if (OK_STAT(stat=GET_STAT(),DRIVE_READY,drive->bad_wstat)) {
-               if (stat & DRQ_STAT) {
-                       if (rq->nr_sectors) {
-                               ide_multwrite(drive, drive->mult_count);
-                               ide_set_handler (drive, &multwrite_intr, WAIT_CMD);
-                               return;
-                       }
-               } else {
-                       if (!rq->nr_sectors) {  /* all done? */
-                               rq = hwgroup->rq;
-                               for (i = rq->nr_sectors; i > 0;){
-                                       i -= rq->current_nr_sectors;
-                                       ide_end_request(1, hwgroup);
-                               }
-                               return;
-                       }
-               }
-       }
-       ide_error(drive, "multwrite_intr", stat);
-}
-
 /*
  * Issue a simple drive command
  * The drive must be selected beforehand.
  */
-static void ide_cmd(ide_drive_t *drive, byte cmd, byte nsect, ide_handler_t *handler)
+void ide_cmd(ide_drive_t *drive, byte cmd, byte nsect, ide_handler_t *handler)
 {
        ide_set_handler (drive, handler, WAIT_CMD);
        OUT_BYTE(drive->ctl,IDE_CONTROL_REG);
@@ -1184,47 +975,6 @@ static void ide_cmd(ide_drive_t *drive, byte cmd, byte nsect, ide_handler_t *han
        OUT_BYTE(cmd,IDE_COMMAND_REG);
 }
 
-/*
- * set_multmode_intr() is invoked on completion of a WIN_SETMULT cmd.
- */
-static void set_multmode_intr (ide_drive_t *drive)
-{
-       byte stat = GET_STAT();
-
-       sti();
-       if (OK_STAT(stat,READY_STAT,BAD_STAT)) {
-               drive->mult_count = drive->mult_req;
-       } else {
-               drive->mult_req = drive->mult_count = 0;
-               drive->special.b.recalibrate = 1;
-               (void) ide_dump_status(drive, "set_multmode", stat);
-       }
-}
-
-/*
- * set_geometry_intr() is invoked on completion of a WIN_SPECIFY cmd.
- */
-static void set_geometry_intr (ide_drive_t *drive)
-{
-       byte stat = GET_STAT();
-
-       sti();
-       if (!OK_STAT(stat,READY_STAT,BAD_STAT))
-               ide_error(drive, "set_geometry_intr", stat);
-}
-
-/*
- * recal_intr() is invoked on completion of a WIN_RESTORE (recalibrate) cmd.
- */
-static void recal_intr (ide_drive_t *drive)
-{
-       byte stat = GET_STAT();
-
-       sti();
-       if (!OK_STAT(stat,READY_STAT,BAD_STAT))
-               ide_error(drive, "recal_intr", stat);
-}
-
 /*
  * drive_cmd_intr() is invoked on completion of a special DRIVE_CMD.
  */
@@ -1234,7 +984,7 @@ static void drive_cmd_intr (ide_drive_t *drive)
        byte *args = (byte *) rq->buffer;
        byte stat = GET_STAT();
 
-       sti();
+       ide_sti();
        if ((stat & DRQ_STAT) && args && args[3]) {
                byte io_32bit = drive->io_32bit;
                drive->io_32bit = 0;
@@ -1259,38 +1009,16 @@ static inline void do_special (ide_drive_t *drive)
 #ifdef DEBUG
        printk("%s: do_special: 0x%02x\n", drive->name, s->all);
 #endif
-       if (s->b.set_geometry) {
-               s->b.set_geometry = 0;
-               if (drive->media == ide_disk) {
-                       OUT_BYTE(drive->sect,IDE_SECTOR_REG);
-                       OUT_BYTE(drive->cyl,IDE_LCYL_REG);
-                       OUT_BYTE(drive->cyl>>8,IDE_HCYL_REG);
-                       OUT_BYTE(((drive->head-1)|drive->select.all)&0xBF,IDE_SELECT_REG);
-                       if (!IS_PROMISE_DRIVE)
-                               ide_cmd(drive, WIN_SPECIFY, drive->sect, &set_geometry_intr);
-               }
-       } else if (s->b.recalibrate) {
-               s->b.recalibrate = 0;
-               if (drive->media == ide_disk && !IS_PROMISE_DRIVE)
-                       ide_cmd(drive, WIN_RESTORE, drive->sect, &recal_intr);
-       } else if (s->b.set_tune) {
+       if (s->b.set_tune) {
                ide_tuneproc_t *tuneproc = HWIF(drive)->tuneproc;
                s->b.set_tune = 0;
                if (tuneproc != NULL)
                        tuneproc(drive, drive->tune_req);
-       } else if (s->b.set_multmode) {
-               s->b.set_multmode = 0;
-               if (drive->media == ide_disk) {
-                       if (drive->id && drive->mult_req > drive->id->max_multsect)
-                               drive->mult_req = drive->id->max_multsect;
-                       if (!IS_PROMISE_DRIVE)
-                               ide_cmd(drive, WIN_SETMULT, drive->mult_req, &set_multmode_intr);
-               } else
-                       drive->mult_req = 0;
+       } else if (drive->driver != NULL) {
+               DRIVER(drive)->special(drive);
        } else if (s->all) {
-               int special = s->all;
+               printk("%s: bad special flag: 0x%02x\n", drive->name, s->all);
                s->all = 0;
-               printk("%s: bad special flag: 0x%02x\n", drive->name, special);
        }
 }
 
@@ -1320,7 +1048,7 @@ test:
        }
 
        save_flags(flags);
-       sti();
+       ide_sti();
        timeout += jiffies;
        do {
                if (!((stat = GET_STAT()) & BUSY_STAT)) {
@@ -1334,98 +1062,6 @@ test:
        return 1;
 }
 
-/*
- * do_rw_disk() issues READ and WRITE commands to a disk,
- * using LBA if supported, or CHS otherwise, to address sectors.
- * It also takes care of issuing special DRIVE_CMDs.
- */
-static inline void do_rw_disk (ide_drive_t *drive, struct request *rq, unsigned long block)
-{
-       ide_hwif_t *hwif = HWIF(drive);
-       unsigned short io_base = hwif->io_base;
-#ifdef CONFIG_BLK_DEV_PROMISE
-       int use_promise_io = 0;
-#endif /* CONFIG_BLK_DEV_PROMISE */
-
-       OUT_BYTE(drive->ctl,IDE_CONTROL_REG);
-       OUT_BYTE(rq->nr_sectors,io_base+IDE_NSECTOR_OFFSET);
-#ifdef CONFIG_BLK_DEV_PROMISE
-       if (IS_PROMISE_DRIVE) {
-               if (hwif->is_promise2 || rq->cmd == READ) {
-                       use_promise_io = 1;
-               }
-       }
-       if (drive->select.b.lba || use_promise_io) {
-#else /* !CONFIG_BLK_DEV_PROMISE */
-       if (drive->select.b.lba) {
-#endif /* CONFIG_BLK_DEV_PROMISE */
-#ifdef DEBUG
-               printk("%s: %sing: LBAsect=%ld, sectors=%ld, buffer=0x%08lx\n",
-                       drive->name, (rq->cmd==READ)?"read":"writ",
-                       block, rq->nr_sectors, (unsigned long) rq->buffer);
-#endif
-               OUT_BYTE(block,io_base+IDE_SECTOR_OFFSET);
-               OUT_BYTE(block>>=8,io_base+IDE_LCYL_OFFSET);
-               OUT_BYTE(block>>=8,io_base+IDE_HCYL_OFFSET);
-               OUT_BYTE(((block>>8)&0x0f)|drive->select.all,io_base+IDE_SELECT_OFFSET);
-       } else {
-               unsigned int sect,head,cyl,track;
-               track = block / drive->sect;
-               sect  = block % drive->sect + 1;
-               OUT_BYTE(sect,io_base+IDE_SECTOR_OFFSET);
-               head  = track % drive->head;
-               cyl   = track / drive->head;
-               OUT_BYTE(cyl,io_base+IDE_LCYL_OFFSET);
-               OUT_BYTE(cyl>>8,io_base+IDE_HCYL_OFFSET);
-               OUT_BYTE(head|drive->select.all,io_base+IDE_SELECT_OFFSET);
-#ifdef DEBUG
-               printk("%s: %sing: CHS=%d/%d/%d, sectors=%ld, buffer=0x%08lx\n",
-                       drive->name, (rq->cmd==READ)?"read":"writ", cyl,
-                       head, sect, rq->nr_sectors, (unsigned long) rq->buffer);
-#endif
-       }
-#ifdef CONFIG_BLK_DEV_PROMISE
-       if (use_promise_io) {
-               do_promise_io (drive, rq);
-               return;
-       }
-#endif /* CONFIG_BLK_DEV_PROMISE */
-       if (rq->cmd == READ) {
-#ifdef CONFIG_BLK_DEV_TRITON
-               if (drive->using_dma && !(HWIF(drive)->dmaproc(ide_dma_read, drive)))
-                       return;
-#endif /* CONFIG_BLK_DEV_TRITON */
-               ide_set_handler(drive, &read_intr, WAIT_CMD);
-               OUT_BYTE(drive->mult_count ? WIN_MULTREAD : WIN_READ, io_base+IDE_COMMAND_OFFSET);
-               return;
-       }
-       if (rq->cmd == WRITE) {
-#ifdef CONFIG_BLK_DEV_TRITON
-               if (drive->using_dma && !(HWIF(drive)->dmaproc(ide_dma_write, drive)))
-                       return;
-#endif /* CONFIG_BLK_DEV_TRITON */
-               OUT_BYTE(drive->mult_count ? WIN_MULTWRITE : WIN_WRITE, io_base+IDE_COMMAND_OFFSET);
-               if (ide_wait_stat(drive, DATA_READY, drive->bad_wstat, WAIT_DRQ)) {
-                       printk("%s: no DRQ after issuing %s\n", drive->name,
-                               drive->mult_count ? "MULTWRITE" : "WRITE");
-                       return;
-               }
-               if (!drive->unmask)
-                       cli();
-               if (drive->mult_count) {
-                       HWGROUP(drive)->wrq = *rq; /* scratchpad */
-                       ide_set_handler (drive, &multwrite_intr, WAIT_CMD);
-                       ide_multwrite(drive, drive->mult_count);
-               } else {
-                       ide_set_handler (drive, &write_intr, WAIT_CMD);
-                       ide_output_data(drive, rq->buffer, SECTOR_WORDS);
-               }
-               return;
-       }
-       printk("%s: bad command: %d\n", drive->name, rq->cmd);
-       ide_end_request(0, HWGROUP(drive));
-}
-
 /*
  * execute_drive_cmd() issues a special drive command,
  * usually initiated by ioctl() from the external hdparm program.
@@ -1461,9 +1097,9 @@ static inline void do_request (ide_hwif_t *hwif, struct request *rq)
 {
        unsigned int minor, unit;
        unsigned long block, blockend;
-       ide_drive_t *drive;
+       ide_drive_t *drive = NULL;
 
-       sti();
+       ide_sti();
 #ifdef DEBUG
        printk("%s: do_request: current=0x%08lx\n", hwif->name, (unsigned long) rq);
 #endif
@@ -1498,10 +1134,6 @@ static inline void do_request (ide_hwif_t *hwif, struct request *rq)
        while ((read_timer() - hwif->last_time) < DISK_RECOVERY_TIME);
 #endif
 
-#ifdef CONFIG_BLK_DEV_IDETAPE
-       POLL_HWIF_TAPE_DRIVE;   /* macro from ide-tape.h */
-#endif /* CONFIG_BLK_DEV_IDETAPE */
-
        SELECT_DRIVE(hwif,drive);
        if (ide_wait_stat(drive, drive->ready_stat, BUSY_STAT|DRQ_STAT, WAIT_READY)) {
                printk("%s: drive not ready for command\n", drive->name);
@@ -1513,36 +1145,20 @@ static inline void do_request (ide_hwif_t *hwif, struct request *rq)
                        execute_drive_cmd(drive, rq);
                        return;
                }
-#ifdef CONFIG_BLK_DEV_IDEATAPI
-               switch (drive->media) {
-                       case ide_disk:
-                               do_rw_disk (drive, rq, block);
-                               return;
-#ifdef CONFIG_BLK_DEV_IDECD
-                       case ide_cdrom:
-                               ide_do_rw_cdrom (drive, block);
-                               return;
-#endif /* CONFIG_BLK_DEV_IDECD */
-#ifdef CONFIG_BLK_DEV_IDETAPE
-                       case ide_tape:
-                               idetape_do_request (drive, rq, block);
-                               return;
-#endif /* CONFIG_BLK_DEV_IDETAPE */
-
-                       default:
-                               printk("%s: media type %d not supported\n",
-                                       drive->name, drive->media);
-                               goto kill_rq;
+               if (drive->driver != NULL) {
+                       DRIVER(drive)->do_request(drive, rq, block);
+                       return;
                }
-#else
-               do_rw_disk (drive, rq, block); /* simpler and faster */
-               return;
-#endif /* CONFIG_BLK_DEV_IDEATAPI */;
+               printk("%s: media type %d not supported\n", drive->name, drive->media);
+               goto kill_rq;
        }
        do_special(drive);
        return;
 kill_rq:
-       ide_end_request(0, hwif->hwgroup);
+       if (drive != NULL && drive->driver != NULL)
+               DRIVER(drive)->end_request(0, HWGROUP(drive));
+       else
+               ide_end_request(0, hwif->hwgroup);
 }
 
 /*
@@ -1572,7 +1188,7 @@ void ide_do_request (ide_hwgroup_t *hwgroup)
                struct request *rq;
                if ((rq = hwgroup->rq) == NULL) {
                        if (hwif->sharing_irq && hwgroup->drive) /* set nIEN */
-                               OUT_BYTE(hwgroup->drive->ctl|2,hwif->ctl_port);
+                               OUT_BYTE(hwgroup->drive->ctl|2,hwif->io_ports[IDE_CONTROL_OFFSET]);
                        /*
                         * hwgroup->next_hwif is different from hwgroup->hwif
                         * only when a request is inserted using "ide_next".
@@ -1584,6 +1200,7 @@ void ide_do_request (ide_hwgroup_t *hwgroup)
                                if (rq != NULL && rq->rq_status != RQ_INACTIVE)
                                        goto got_rq;
                        } while ((hwif = hwif->next) != hwgroup->next_hwif);
+                       ide_release_lock(&ide_lock);
                        return;         /* no work left for this hwgroup */
                }
        got_rq: 
@@ -1608,6 +1225,8 @@ static void do_hwgroup_request (ide_hwgroup_t *hwgroup)
        if (hwgroup->handler == NULL) {
                ide_hwif_t *hgif = hwgroup->hwif;
                ide_hwif_t *hwif = hgif;
+
+               ide_get_lock(&ide_lock, ide_intr, hwgroup);
                do {
                        disable_irq(hwif->irq);
                } while ((hwif = hwif->next) != hgif);
@@ -1618,33 +1237,33 @@ static void do_hwgroup_request (ide_hwgroup_t *hwgroup)
        }
 }
 
-static void do_ide0_request (void)     /* invoked with cli() */
+void do_ide0_request (void)    /* invoked with cli() */
 {
        do_hwgroup_request (ide_hwifs[0].hwgroup);
 }
 
 #if MAX_HWIFS > 1
-static void do_ide1_request (void)     /* invoked with cli() */
+void do_ide1_request (void)    /* invoked with cli() */
 {
        do_hwgroup_request (ide_hwifs[1].hwgroup);
 }
-#endif
+#endif /* MAX_HWIFS > 1 */
 
 #if MAX_HWIFS > 2
-static void do_ide2_request (void)     /* invoked with cli() */
+void do_ide2_request (void)    /* invoked with cli() */
 {
        do_hwgroup_request (ide_hwifs[2].hwgroup);
 }
-#endif
+#endif /* MAX_HWIFS > 2 */
 
 #if MAX_HWIFS > 3
-static void do_ide3_request (void)     /* invoked with cli() */
+void do_ide3_request (void)    /* invoked with cli() */
 {
        do_hwgroup_request (ide_hwifs[3].hwgroup);
 }
-#endif
+#endif /* MAX_HWIFS > 3 */
 
-static void timer_expiry (unsigned long data)
+void ide_timer_expiry (unsigned long data)
 {
        ide_hwgroup_t *hwgroup = (ide_hwgroup_t *) data;
        ide_drive_t   *drive   = hwgroup->drive;
@@ -1658,7 +1277,7 @@ static void timer_expiry (unsigned long data)
                hwgroup->handler = NULL;
                handler(drive);
        } else if (hwgroup->handler == NULL) {   /* not waiting for anything? */
-               sti(); /* drive must have responded just as the timer expired */
+               ide_sti(); /* drive must have responded just as the timer expired */
                printk("%s: marginal timeout\n", drive->name);
        } else {
                hwgroup->handler = NULL;        /* abort the operation */
@@ -1736,17 +1355,21 @@ void ide_intr (int irq, void *dev_id, struct pt_regs *regs)
        ide_hwgroup_t *hwgroup = dev_id;
        ide_handler_t *handler;
 
+       if (!ide_ack_intr (hwgroup->hwif->io_ports[IDE_DATA_OFFSET],
+                          hwgroup->hwif->io_ports[IDE_IRQ_OFFSET]))
+               return;
+
        if (irq == hwgroup->hwif->irq && (handler = hwgroup->handler) != NULL) {
                ide_drive_t *drive = hwgroup->drive;
                hwgroup->handler = NULL;
                del_timer(&(hwgroup->timer));
                if (drive->unmask)
-                       sti();
+                       ide_sti();
                handler(drive);
                cli();  /* this is necessary, as next rq may be different irq */
                if (hwgroup->handler == NULL) {
                        SET_RECOVERY_TIMER(HWIF(drive));
-                       ide_do_request(hwgroup);
+                       ide_do_request(hwgroup);
                }
        } else {
                unexpected_intr(irq, hwgroup);
@@ -1795,12 +1418,6 @@ void ide_init_drive_cmd (struct request *rq)
        rq->bh = NULL;
        rq->bhtail = NULL;
        rq->next = NULL;
-
-#if 0  /* these are done each time through ide_do_drive_cmd() */
-       rq->errors = 0;
-       rq->rq_status = RQ_ACTIVE;
-       rq->rq_dev = ????;
-#endif
 }
 
 /*
@@ -1876,41 +1493,121 @@ int ide_do_drive_cmd (ide_drive_t *drive, struct request *rq, ide_action_t actio
        return rq->errors ? -EIO : 0;   /* return -EIO if errors */
 }
 
-static int ide_open(struct inode * inode, struct file * filp)
+/*
+ * This routine is called to flush all partitions and partition tables
+ * for a changed disk, and then re-read the new partition table.
+ * If we are revalidating a disk because of a media change, then we
+ * enter with usage == 0.  If we are using an ioctl, we automatically have
+ * usage == 1 (we need an open channel to use an ioctl :-), so this
+ * is our limit.
+ */
+int ide_revalidate_disk(kdev_t i_rdev)
 {
        ide_drive_t *drive;
-       unsigned long flags;
+       unsigned int p, major, minor;
+       long flags;
 
-       if ((drive = get_info_ptr(inode->i_rdev)) == NULL)
-               return -ENXIO;
+       if ((drive = get_info_ptr(i_rdev)) == NULL)
+               return -ENODEV;
+       major = MAJOR(i_rdev);
+       minor = drive->select.b.unit << PARTN_BITS;
        save_flags(flags);
        cli();
+       if (drive->busy || (drive->usage > 1)) {
+               restore_flags(flags);
+               return -EBUSY;
+       };
+       drive->busy = 1;
+       MOD_INC_USE_COUNT;
+       restore_flags(flags);
+
+       for (p = 0; p < (1<<PARTN_BITS); ++p) {
+               if (drive->part[p].nr_sects > 0) {
+                       kdev_t devp = MKDEV(major, minor+p);
+                       fsync_dev          (devp);
+                       invalidate_inodes  (devp);
+                       invalidate_buffers (devp);
+               }
+               drive->part[p].start_sect = 0;
+               drive->part[p].nr_sects   = 0;
+       };
+
+       drive->part[0].nr_sects = current_capacity(drive);
+       if (drive->media != ide_disk || drive->driver == NULL)
+               drive->part[0].start_sect = -1;
+       resetup_one_dev(HWIF(drive)->gd, drive->select.b.unit);
+
+       drive->busy = 0;
+       wake_up(&drive->wqueue);
+       MOD_DEC_USE_COUNT;
+       return 0;
+}
+
+static void revalidate_drives (void)
+{
+       ide_hwif_t *hwif;
+       ide_drive_t *drive;
+       int index, unit;
+
+       for (index = 0; index < MAX_HWIFS; ++index) {
+               hwif = &ide_hwifs[index];
+               for (unit = 0; unit < MAX_DRIVES; ++unit) {
+                       drive = &ide_hwifs[index].drives[unit];
+                       if (drive->revalidate) {
+                               drive->revalidate = 0;
+                               if (!initializing)
+                                       (void) ide_revalidate_disk(MKDEV(hwif->major, unit<<PARTN_BITS));
+                       }
+               }
+       }
+}
+
+static void ide_init_module (int type)
+{
+       ide_module_t *module = ide_modules;
+       
+       while (module) {
+               if (module->type == type)
+                       (void) module->init();
+               module = module->next;
+       }
+       revalidate_drives();
+}
+
+static int ide_open(struct inode * inode, struct file * filp)
+{
+       ide_drive_t *drive;
+       int rc;
+
+       if ((drive = get_info_ptr(inode->i_rdev)) == NULL)
+               return -ENXIO;
+       MOD_INC_USE_COUNT;
+       if (drive->driver == NULL)
+               ide_init_module(IDE_DRIVER_MODULE);
+#ifdef CONFIG_KERNELD
+       if (drive->driver == NULL) {
+               if (drive->media == ide_disk)
+                       (void) request_module("ide-disk");
+               if (drive->media == ide_cdrom)
+                       (void) request_module("ide-cd");
+               if (drive->media == ide_tape)
+                       (void) request_module("ide-tape");
+               if (drive->media == ide_floppy)
+                       (void) request_module("ide-floppy");
+       }
+#endif /* CONFIG_KERNELD */
        while (drive->busy)
                sleep_on(&drive->wqueue);
        drive->usage++;
-       restore_flags(flags);
-#ifdef CONFIG_BLK_DEV_IDECD
-       if (drive->media == ide_cdrom)
-               return ide_cdrom_open (inode, filp, drive);
-#endif /* CONFIG_BLK_DEV_IDECD */
-#ifdef CONFIG_BLK_DEV_IDETAPE
-       if (drive->media == ide_tape)
-               return idetape_blkdev_open (inode, filp, drive);
-#endif /* CONFIG_BLK_DEV_IDETAPE */
-       if (drive->removable && drive->usage == 1) {
-               byte door_lock[] = {WIN_DOORLOCK,0,0,0};
-               struct request rq;
-               check_disk_change(inode->i_rdev);
-               ide_init_drive_cmd (&rq);
-               rq.buffer = door_lock;
-               /*
-                * Ignore the return code from door_lock,
-                * since the open() has already succeeded,
-                * and the door_lock is irrelevant at this point.
-                */
-               (void) ide_do_drive_cmd(drive, &rq, ide_wait);
+       if (drive->driver != NULL) {
+               if ((rc = DRIVER(drive)->open(inode, filp, drive)))
+                       MOD_DEC_USE_COUNT;
+               return rc;
        }
-       return 0;
+       printk ("%s: driver not present\n", drive->name);
+       drive->usage--;
+       MOD_DEC_USE_COUNT;
+       return -ENXIO;
 }
 
 /*
@@ -1924,88 +1621,137 @@ static void ide_release(struct inode * inode, struct file * file)
        if ((drive = get_info_ptr(inode->i_rdev)) != NULL) {
                fsync_dev(inode->i_rdev);
                drive->usage--;
-#ifdef CONFIG_BLK_DEV_IDECD
-               if (drive->media == ide_cdrom) {
-                       ide_cdrom_release (inode, file, drive);
-                       return;
-               }
-#endif /* CONFIG_BLK_DEV_IDECD */
-#ifdef CONFIG_BLK_DEV_IDETAPE
-               if (drive->media == ide_tape) {
-                       idetape_blkdev_release (inode, file, drive);
-                       return;
-               }
-#endif /* CONFIG_BLK_DEV_IDETAPE */
-               if (drive->removable && !drive->usage) {
-                       byte door_unlock[] = {WIN_DOORUNLOCK,0,0,0};
-                       struct request rq;
-                       invalidate_buffers(inode->i_rdev);
-                       ide_init_drive_cmd (&rq);
-                       rq.buffer = door_unlock;
-                       (void) ide_do_drive_cmd(drive, &rq, ide_wait);
-               }
+               if (drive->driver != NULL)
+                       DRIVER(drive)->release(inode, file, drive);
+               MOD_DEC_USE_COUNT;
        }
 }
 
-/*
- * This routine is called to flush all partitions and partition tables
- * for a changed disk, and then re-read the new partition table.
- * If we are revalidating a disk because of a media change, then we
- * enter with usage == 0.  If we are using an ioctl, we automatically have
- * usage == 1 (we need an open channel to use an ioctl :-), so this
- * is our limit.
- */
-static int revalidate_disk(kdev_t i_rdev)
+void ide_unregister (unsigned int index)
 {
+       struct gendisk *gd, **gdp;
        ide_drive_t *drive;
-       unsigned int p, major, minor;
-       long flags;
-
-       if ((drive = get_info_ptr(i_rdev)) == NULL)
-               return -ENODEV;
+       ide_hwif_t *hwif, *g;
+       ide_hwgroup_t *hwgroup;
+       int irq_count = 0, unit;
+       unsigned long flags;
 
-       major = MAJOR(i_rdev);
-       minor = drive->select.b.unit << PARTN_BITS;
+       if (index >= MAX_HWIFS)
+               return;
        save_flags(flags);
        cli();
-       if (drive->busy || (drive->usage > 1)) {
-               restore_flags(flags);
-               return -EBUSY;
-       };
-       drive->busy = 1;
-       restore_flags(flags);
-
-       for (p = 0; p < (1<<PARTN_BITS); ++p) {
-               if (drive->part[p].nr_sects > 0) {
-                       kdev_t devp = MKDEV(major, minor+p);
-                       fsync_dev          (devp);
-                       invalidate_inodes  (devp);
-                       invalidate_buffers (devp);
+       hwif = &ide_hwifs[index];
+       if (!hwif->present)
+               goto abort;
+       for (unit = 0; unit < MAX_DRIVES; ++unit) {
+               drive = &hwif->drives[unit];
+               if (!drive->present)
+                       continue;
+               if (drive->busy || drive->usage)
+                       goto abort;
+               if (drive->driver != NULL && DRIVER(drive)->cleanup(drive))
+                       goto abort;
+               if (drive->id != NULL) {
+                       kfree(drive->id);
+                       drive->id = NULL;
                }
-               drive->part[p].start_sect = 0;
-               drive->part[p].nr_sects   = 0;
-       };
+               drive->present = 0;
+       }
+       hwif->present = 0;
+       hwgroup = hwif->hwgroup;
 
-       drive->part[0].nr_sects = current_capacity(drive);
-       if (drive->media != ide_disk)
-               drive->part[0].start_sect = -1;
-       resetup_one_dev(HWIF(drive)->gd, drive->select.b.unit);
+       /*
+        * free the irq if we were the only hwif using it
+        */
+       g = hwgroup->hwif;
+       do {
+               if (g->irq == hwif->irq)
+                       ++irq_count;
+               g = g->next;
+       } while (g != hwgroup->hwif);
+       if (irq_count == 1)
+               free_irq(hwif->irq, hwgroup);
 
-       drive->busy = 0;
-       wake_up(&drive->wqueue);
-       return 0;
+       /*
+        * Note that we only release the standard ports,
+        * and do not even try to handle any extra ports
+        * allocated for weird IDE interface chipsets.
+        */
+       ide_release_region(hwif->io_ports[IDE_DATA_OFFSET], 8);
+       ide_release_region(hwif->io_ports[IDE_CONTROL_OFFSET], 1);
+
+       /*
+        * Remove us from the hwgroup, and free
+        * the hwgroup if we were the only member
+        */
+       while (hwgroup->hwif->next != hwif)
+               hwgroup->hwif = hwgroup->hwif->next;
+       hwgroup->hwif->next = hwif->next;
+       if (hwgroup->hwif == hwif)
+               hwgroup->hwif = hwif->next;
+       if (hwgroup->next_hwif == hwif)
+               hwgroup->next_hwif = hwif->next;
+       if (hwgroup->hwif == hwif)
+               kfree(hwgroup);
+
+       /*
+        * Remove us from the kernel's knowledge
+        */
+       unregister_blkdev(hwif->major, hwif->name);
+       kfree(blksize_size[hwif->major]);
+       blk_dev[hwif->major].request_fn = NULL;
+       blksize_size[hwif->major] = NULL;
+       for (gdp = &gendisk_head; *gdp; gdp = &((*gdp)->next))
+               if (*gdp == hwif->gd)
+                       break;
+       if (*gdp == NULL)
+               printk("gd not in disk chain!\n");
+       else {
+               gd = *gdp; *gdp = gd->next;
+               kfree(gd->sizes);
+               kfree(gd->part);
+               kfree(gd);
+       }
+       init_hwif_data (index); /* restore hwif data to pristine status */
+abort:
+       restore_flags(flags);
 }
 
-static int write_fs_long (unsigned long useraddr, long value)
+int ide_register (int arg1, int arg2, int irq)
 {
-       int err;
+       int index, retry = 1;
+       ide_hwif_t *hwif;
+       ide_ioreg_t data_port = (ide_ioreg_t) arg1, ctl_port = (ide_ioreg_t) arg2;
 
-       if (NULL == (long *)useraddr)
-               return -EINVAL;
-       if ((err = verify_area(VERIFY_WRITE, (long *)useraddr, sizeof(long))))
-               return err;
-       put_user((unsigned)value, (long *) useraddr);
-       return 0;
+       do {
+               for (index = 0; index < MAX_HWIFS; ++index) {
+                       hwif = &ide_hwifs[index];
+                       if (hwif->io_ports[IDE_DATA_OFFSET] == data_port)
+                               goto found;
+               }
+               for (index = 0; index < MAX_HWIFS; ++index) {
+                       hwif = &ide_hwifs[index];
+                       if (!hwif->present) {
+                               ide_init_hwif_ports(hwif->io_ports, data_port, &hwif->irq);
+                               if (ctl_port)
+                                       hwif->io_ports[IDE_CONTROL_OFFSET] = ctl_port;
+                               hwif->irq = irq;
+                               goto found;
+                       }
+               }
+               for (index = 0; index < MAX_HWIFS; index++)
+                       ide_unregister(index);
+       } while (retry--);
+       return -1;
+found:
+       if (hwif->present)
+               ide_unregister(index);
+       if (hwif->present)
+               return -1;
+       hwif->noprobe = 0;
+       ide_init_module(IDE_PROBE_MODULE);
+       ide_init_module(IDE_DRIVER_MODULE);
+       return hwif->present ? index : -1;
 }
 
 static int ide_ioctl (struct inode *inode, struct file *file,
@@ -2026,13 +1772,11 @@ static int ide_ioctl (struct inode *inode, struct file *file,
                {
                        struct hd_geometry *loc = (struct hd_geometry *) arg;
                        if (!loc || drive->media != ide_disk) return -EINVAL;
-                       err = verify_area(VERIFY_WRITE, loc, sizeof(*loc));
-                       if (err) return err;
-                       put_user(drive->bios_head, (byte *) &loc->heads);
-                       put_user(drive->bios_sect, (byte *) &loc->sectors);
-                       put_user(drive->bios_cyl, (unsigned short *) &loc->cylinders);
-                       put_user((unsigned)drive->part[MINOR(inode->i_rdev)&PARTN_MASK].start_sect,
-                               (unsigned long *) &loc->start);
+                       if (put_user(drive->bios_head, (byte *) &loc->heads)) return -EFAULT;
+                       if (put_user(drive->bios_sect, (byte *) &loc->sectors)) return -EFAULT;
+                       if (put_user(drive->bios_cyl, (unsigned short *) &loc->cylinders)) return -EFAULT;
+                       if (put_user((unsigned)drive->part[MINOR(inode->i_rdev)&PARTN_MASK].start_sect,
+                               (unsigned long *) &loc->start)) return -EFAULT;
                        return 0;
                }
                case BLKFLSBUF:
@@ -2048,48 +1792,46 @@ static int ide_ioctl (struct inode *inode, struct file *file,
                        return 0;
 
                case BLKRAGET:
-                       return write_fs_long(arg, read_ahead[MAJOR(inode->i_rdev)]);
+                       return put_user(read_ahead[MAJOR(inode->i_rdev)], (long *) arg);
 
                case BLKGETSIZE:   /* Return device size */
-                       return write_fs_long(arg, drive->part[MINOR(inode->i_rdev)&PARTN_MASK].nr_sects);
+                       return put_user(drive->part[MINOR(inode->i_rdev)&PARTN_MASK].nr_sects, (long *) arg);
+
                case BLKRRPART: /* Re-read partition tables */
                        if (!suser()) return -EACCES;
-                       return revalidate_disk(inode->i_rdev);
+                       return ide_revalidate_disk(inode->i_rdev);
 
                case HDIO_GET_KEEPSETTINGS:
-                       return write_fs_long(arg, drive->keep_settings);
+                       return put_user(drive->keep_settings, (long *) arg);
 
                case HDIO_GET_UNMASKINTR:
-                       return write_fs_long(arg, drive->unmask);
+                       return put_user(drive->unmask, (long *) arg);
 
                case HDIO_GET_DMA:
-                       return write_fs_long(arg, drive->using_dma);
+                       return put_user(drive->using_dma, (long *) arg);
 
                case HDIO_GET_32BIT:
-                       return write_fs_long(arg, drive->io_32bit);
+                       return put_user(drive->io_32bit, (long *) arg);
 
                case HDIO_GET_MULTCOUNT:
-                       return write_fs_long(arg, drive->mult_count);
+                       return put_user(drive->mult_count, (long *) arg);
 
                case HDIO_GET_IDENTITY:
-                       if (!arg || (MINOR(inode->i_rdev) & PARTN_MASK))
+                       if (MINOR(inode->i_rdev) & PARTN_MASK)
                                return -EINVAL;
                        if (drive->id == NULL)
                                return -ENOMSG;
-                       err = verify_area(VERIFY_WRITE, (char *)arg, sizeof(*drive->id));
-                       if (!err)
-                               copy_to_user((char *)arg, (char *)drive->id, sizeof(*drive->id));
-                       return err;
+                       if (copy_to_user((char *)arg, (char *)drive->id, sizeof(*drive->id)))
+                               return -EFAULT;
+                       return 0;
 
-                       case HDIO_GET_NOWERR:
-                       return write_fs_long(arg, drive->bad_wstat == BAD_R_STAT);
+               case HDIO_GET_NOWERR:
+                       return put_user(drive->bad_wstat == BAD_R_STAT, (long *) arg);
 
                case HDIO_SET_DMA:
                        if (!suser()) return -EACCES;
-#ifdef CONFIG_BLK_DEV_IDECD
-                       if (drive->media == ide_cdrom)
+                       if (drive->driver != NULL && !DRIVER(drive)->supports_dma)
                                return -EPERM;
-#endif /* CONFIG_BLK_DEV_IDECD */
                        if (!drive->id || !(drive->id->capability & 1) || !HWIF(drive)->dmaproc)
                                return -EPERM;
                case HDIO_SET_KEEPSETTINGS:
@@ -2166,28 +1908,23 @@ static int ide_ioctl (struct inode *inode, struct file *file,
                        byte args[4], *argbuf = args;
                        int argsize = 4;
                        if (!suser()) return -EACCES;
-                       if (NULL == (void *) arg) {
-                               err = ide_do_drive_cmd(drive, &rq, ide_wait);
-                       } else if (!(err = verify_area(VERIFY_READ,(void *)arg, 4))) {
-                               copy_from_user(args, (void *)arg, 4);
-                               if (args[3]) {
-                                       argsize = 4 + (SECTOR_WORDS * 4 * args[3]);
-                                       argbuf = kmalloc(argsize, GFP_KERNEL);
-                                       if (argbuf == NULL)
-                                               return -ENOMEM;
-                                       argbuf[0] = args[0];
-                                       argbuf[1] = args[1];
-                                       argbuf[2] = args[2];
-                                       argbuf[3] = args[3];
-                               }
-                               if (!(err = verify_area(VERIFY_WRITE,(void *)arg, argsize))) {
-                                       rq.buffer = argbuf;
-                                       err = ide_do_drive_cmd(drive, &rq, ide_wait);
-                                       copy_to_user((void *)arg, argbuf, argsize);
-                               }
-                               if (argsize > 4)
-                                       kfree(argbuf);
+                       if (NULL == (void *) arg)
+                               return ide_do_drive_cmd(drive, &rq, ide_wait);
+                       if (copy_from_user(args, (void *)arg, 4))
+                               return -EFAULT;
+                       if (args[3]) {
+                               argsize = 4 + (SECTOR_WORDS * 4 * args[3]);
+                               argbuf = kmalloc(argsize, GFP_KERNEL);
+                               if (argbuf == NULL)
+                                       return -ENOMEM;
+                               memcpy(argbuf, args, 4);
                        }
+                       rq.buffer = argbuf;
+                       err = ide_do_drive_cmd(drive, &rq, ide_wait);
+                       if (copy_to_user((void *)arg, argbuf, argsize))
+                               err = -EFAULT;
+                       if (argsize > 4)
+                               kfree(argbuf);
                        return err;
                }
                case HDIO_SET_PIO_MODE:
@@ -2208,17 +1945,22 @@ static int ide_ioctl (struct inode *inode, struct file *file,
                        (void) ide_do_drive_cmd (drive, &rq, ide_wait);
                        return 0;
 
+               case HDIO_SCAN_HWIF:
+               {
+                       int args[3];
+                       if (!suser()) return -EACCES;
+                       if (copy_from_user(args, (void *)arg, 3 * sizeof(int)))
+                               return -EFAULT;
+                       if (ide_register(args[0], args[1], args[2]) == -1)
+                               return -EIO;
+                       return 0;
+               }
+
                RO_IOCTLS(inode->i_rdev, arg);
 
                default:
-#ifdef CONFIG_BLK_DEV_IDECD
-                       if (drive->media == ide_cdrom)
-                               return ide_cdrom_ioctl(drive, inode, file, cmd, arg);
-#endif /* CONFIG_BLK_DEV_IDECD */
-#ifdef CONFIG_BLK_DEV_IDETAPE
-                       if (drive->media == ide_tape)
-                               return idetape_blkdev_ioctl(drive, inode, file, cmd, arg);
-#endif /* CONFIG_BLK_DEV_IDETAPE */
+                       if (drive->driver != NULL)
+                               return DRIVER(drive)->ioctl(drive, inode, file, cmd, arg);
                        return -EPERM;
        }
 }
@@ -2229,12 +1971,8 @@ static int ide_check_media_change (kdev_t i_rdev)
 
        if ((drive = get_info_ptr(i_rdev)) == NULL)
                return -ENODEV;
-#ifdef CONFIG_BLK_DEV_IDECD
-       if (drive->media == ide_cdrom)
-               return ide_cdrom_check_media_change (drive);
-#endif /* CONFIG_BLK_DEV_IDECD */
-       if (drive->removable) /* for disks */
-               return 1;       /* always assume it was changed */
+       if (drive->driver != NULL)
+               return DRIVER(drive)->media_change(drive);
        return 0;
 }
 
@@ -2265,532 +2003,44 @@ void ide_fixstring (byte *s, const int bytecount, const int byteswap)
                *p++ = '\0';
 }
 
-static inline void do_identify (ide_drive_t *drive, byte cmd)
+/*
+ * stridx() returns the offset of c within s,
+ * or -1 if c is '\0' or not found within s.
+ */
+static int stridx (const char *s, char c)
 {
-       int bswap;
-       struct hd_driveid *id;
-       unsigned long capacity, check;
-
-       id = drive->id = kmalloc (SECTOR_WORDS*4, GFP_KERNEL);
-       ide_input_data(drive, id, SECTOR_WORDS);/* read 512 bytes of id info */
-       sti();
-
-#if defined (CONFIG_SCSI_EATA_DMA) || defined (CONFIG_SCSI_EATA_PIO)
-       /*
-        * EATA SCSI controllers do a hardware ATA emulation:  
-        * Ignore them if there is a driver for them available.
-        */
-       if ((id->model[0] == 'P' && id->model[1] == 'M')
-        || (id->model[0] == 'S' && id->model[1] == 'K')) {
-               printk("%s: EATA SCSI HBA %.10s\n", drive->name, id->model);
-               drive->present = 0;
-               return;
-       }
-#endif
+       char *i = strchr(s, c);
+       return (i && c) ? i - s : -1;
+}
 
-       /*
-        *  WIN_IDENTIFY returns little-endian info,
-        *  WIN_PIDENTIFY *usually* returns little-endian info.
-        */
-       bswap = 1;
-       if (cmd == WIN_PIDENTIFY) {
-               if ((id->model[0] == 'N' && id->model[1] == 'E') /* NEC */
-                || (id->model[0] == 'F' && id->model[1] == 'X') /* Mitsumi */
-                || (id->model[0] == 'P' && id->model[1] == 'i'))/* Pioneer */
-                       bswap = 0;      /* Vertos drives may still be weird */
-       }
-       ide_fixstring (id->model,     sizeof(id->model),     bswap);
-       ide_fixstring (id->fw_rev,    sizeof(id->fw_rev),    bswap);
-       ide_fixstring (id->serial_no, sizeof(id->serial_no), bswap);
+/*
+ * match_parm() does parsing for ide_setup():
+ *
+ * 1. the first char of s must be '='.
+ * 2. if the remainder matches one of the supplied keywords,
+ *     the index (1 based) of the keyword is negated and returned.
+ * 3. if the remainder is a series of no more than max_vals numbers
+ *     separated by commas, the numbers are saved in vals[] and a
+ *     count of how many were saved is returned.  Base10 is assumed,
+ *     and base16 is allowed when prefixed with "0x".
+ * 4. otherwise, zero is returned.
+ */
+static int match_parm (char *s, const char *keywords[], int vals[], int max_vals)
+{
+       static const char *decimal = "0123456789";
+       static const char *hex = "0123456789abcdef";
+       int i, n;
 
-#ifdef CONFIG_BLK_DEV_IDEATAPI
-       /*
-        * Check for an ATAPI device
-        */
-       if (cmd == WIN_PIDENTIFY) {
-               byte type = (id->config >> 8) & 0x1f;
-               printk("%s: %s, ATAPI ", drive->name, id->model);
-#ifdef CONFIG_BLK_DEV_PROMISE
-               if (HWIF(drive)->is_promise2) {
-                       printk(" -- not supported on 2nd Promise port\n");
-                       drive->present = 0;
-                       return;
-               }
-#endif /* CONFIG_BLK_DEV_PROMISE */
-               switch (type) {
-                       case 0:         /* Early cdrom models used zero */
-                       case 5:
-#ifdef CONFIG_BLK_DEV_IDECD
-                               printk ("CDROM drive\n");
-                               drive->media = ide_cdrom;
-                               drive->present = 1;
-                               drive->removable = 1;
-                               return;
-#else
-                               printk ("CDROM ");
-                               break;
-#endif /* CONFIG_BLK_DEV_IDECD */
-                       case 1:
-#ifdef CONFIG_BLK_DEV_IDETAPE
-                               printk ("TAPE drive");
-                               if (idetape_identify_device (drive,id)) {
-                                       drive->media = ide_tape;
-                                       drive->present = 1;
-                                       drive->removable = 1;
-                                       if (drive->autotune != 2 && HWIF(drive)->dmaproc != NULL) {
-                                               if (!HWIF(drive)->dmaproc(ide_dma_check, drive))
-                                                       printk(", DMA");
-                                       }
-                                       printk("\n");
-                               }
-                               else {
-                                       drive->present = 0;
-                                       printk ("\nide-tape: the tape is not supported by this version of the driver\n");
-                               }
-                               return;
-#else
-                               printk ("TAPE ");
-                               break;
-#endif /* CONFIG_BLK_DEV_IDETAPE */
-                       default:
-                               drive->present = 0;
-                               printk("Type %d - Unknown device\n", type);
-                               return;
-               }
-               drive->present = 0;
-               printk("- not supported by this kernel\n");
-               return;
-       }
-#endif /* CONFIG_BLK_DEV_IDEATAPI */
-
-       /* check for removable disks (eg. SYQUEST), ignore 'WD' drives */
-       if (id->config & (1<<7)) {      /* removable disk ? */
-               if (id->model[0] != 'W' || id->model[1] != 'D')
-                       drive->removable = 1;
-       }
-
-       /* SunDisk drives: treat as non-removable, force one unit */
-       if (id->model[0] == 'S' && id->model[1] == 'u') {
-               drive->removable = 0;
-               if (drive->select.all & (1<<4)) {
-                   drive->present = 0;
-                   return;
-               }
-       }
-       
-       drive->media = ide_disk;
-       /* Extract geometry if we did not already have one for the drive */
-       if (!drive->present) {
-               drive->present = 1;
-               drive->cyl     = drive->bios_cyl  = id->cyls;
-               drive->head    = drive->bios_head = id->heads;
-               drive->sect    = drive->bios_sect = id->sectors;
-       }
-       /* Handle logical geometry translation by the drive */
-       if ((id->field_valid & 1) && id->cur_cyls && id->cur_heads
-        && (id->cur_heads <= 16) && id->cur_sectors)
-       {
-               /*
-                * Extract the physical drive geometry for our use.
-                * Note that we purposely do *not* update the bios info.
-                * This way, programs that use it (like fdisk) will
-                * still have the same logical view as the BIOS does,
-                * which keeps the partition table from being screwed.
-                *
-                * An exception to this is the cylinder count,
-                * which we reexamine later on to correct for 1024 limitations.
-                */
-               drive->cyl  = id->cur_cyls;
-               drive->head = id->cur_heads;
-               drive->sect = id->cur_sectors;
-
-               /* check for word-swapped "capacity" field in id information */
-               capacity = drive->cyl * drive->head * drive->sect;
-               check = (id->cur_capacity0 << 16) | id->cur_capacity1;
-               if (check == capacity) {        /* was it swapped? */
-                       /* yes, bring it into little-endian order: */
-                       id->cur_capacity0 = (capacity >>  0) & 0xffff;
-                       id->cur_capacity1 = (capacity >> 16) & 0xffff;
-               }
-       }
-       /* Use physical geometry if what we have still makes no sense */
-       if ((!drive->head || drive->head > 16) && id->heads && id->heads <= 16) {
-               drive->cyl  = id->cyls;
-               drive->head = id->heads;
-               drive->sect = id->sectors;
-       }
-       /* Correct the number of cyls if the bios value is too small */
-       if (drive->sect == drive->bios_sect && drive->head == drive->bios_head) {
-               if (drive->cyl > drive->bios_cyl)
-                       drive->bios_cyl = drive->cyl;
-       }
-
-       (void) current_capacity (drive); /* initialize LBA selection */
-
-       printk ("%s: %.40s, %ldMB w/%dkB Cache, %sCHS=%d/%d/%d",
-        drive->name, id->model, current_capacity(drive)/2048L, id->buf_size/2,
-        drive->select.b.lba ? "LBA, " : "",
-        drive->bios_cyl, drive->bios_head, drive->bios_sect);
-
-       drive->mult_count = 0;
-       if (id->max_multsect) {
-               drive->mult_req = INITIAL_MULT_COUNT;
-               if (drive->mult_req > id->max_multsect)
-                       drive->mult_req = id->max_multsect;
-               if (drive->mult_req || ((id->multsect_valid & 1) && id->multsect))
-                       drive->special.b.set_multmode = 1;
-       }
-       if (drive->autotune != 2 && HWIF(drive)->dmaproc != NULL) {
-               if (!(HWIF(drive)->dmaproc(ide_dma_check, drive)))
-                       printk(", DMA");
-       }
-       printk("\n");
-}
-
-/*
- * Delay for *at least* 50ms.  As we don't know how much time is left
- * until the next tick occurs, we wait an extra tick to be safe.
- * This is used only during the probing/polling for drives at boot time.
- */
-static void delay_50ms (void)
-{
-       unsigned long timer = jiffies + ((HZ + 19)/20) + 1;
-       while (timer > jiffies);
-}
-
-/*
- * try_to_identify() sends an ATA(PI) IDENTIFY request to a drive
- * and waits for a response.  It also monitors irqs while this is
- * happening, in hope of automatically determining which one is
- * being used by the interface.
- *
- * Returns:    0  device was identified
- *             1  device timed-out (no response to identify request)
- *             2  device aborted the command (refused to identify itself)
- */
-static int try_to_identify (ide_drive_t *drive, byte cmd)
-{
-       int hd_status, rc;
-       unsigned long timeout;
-       int irqs = 0;
-
-       if (!HWIF(drive)->irq) {                /* already got an IRQ? */
-               probe_irq_off(probe_irq_on());  /* clear dangling irqs */
-               irqs = probe_irq_on();          /* start monitoring irqs */
-               OUT_BYTE(drive->ctl,IDE_CONTROL_REG);   /* enable device irq */
-       }
-
-       delay_50ms();                           /* take a deep breath */
-       if ((IN_BYTE(IDE_ALTSTATUS_REG) ^ IN_BYTE(IDE_STATUS_REG)) & ~INDEX_STAT) {
-               printk("%s: probing with STATUS instead of ALTSTATUS\n", drive->name);
-               hd_status = IDE_STATUS_REG;     /* ancient Seagate drives */
-       } else
-               hd_status = IDE_ALTSTATUS_REG;  /* use non-intrusive polling */
-
-#if CONFIG_BLK_DEV_PROMISE
-       if (IS_PROMISE_DRIVE) {
-               if (promise_cmd(drive,PROMISE_IDENTIFY)) {
-                       if (irqs)
-                               (void) probe_irq_off(irqs);
-                       return 1;
-               }
-       } else
-#endif /* CONFIG_BLK_DEV_PROMISE */
-               OUT_BYTE(cmd,IDE_COMMAND_REG);          /* ask drive for ID */
-       timeout = ((cmd == WIN_IDENTIFY) ? WAIT_WORSTCASE : WAIT_PIDENTIFY) / 2;
-       timeout += jiffies;
-       do {
-               if (jiffies > timeout) {
-                       if (irqs)
-                               (void) probe_irq_off(irqs);
-                       return 1;       /* drive timed-out */
-               }
-               delay_50ms();           /* give drive a breather */
-       } while (IN_BYTE(hd_status) & BUSY_STAT);
-
-       delay_50ms();           /* wait for IRQ and DRQ_STAT */
-       if (OK_STAT(GET_STAT(),DRQ_STAT,BAD_R_STAT)) {
-               unsigned long flags;
-               save_flags(flags);
-               cli();                  /* some systems need this */
-               do_identify(drive, cmd); /* drive returned ID */
-               rc = 0;                 /* drive responded with ID */
-               (void) GET_STAT();      /* clear drive IRQ */
-               restore_flags(flags);
-       } else
-               rc = 2;                 /* drive refused ID */
-       if (!HWIF(drive)->irq) {
-               irqs = probe_irq_off(irqs);     /* get our irq number */
-               if (irqs > 0) {
-                       HWIF(drive)->irq = irqs; /* save it for later */
-                       irqs = probe_irq_on();
-                       OUT_BYTE(drive->ctl|2,IDE_CONTROL_REG); /* mask device irq */
-                       udelay(5);
-                       (void) probe_irq_off(irqs);
-                       (void) probe_irq_off(probe_irq_on()); /* clear self-inflicted irq */
-                       (void) GET_STAT();      /* clear drive IRQ */
-
-               } else {        /* Mmmm.. multiple IRQs.. don't know which was ours */
-                       printk("%s: IRQ probe failed (%d)\n", drive->name, irqs);
-#ifdef CONFIG_BLK_DEV_CMD640
-#ifdef CMD640_DUMP_REGS
-                       if (HWIF(drive)->chipset == ide_cmd640) {
-                               printk("%s: Hmmm.. probably a driver problem.\n", drive->name);
-                               CMD640_DUMP_REGS;
-                       }
-#endif /* CMD640_DUMP_REGS */
-#endif /* CONFIG_BLK_DEV_CMD640 */
-               }
-       }
-       return rc;
-}
-
-/*
- * do_probe() has the difficult job of finding a drive if it exists,
- * without getting hung up if it doesn't exist, without trampling on
- * ethernet cards, and without leaving any IRQs dangling to haunt us later.
- *
- * If a drive is "known" to exist (from CMOS or kernel parameters),
- * but does not respond right away, the probe will "hang in there"
- * for the maximum wait time (about 30 seconds), otherwise it will
- * exit much more quickly.
- *
- * Returns:    0  device was identified
- *             1  device timed-out (no response to identify request)
- *             2  device aborted the command (refused to identify itself)
- *             3  bad status from device (possible for ATAPI drives)
- *             4  probe was not attempted because failure was obvious
- */
-static int do_probe (ide_drive_t *drive, byte cmd)
-{
-       int rc;
-       ide_hwif_t *hwif = HWIF(drive);
-#ifdef CONFIG_BLK_DEV_IDEATAPI
-       if (drive->present) {   /* avoid waiting for inappropriate probes */
-               if ((drive->media != ide_disk) && (cmd == WIN_IDENTIFY))
-                       return 4;
-       }
-#endif /* CONFIG_BLK_DEV_IDEATAPI */
-#ifdef DEBUG
-       printk("probing for %s: present=%d, media=%d, probetype=%s\n",
-               drive->name, drive->present, drive->media,
-               (cmd == WIN_IDENTIFY) ? "ATA" : "ATAPI");
-#endif
-       SELECT_DRIVE(hwif,drive);
-       delay_50ms();
-       if (IN_BYTE(IDE_SELECT_REG) != drive->select.all && !drive->present) {
-               OUT_BYTE(0xa0,IDE_SELECT_REG);  /* exit with drive0 selected */
-               delay_50ms();           /* allow BUSY_STAT to assert & clear */
-               return 3;    /* no i/f present: avoid killing ethernet cards */
-       }
-
-       if (OK_STAT(GET_STAT(),READY_STAT,BUSY_STAT)
-        || drive->present || cmd == WIN_PIDENTIFY)
-       {
-               if ((rc = try_to_identify(drive,cmd)))   /* send cmd and wait */
-                       rc = try_to_identify(drive,cmd); /* failed: try again */
-               if (rc == 1)
-                       printk("%s: no response (status = 0x%02x)\n", drive->name, GET_STAT());
-               (void) GET_STAT();              /* ensure drive irq is clear */
-       } else {
-               rc = 3;                         /* not present or maybe ATAPI */
-       }
-       if (drive->select.b.unit != 0) {
-               OUT_BYTE(0xa0,IDE_SELECT_REG);  /* exit with drive0 selected */
-               delay_50ms();
-               (void) GET_STAT();              /* ensure drive irq is clear */
-       }
-       return rc;
-}
-
-/*
- * probe_for_drive() tests for existence of a given drive using do_probe().
- *
- * Returns:    0  no device was found
- *             1  device was found (note: drive->present might still be 0)
- */
-static inline byte probe_for_drive (ide_drive_t *drive)
-{
-       if (drive->noprobe)                     /* skip probing? */
-               return drive->present;
-       if (do_probe(drive, WIN_IDENTIFY) >= 2) { /* if !(success||timed-out) */
-#ifdef CONFIG_BLK_DEV_IDEATAPI
-               (void) do_probe(drive, WIN_PIDENTIFY); /* look for ATAPI device */
-#endif /* CONFIG_BLK_DEV_IDEATAPI */
-       }
-       if (!drive->present)
-               return 0;                       /* drive not found */
-       if (drive->id == NULL) {                /* identification failed? */
-               if (drive->media == ide_disk) {
-                       printk ("%s: non-IDE drive, CHS=%d/%d/%d\n",
-                        drive->name, drive->cyl, drive->head, drive->sect);
-               }
-#ifdef CONFIG_BLK_DEV_IDECD
-               else if (drive->media == ide_cdrom) {
-                       printk("%s: ATAPI cdrom (?)\n", drive->name);
-               }
-#endif /* CONFIG_BLK_DEV_IDECD */
-               else {
-                       drive->present = 0;     /* nuke it */
-               }
-       }
-       return 1;       /* drive was found */
-}
-
-/*
- * We query CMOS about hard disks : it could be that we have a SCSI/ESDI/etc
- * controller that is BIOS compatible with ST-506, and thus showing up in our
- * BIOS table, but not register compatible, and therefore not present in CMOS.
- *
- * Furthermore, we will assume that our ST-506 drives <if any> are the primary
- * drives in the system -- the ones reflected as drive 1 or 2.  The first
- * drive is stored in the high nibble of CMOS byte 0x12, the second in the low
- * nibble.  This will be either a 4 bit drive type or 0xf indicating use byte
- * 0x19 for an 8 bit type, drive 1, 0x1a for drive 2 in CMOS.  A non-zero value
- * means we have an AT controller hard disk for that drive.
- *
- * Of course, there is no guarantee that either drive is actually on the
- * "primary" IDE interface, but we don't bother trying to sort that out here.
- * If a drive is not actually on the primary interface, then these parameters
- * will be ignored.  This results in the user having to supply the logical
- * drive geometry as a boot parameter for each drive not on the primary i/f.
- *
- * The only "perfect" way to handle this would be to modify the setup.[cS] code
- * to do BIOS calls Int13h/Fn08h and Int13h/Fn48h to get all of the drive info
- * for us during initialization.  I have the necessary docs -- any takers?  -ml
- */
-static void probe_cmos_for_drives (ide_hwif_t *hwif)
-{
-#ifdef __i386__
-       extern struct drive_info_struct drive_info;
-       byte cmos_disks, *BIOS = (byte *) &drive_info;
-       int unit;
-
-#ifdef CONFIG_BLK_DEV_PROMISE
-       if (hwif->is_promise2)
-               return;
-#endif /* CONFIG_BLK_DEV_PROMISE */
-       outb_p(0x12,0x70);              /* specify CMOS address 0x12 */
-       cmos_disks = inb_p(0x71);       /* read the data from 0x12 */
-       /* Extract drive geometry from CMOS+BIOS if not already setup */
-       for (unit = 0; unit < MAX_DRIVES; ++unit) {
-               ide_drive_t *drive = &hwif->drives[unit];
-               if ((cmos_disks & (0xf0 >> (unit*4))) && !drive->present && !drive->nobios) {
-                       drive->cyl   = drive->bios_cyl  = *(unsigned short *)BIOS;
-                       drive->head  = drive->bios_head = *(BIOS+2);
-                       drive->sect  = drive->bios_sect = *(BIOS+14);
-                       drive->ctl   = *(BIOS+8);
-                       drive->present = 1;
-               }
-               BIOS += 16;
-       }
-#endif
-}
-
-/*
- * This routine only knows how to look for drive units 0 and 1
- * on an interface, so any setting of MAX_DRIVES > 2 won't work here.
- */
-static void probe_hwif (ide_hwif_t *hwif)
-{
-       unsigned int unit;
-
-       if (hwif->noprobe)
-               return;
-       if (hwif->io_base == HD_DATA)
-               probe_cmos_for_drives (hwif);
-#if CONFIG_BLK_DEV_PROMISE
-       if (!hwif->is_promise2 &&
-          (check_region(hwif->io_base,8) || check_region(hwif->ctl_port,1))) {
-#else
-       if (check_region(hwif->io_base,8) || check_region(hwif->ctl_port,1)) {
-#endif /* CONFIG_BLK_DEV_PROMISE */
-               int msgout = 0;
-               for (unit = 0; unit < MAX_DRIVES; ++unit) {
-                       ide_drive_t *drive = &hwif->drives[unit];
-                       if (drive->present) {
-                               drive->present = 0;
-                               printk("%s: ERROR, PORTS ALREADY IN USE\n", drive->name);
-                               msgout = 1;
-                       }
-               }
-               if (!msgout)
-                       printk("%s: ports already in use, skipping probe\n", hwif->name);
-       } else {
-               unsigned long flags;
-               save_flags(flags);
-
-               sti();  /* needed for jiffies and irq probing */
-               /*
-                * Second drive should only exist if first drive was found,
-                * but a lot of cdrom drives are configured as single slaves.
-                */
-               for (unit = 0; unit < MAX_DRIVES; ++unit) {
-                       ide_drive_t *drive = &hwif->drives[unit];
-                       (void) probe_for_drive (drive);
-                       if (drive->present && drive->media == ide_disk) {
-                               if ((!drive->head || drive->head > 16) && !drive->select.b.lba) {
-                                       printk("%s: INVALID GEOMETRY: %d PHYSICAL HEADS?\n",
-                                        drive->name, drive->head);
-                                       drive->present = 0;
-                               }
-                       }
-                       if (drive->present && !hwif->present) {
-                               hwif->present = 1;
-                               request_region(hwif->io_base,  8, hwif->name);
-                               request_region(hwif->ctl_port, 1, hwif->name);
-                       }
-               }
-               restore_flags(flags);
-               for (unit = 0; unit < MAX_DRIVES; ++unit) {
-                       ide_drive_t *drive = &hwif->drives[unit];
-                       if (drive->present && drive->media != ide_tape) {
-                               ide_tuneproc_t *tuneproc = HWIF(drive)->tuneproc;
-                               if (tuneproc != NULL && drive->autotune == 1)
-                                       tuneproc(drive, 255);   /* auto-tune PIO mode */
-                       }
-               }
-       }
-}
-
-/*
- * stridx() returns the offset of c within s,
- * or -1 if c is '\0' or not found within s.
- */
-static int stridx (const char *s, char c)
-{
-       char *i = strchr(s, c);
-       return (i && c) ? i - s : -1;
-}
-
-/*
- * match_parm() does parsing for ide_setup():
- *
- * 1. the first char of s must be '='.
- * 2. if the remainder matches one of the supplied keywords,
- *     the index (1 based) of the keyword is negated and returned.
- * 3. if the remainder is a series of no more than max_vals numbers
- *     separated by commas, the numbers are saved in vals[] and a
- *     count of how many were saved is returned.  Base10 is assumed,
- *     and base16 is allowed when prefixed with "0x".
- * 4. otherwise, zero is returned.
- */
-static int match_parm (char *s, const char *keywords[], int vals[], int max_vals)
-{
-       static const char *decimal = "0123456789";
-       static const char *hex = "0123456789abcdef";
-       int i, n;
-
-       if (*s++ == '=') {
-               /*
-                * Try matching against the supplied keywords,
-                * and return -(index+1) if we match one
-                */
-               if (keywords != NULL) {
-                       for (i = 0; *keywords != NULL; ++i) {
-                               if (!strcmp(s, *keywords++))
-                                       return -(i+1);
-                       }
+       if (*s++ == '=') {
+               /*
+                * Try matching against the supplied keywords,
+                * and return -(index+1) if we match one
+                */
+               if (keywords != NULL) {
+                       for (i = 0; *keywords != NULL; ++i) {
+                               if (!strcmp(s, *keywords++))
+                                       return -(i+1);
+                       }
                }
                /*
                 * Look for a series of no more than "max_vals"
@@ -2837,7 +2087,7 @@ static int match_parm (char *s, const char *keywords[], int vals[], int max_vals
  *                             and quite likely to cause trouble with
  *                             older/odd IDE drives.
  *
- * "idebus=xx"         : inform IDE driver of VESA/PCI bus speed in Mhz,
+ * "idebus=xx"         : inform IDE driver of VESA/PCI bus speed in MHz,
  *                             where "xx" is between 20 and 66 inclusive,
  *                             used when tuning chipset PIO modes.
  *                             For PCI bus, 25 is correct for a P75 system,
@@ -2892,7 +2142,7 @@ void ide_setup (char *s)
        if (s[0] == 'h' && s[1] == 'd' && s[2] >= 'a' && s[2] <= max_drive) {
                const char *hd_words[] = {"none", "noprobe", "nowerr", "cdrom",
                                "serialize", "autotune", "noautotune",
-                               "slow", NULL};
+                               "slow", "swapdata", NULL};
                unit = s[2] - 'a';
                hw   = unit / MAX_DRIVES;
                unit = unit % MAX_DRIVES;
@@ -2925,6 +2175,9 @@ void ide_setup (char *s)
                        case -8: /* "slow" */
                                drive->slow = 1;
                                goto done;
+                       case -9: /* swapdata */
+                               drive->bswap = 1;
+                               goto done;
                        case 3: /* cyl,head,sect */
                                drive->media    = ide_disk;
                                drive->cyl      = drive->bios_cyl  = vals[0];
@@ -2961,7 +2214,7 @@ void ide_setup (char *s)
                 * Be VERY CAREFUL changing this: note hardcoded indexes below
                 */
                const char *ide_words[] = {"noprobe", "serialize", "autotune", "noautotune",
-                       "qd6580", "ht6560b", "cmd640_vlb", "dtc2278", "umc8672", "ali14xx", "dc4030", NULL};
+                       "qd6580", "ht6560b", "cmd640_vlb", "dtc2278", "umc8672", "ali14xx", "dc4030", "reset", NULL};
                hw = s[3] - '0';
                hwif = &ide_hwifs[hw];
                i = match_parm(&s[4], ide_words, vals, 3);
@@ -2969,7 +2222,7 @@ void ide_setup (char *s)
                /*
                 * Cryptic check to ensure chipset not already set for hwif:
                 */
-               if (i > 0 || i <= -5) {
+               if (i > 0 || (i <= -5 && i != -12)) {
                        if (hwif->chipset != ide_unknown)
                                goto bad_option;
                        if (i <= -5) {
@@ -2980,10 +2233,14 @@ void ide_setup (char *s)
                                 */
                                if (hw != 0)
                                        goto bad_hwif;
+                               printk("\n");
                        }
                }
 
                switch (i) {
+                       case -12: /* "reset" */
+                               hwif->reset = 1;
+                               goto done;
 #ifdef CONFIG_BLK_DEV_PROMISE
                        case -11: /* "dc4030" */
                        {
@@ -3062,8 +2319,8 @@ void ide_setup (char *s)
                        case 2: /* base,ctl */
                                vals[2] = 0;    /* default irq = probe for it */
                        case 3: /* base,ctl,irq */
-                               hwif->io_base  = vals[0];
-                               hwif->ctl_port = vals[1];
+                               ide_init_hwif_ports(hwif->io_ports, (ide_ioreg_t) vals[0], &hwif->irq);
+                               hwif->io_ports[IDE_CONTROL_OFFSET] = (ide_ioreg_t) vals[1];
                                hwif->irq      = vals[2];
                                hwif->noprobe  = 0;
                                hwif->chipset  = ide_generic;
@@ -3155,149 +2412,8 @@ int ide_xlate_1024 (kdev_t i_rdev, int xparm, const char *msg)
        return 1;
 }
 
-#if MAX_HWIFS > 1
-/*
- * save_match() is used to simplify logic in init_irq() below.
- *
- * A loophole here is that we may not know about a particular
- * hwif's irq until after that hwif is actually probed/initialized..
- * This could be a problem for the case where an hwif is on a
- * dual interface that requires serialization (eg. cmd640) and another
- * hwif using one of the same irqs is initialized beforehand.
- *
- * This routine detects and reports such situations, but does not fix them.
- */
-static void save_match (ide_hwif_t *hwif, ide_hwif_t *new, ide_hwif_t **match)
-{
-       ide_hwif_t *m = *match;
-
-       if (m && m->hwgroup && m->hwgroup != new->hwgroup) {
-               if (!new->hwgroup)
-                       return;
-               printk("%s: potential irq problem with %s and %s\n", hwif->name, new->name, m->name);
-       }
-       if (!m || m->irq != hwif->irq) /* don't undo a prior perfect match */
-               *match = new;
-}
-#endif /* MAX_HWIFS > 1 */
-
-/*
- * This routine sets up the irq for an ide interface, and creates a new
- * hwgroup for the irq/hwif if none was previously assigned.
- *
- * Much of the code is for correctly detecting/handling irq sharing
- * and irq serialization situations.  This is somewhat complex because
- * it handles static as well as dynamic (PCMCIA) IDE interfaces.
- *
- * The SA_INTERRUPT in sa_flags means ide_intr() is always entered with
- * interrupts completely disabled.  This can be bad for interrupt latency,
- * but anything else has led to problems on some machines.  We re-enable
- * interrupts as much as we can safely do in most places.
- */
-static int init_irq (ide_hwif_t *hwif)
-{
-       unsigned long flags;
-#if MAX_HWIFS > 1
-       unsigned int index;
-#endif /* MAX_HWIFS > 1 */
-       ide_hwgroup_t *hwgroup;
-       ide_hwif_t *match = NULL;
-
-       save_flags(flags);
-       cli();
-
-       hwif->hwgroup = NULL;
-#if MAX_HWIFS > 1
-       /*
-        * Group up with any other hwifs that share our irq(s).
-        */
-       for (index = 0; index < MAX_HWIFS; index++) {
-               ide_hwif_t *h = &ide_hwifs[index];
-               if (h->hwgroup) {  /* scan only initialized hwif's */
-                       if (hwif->irq == h->irq) {
-                               hwif->sharing_irq = h->sharing_irq = 1;
-                               save_match(hwif, h, &match);
-                       }
-                       if (hwif->serialized) {
-                               ide_hwif_t *mate = &ide_hwifs[hwif->index^1];
-                               if (index == mate->index || h->irq == mate->irq)
-                                       save_match(hwif, h, &match);
-                       }
-                       if (h->serialized) {
-                               ide_hwif_t *mate = &ide_hwifs[h->index^1];
-                               if (hwif->irq == mate->irq)
-                                       save_match(hwif, h, &match);
-                       }
-               }
-       }
-#endif /* MAX_HWIFS > 1 */
-       /*
-        * If we are still without a hwgroup, then form a new one
-        */
-       if (match) {
-               hwgroup = match->hwgroup;
-       } else {
-               hwgroup = kmalloc(sizeof(ide_hwgroup_t), GFP_KERNEL);
-               hwgroup->hwif    = hwgroup->next_hwif = hwif->next = hwif;
-               hwgroup->rq      = NULL;
-               hwgroup->handler = NULL;
-               if (hwif->drives[0].present)
-                       hwgroup->drive = &hwif->drives[0];
-               else
-                       hwgroup->drive = &hwif->drives[1];
-               hwgroup->poll_timeout = 0;
-               init_timer(&hwgroup->timer);
-               hwgroup->timer.function = &timer_expiry;
-               hwgroup->timer.data = (unsigned long) hwgroup;
-       }
-
-       /*
-        * Allocate the irq, if not already obtained for another hwif
-        */
-       if (!match || match->irq != hwif->irq) {
-               if (request_irq(hwif->irq, ide_intr, SA_INTERRUPT, hwif->name, hwgroup)) {
-                       if (!match)
-                               kfree(hwgroup);
-                       restore_flags(flags);
-                       return 1;
-               }
-       }
-
-       /*
-        * Everything is okay, so link us into the hwgroup
-        */
-       hwif->hwgroup = hwgroup;
-       hwif->next = hwgroup->hwif->next;
-       hwgroup->hwif->next = hwif;
-
-       restore_flags(flags);   /* safe now that hwif->hwgroup is set up */
-
-       printk("%s at 0x%03x-0x%03x,0x%03x on irq %d", hwif->name,
-               hwif->io_base, hwif->io_base+7, hwif->ctl_port, hwif->irq);
-       if (match)
-               printk(" (%sed with %s)", hwif->sharing_irq ? "shar" : "serializ", match->name);
-       printk("\n");
-       return 0;
-}
-
-static struct file_operations ide_fops = {
-       NULL,                   /* lseek - default */
-       block_read,             /* read - general block-dev read */
-       block_write,            /* write - general block-dev write */
-       NULL,                   /* readdir - bad */
-       NULL,                   /* select */
-       ide_ioctl,              /* ioctl */
-       NULL,                   /* mmap */
-       ide_open,               /* open */
-       ide_release,            /* release */
-       block_fsync             /* fsync */
-       ,NULL,                  /* fasync */
-       ide_check_media_change, /* check_media_change */
-       revalidate_disk         /* revalidate */
-};
-
 #ifdef CONFIG_PCI
-#if defined(CONFIG_BLK_DEV_RZ1000) || defined(CONFIG_BLK_DEV_TRITON)
+#if defined(CONFIG_BLK_DEV_RZ1000) || defined(CONFIG_BLK_DEV_TRITON) || defined(CONFIG_BLK_DEV_OPTI621)
 
 typedef void (ide_pci_init_proc_t)(byte, byte);
 
@@ -3319,7 +2435,7 @@ static void ide_probe_pci (unsigned short vendor, unsigned short device, ide_pci
        restore_flags(flags);
 }
 
-#endif /* defined(CONFIG_BLK_DEV_RZ1000) || defined(CONFIG_BLK_DEV_TRITON) */
+#endif /* defined(CONFIG_BLK_DEV_RZ1000) || defined(CONFIG_BLK_DEV_TRITON) || defined(CONFIG_BLK_DEV_OPTI621) */
 #endif /* CONFIG_PCI */
 
 /*
@@ -3350,6 +2466,9 @@ static void probe_for_hwifs (void)
                ide_probe_pci (PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82371_0, &ide_init_triton, 1);
                ide_probe_pci (PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82371SB_1, &ide_init_triton, 0);
 #endif /* CONFIG_BLK_DEV_TRITON */
+#ifdef CONFIG_BLK_DEV_OPTI621
+               ide_probe_pci (PCI_VENDOR_ID_OPTI, PCI_DEVICE_ID_OPTI_82C621, &ide_init_opti621, 0);
+#endif /* CONFIG_BLK_DEV_OPTI621 */
        }
 #endif /* CONFIG_PCI */
 #ifdef CONFIG_BLK_DEV_CMD640
@@ -3363,196 +2482,287 @@ static void probe_for_hwifs (void)
 #endif
 }
 
-static int hwif_init (int h)
+void ide_init_builtin_drivers (void)
 {
-       ide_hwif_t *hwif = &ide_hwifs[h];
-       void (*rfn)(void);
-       
-       if (!hwif->present)
-               return 0;
-       if (!hwif->irq) {
-               if (!(hwif->irq = default_irqs[h])) {
-                       printk("%s: DISABLED, NO IRQ\n", hwif->name);
-                       return (hwif->present = 0);
-               }
-       }
-#ifdef CONFIG_BLK_DEV_HD
-       if (hwif->irq == HD_IRQ && hwif->io_base != HD_DATA) {
-               printk("%s: CANNOT SHARE IRQ WITH OLD HARDDISK DRIVER (hd.c)\n", hwif->name);
-               return (hwif->present = 0);
-       }
-#endif /* CONFIG_BLK_DEV_HD */
-       
-       hwif->present = 0; /* we set it back to 1 if all is ok below */
-       switch (hwif->major) {
-       case IDE0_MAJOR: rfn = &do_ide0_request; break;
-#if MAX_HWIFS > 1
-       case IDE1_MAJOR: rfn = &do_ide1_request; break;
-#endif
-#if MAX_HWIFS > 2
-       case IDE2_MAJOR: rfn = &do_ide2_request; break;
-#endif
-#if MAX_HWIFS > 3
-       case IDE3_MAJOR: rfn = &do_ide3_request; break;
-#endif
-       default:
-               printk("%s: request_fn NOT DEFINED\n", hwif->name);
-               return (hwif->present = 0);
-       }
-       if (register_blkdev (hwif->major, hwif->name, &ide_fops)) {
-               printk("%s: UNABLE TO GET MAJOR NUMBER %d\n", hwif->name, hwif->major);
-       } else if (init_irq (hwif)) {
-               printk("%s: UNABLE TO GET IRQ %d\n", hwif->name, hwif->irq);
-               (void) unregister_blkdev (hwif->major, hwif->name);
-       } else {
-               init_gendisk(hwif);
-               blk_dev[hwif->major].request_fn = rfn;
-               read_ahead[hwif->major] = 8;    /* (4kB) */
-               hwif->present = 1;      /* success */
-       }
-       return hwif->present;
-}
-
-/*
- * This is gets invoked once during initialization, to set *everything* up
- */
-int ide_init (void)
-{
-       int index;
-
-       init_ide_data ();
        /*
         * Probe for special "known" interface chipsets
         */
        probe_for_hwifs ();
 
-       /*
-        * Probe for drives in the usual way.. CMOS/BIOS, then poke at ports
-        */
-       for (index = 0; index < MAX_HWIFS; ++index)
-               probe_hwif (&ide_hwifs[index]);
-       for (index = 0; index < MAX_HWIFS; ++index)
-               hwif_init (index);
+#ifdef CONFIG_BLK_DEV_IDE
+#ifdef __mc68000__
+       if (ide_hwifs[0].io_ports[IDE_DATA_OFFSET]) {
+               ide_get_lock(&ide_lock, ide_intr, NULL);
+               disable_irq(ide_hwifs[0].irq);
+       }
+#endif /* __mc68000__ */
+
+       (void) ideprobe_init();
+
+#ifdef __mc68000__
+       if (ide_hwifs[0].io_ports[IDE_DATA_OFFSET]) {
+               enable_irq(ide_hwifs[0].irq);
+               ide_release_lock(&ide_lock);
+       }
+#endif /* __mc68000__ */
+#endif /* CONFIG_BLK_DEV_IDE */
 
+#ifdef CONFIG_BLK_DEV_IDEDISK
+       (void) idedisk_init();
+#endif /* CONFIG_BLK_DEV_IDEDISK */
+#ifdef CONFIG_BLK_DEV_IDECD
+       (void) ide_cdrom_init();
+#endif /* CONFIG_BLK_DEV_IDECD */
 #ifdef CONFIG_BLK_DEV_IDETAPE
-       idetape_register_chrdev();      /* Register character device interface to the ide tape */
+       (void) idetape_init();
 #endif /* CONFIG_BLK_DEV_IDETAPE */
-       
-       return 0;
+#ifdef CONFIG_BLK_DEV_IDEFLOPPY
+       (void) idefloppy_init();
+#endif /* CONFIG_BLK_DEV_IDEFLOPPY */
 }
 
-#ifdef CONFIG_BLK_DEV_IDE_PCMCIA
-int ide_register(int io_base, int ctl_port, int irq)
+static int default_cleanup (ide_drive_t *drive)
 {
-       int index, i, rc = -1;
-       ide_hwif_t *hwif;
+       return ide_unregister_subdriver(drive);
+}
+
+static void default_do_request(ide_drive_t *drive, struct request *rq, unsigned long block)
+{
+       ide_end_request(0, HWGROUP(drive));
+}
+static void default_end_request (byte uptodate, ide_hwgroup_t *hwgroup)
+{
+       ide_end_request(uptodate, hwgroup);
+}
+  
+static int default_ioctl (ide_drive_t *drive, struct inode *inode, struct file *file,
+                         unsigned int cmd, unsigned long arg)
+{
+       return -EIO;
+}
+
+static int default_open (struct inode *inode, struct file *filp, ide_drive_t *drive)
+{
+       drive->usage--;
+       return -EIO;
+}
+
+static void default_release (struct inode *inode, struct file *filp, ide_drive_t *drive)
+{
+}
+
+static int default_check_media_change (ide_drive_t *drive)
+{
+       return 1;
+}
+
+static void default_pre_reset (ide_drive_t *drive)
+{
+}
+
+static unsigned long default_capacity (ide_drive_t *drive)
+{
+       return 0x7fffffff;      /* cdrom or tape */
+}
+
+static void default_special (ide_drive_t *drive)
+{
+       special_t *s = &drive->special;
+
+       s->all = 0;
+       drive->mult_req = 0;
+}
+
+static void setup_driver_defaults (ide_drive_t *drive)
+{
+       ide_driver_t *d = drive->driver;
+
+       if (d->cleanup == NULL)         d->cleanup = default_cleanup;
+       if (d->do_request == NULL)      d->do_request = default_do_request;
+       if (d->end_request == NULL)     d->end_request = default_end_request;
+       if (d->ioctl == NULL)           d->ioctl = default_ioctl;
+       if (d->open == NULL)            d->open = default_open;
+       if (d->release == NULL)         d->release = default_release;
+       if (d->media_change == NULL)    d->media_change = default_check_media_change;
+       if (d->pre_reset == NULL)       d->pre_reset = default_pre_reset;
+       if (d->capacity == NULL)        d->capacity = default_capacity;
+       if (d->special == NULL)         d->special = default_special;
+}
+
+ide_drive_t *ide_scan_devices (byte media, ide_driver_t *driver, int n)
+{
+       unsigned int unit, index, i;
        ide_drive_t *drive;
-       unsigned long flags;
 
+       for (index = 0, i = 0; index < MAX_HWIFS; ++index) {
+               for (unit = 0; unit < MAX_DRIVES; ++unit) {
+                       drive = &ide_hwifs[index].drives[unit];
+                       if (drive->present && drive->media == media &&
+                           drive->driver == driver && ++i > n)
+                               return drive;
+               }
+       }
+       return NULL;
+}
+
+int ide_register_subdriver (ide_drive_t *drive, ide_driver_t *driver, int version)
+{
+       unsigned long flags;
+       
        save_flags(flags);
        cli();
-       for (index = 0; index < MAX_HWIFS; ++index) {
-               hwif = &ide_hwifs[index];
-               if (hwif->present) {
-                       if (hwif->io_base == io_base || hwif->ctl_port == ctl_port)
-                               break; /* this ide port already exists */
-               } else {
-                       hwif->io_base = io_base;
-                       hwif->ctl_port = ctl_port;
-                       hwif->irq = irq;
-                       hwif->noprobe = 0;
-                       probe_hwif(hwif);
-                       if (!hwif_init(index))
-                               break;
-                       for (i = 0; i < hwif->gd->nr_real; i++) {
-                               drive = &hwif->drives[i];
-                               revalidate_disk(MKDEV(hwif->major, i<<PARTN_BITS));
-#ifdef CONFIG_BLK_DEV_IDECD
-                               if (drive->present && drive->media == ide_cdrom)
-                                       ide_cdrom_setup(drive);
-#endif /* CONFIG_BLK_DEV_IDECD */
-                       }
-                       rc = index;
-                       break;
-               }
+       if (version != IDE_SUBDRIVER_VERSION || !drive->present || drive->driver != NULL ||
+           drive->busy || drive->usage || drive->media != driver->media) {
+               restore_flags(flags);
+               return 1;
        }
+       drive->driver = driver;
+       setup_driver_defaults(drive);
        restore_flags(flags);
-       return rc;
+       if (driver->supports_dma && !drive->using_dma && drive->autotune != 2 && HWIF(drive)->dmaproc != NULL)
+               (void) (HWIF(drive)->dmaproc(ide_dma_check, drive));
+       drive->revalidate = 1;
+       return 0;
 }
 
-void ide_unregister (unsigned int index)
+int ide_unregister_subdriver (ide_drive_t *drive)
 {
-       struct gendisk *gd, **gdp;
-       ide_hwif_t *hwif, *g;
-       ide_hwgroup_t *hwgroup;
-       int irq_count = 0;
        unsigned long flags;
-
-       if (index >= MAX_HWIFS)
-               return;
+       
        save_flags(flags);
        cli();
-       hwif = &ide_hwifs[index];
-       if (!hwif->present || hwif->drives[0].busy || hwif->drives[1].busy) {
+       if (drive->usage || drive->busy || drive->driver == NULL || DRIVER(drive)->busy) {
                restore_flags(flags);
-               return;
+               return 1;
        }
-       hwif->present = 0;
-       hwgroup = hwif->hwgroup;
+       drive->driver = NULL;
+       restore_flags(flags);
+       return 0;
+}
 
-       /*
-        * free the irq if we were the only hwif using it
-        */
-       g = hwgroup->hwif;
-       do {
-               if (g->irq == hwif->irq)
-                       ++irq_count;
-               g = g->next;
-       } while (g != hwgroup->hwif);
-       if (irq_count == 1)
-               free_irq(hwif->irq, hwgroup);
+int ide_register_module (ide_module_t *module)
+{
+       ide_module_t *p = ide_modules;
 
-       /*
-        * Note that we only release the standard ports,
-        * and do not even try to handle any extra ports
-        * allocated for weird IDE interface chipsets.
-        */
-       release_region(hwif->io_base, 8);
-       release_region(hwif->ctl_port, 1);
+       while (p) {
+               if (p == module)
+                       return 1;
+               p = p->next;
+       }
+       module->next = ide_modules;
+       ide_modules = module;
+       revalidate_drives();
+       return 0;
+}
+
+void ide_unregister_module (ide_module_t *module)
+{
+       ide_module_t **p;
+
+       for (p = &ide_modules; (*p) && (*p) != module; p = &((*p)->next));
+       if (*p)
+               *p = (*p)->next;
+}
+
+struct file_operations ide_fops[] = {{
+       NULL,                   /* lseek - default */
+       block_read,             /* read - general block-dev read */
+       block_write,            /* write - general block-dev write */
+       NULL,                   /* readdir - bad */
+       NULL,                   /* select */
+       ide_ioctl,              /* ioctl */
+       NULL,                   /* mmap */
+       ide_open,               /* open */
+       ide_release,            /* release */
+       block_fsync,            /* fsync */
+       NULL,                   /* fasync */
+       ide_check_media_change, /* check_media_change */
+       ide_revalidate_disk     /* revalidate */
+}};
+
+static struct symbol_table ide_syms = {
+#include <linux/symtab_begin.h>
+       X(ide_hwifs),
+       X(ide_register_module),         X(ide_unregister_module),
 
        /*
-        * Remove us from the hwgroup, and free
-        * the hwgroup if we were the only member
+        * Probe module
         */
-       while (hwgroup->hwif->next != hwif)
-               hwgroup->hwif = hwgroup->hwif->next;
-       hwgroup->hwif->next = hwif->next;
-       if (hwgroup->hwif == hwif)
-               hwgroup->hwif = hwif->next;
-       if (hwgroup->next_hwif == hwif)
-               hwgroup->next_hwif = hwif->next;
-       if (hwgroup->hwif == hwif)
-               kfree(hwgroup);
+       X(ide_timer_expiry),            X(ide_intr),
+       X(ide_geninit),                 X(ide_fops),
+       X(do_ide0_request),
+#if MAX_HWIFS > 1
+       X(do_ide1_request),
+#endif /* MAX_HWIFS > 1 */
+#if MAX_HWIFS > 2
+       X(do_ide2_request),
+#endif /* MAX_HWIFS > 2 */
+#if MAX_HWIFS > 3
+       X(do_ide3_request),
+#endif /* MAX_HWIFS > 3 */
 
        /*
-        * Remove us from the kernel's knowledge
+        * Driver module
         */
-       unregister_blkdev(hwif->major, hwif->name);
-       kfree(blksize_size[hwif->major]);
-       blk_dev[hwif->major].request_fn = NULL;
-       blksize_size[hwif->major] = NULL;
-       for (gdp = &gendisk_head; *gdp; gdp = &((*gdp)->next))
-               if (*gdp == hwif->gd)
-                       break;
-       if (*gdp == NULL)
-               printk("gd not in disk chain!\n");
-       else {
-               gd = *gdp; *gdp = gd->next;
-               kfree(gd->sizes);
-               kfree(gd->part);
-               kfree(gd);
+       X(ide_scan_devices),            X(ide_register_subdriver),
+       X(ide_unregister_subdriver),    X(ide_input_data),
+       X(ide_output_data),             X(atapi_input_bytes),
+       X(atapi_output_bytes),          X(ide_set_handler),
+       X(ide_dump_status),             X(ide_error),
+       X(ide_fixstring),               X(ide_wait_stat),
+       X(ide_do_reset),                X(ide_init_drive_cmd),
+       X(ide_do_drive_cmd),            X(ide_end_drive_cmd),
+       X(ide_end_request),             X(ide_revalidate_disk),
+       X(ide_cmd),
+
+       X(ide_register),                X(ide_unregister),
+#include <linux/symtab_end.h>
+};
+
+/*
+ * This is gets invoked once during initialization, to set *everything* up
+ */
+int ide_init (void)
+{
+       init_ide_data ();
+
+       initializing = 1;
+       ide_init_builtin_drivers();
+       initializing = 0;
+
+       (void) register_symtab(&ide_syms);
+       return 0;
+}
+
+#ifdef MODULE
+char *options = NULL;
+
+static void parse_options (char *line)
+{
+       char *next = line;
+
+       if (line == NULL || !*line)
+               return;
+       while ((line = next) != NULL) {
+               if ((next = strchr(line,' ')) != NULL)
+                       *next++ = 0;
+               if (!strncmp(line,"ide",3) || (!strncmp(line,"hd",2) && line[2] != '='))
+                       ide_setup(line);
        }
-       init_hwif_data (index); /* restore hwif data to pristine status */
-       restore_flags(flags);
 }
-#endif /* CONFIG_BLK_DEV_IDE_PCMCIA */
+
+int init_module (void)
+{
+       parse_options(options);
+       return ide_init();
+}
+
+void cleanup_module (void)
+{
+       int index;
+
+       for (index = 0; index < MAX_HWIFS; ++index)
+               ide_unregister(index);
+}
+#endif /* MODULE */
index 98ec3d939870d5ccff4da9394b1d8c905e010db6..79b5bb0cdd741b15ec1166e61a91926173a385a6 100644 (file)
@@ -1,10 +1,11 @@
 /*
  *  linux/drivers/block/ide.h
  *
- *  Copyright (C) 1994, 1995  Linus Torvalds & authors
+ *  Copyright (C) 1994-1996  Linus Torvalds & authors
  */
 
 #include <linux/config.h>
+#include <asm/ide.h>
 
 /*
  * This is the multiple IDE interface driver, as evolved from hd.c.  
@@ -51,10 +52,6 @@ void cmd640_dump_regs (void);
 #endif
 #endif  /* CONFIG_BLK_DEV_CMD640 */
 
-#if defined(CONFIG_BLK_DEV_IDECD) || defined(CONFIG_BLK_DEV_IDETAPE)
-#define CONFIG_BLK_DEV_IDEATAPI 1
-#endif
-
 /*
  * IDE_DRIVE_CMD is used to implement many features of the hdparm utility
  */
@@ -80,12 +77,13 @@ typedef unsigned char       byte;   /* used everywhere */
 #undef REALLY_FAST_IO
 #endif
 
+#define HWIF(drive)            ((ide_hwif_t *)((drive)->hwif))
+#define HWGROUP(drive)         ((ide_hwgroup_t *)(HWIF(drive)->hwgroup))
+
 /*
  * Definitions for accessing IDE controller registers
  */
-
-#define HWIF(drive)            ((ide_hwif_t *)((drive)->hwif))
-#define HWGROUP(drive)         ((ide_hwgroup_t *)(HWIF(drive)->hwgroup))
+#define IDE_NR_PORTS           (10)
 
 #define IDE_DATA_OFFSET                (0)
 #define IDE_ERROR_OFFSET       (1)
@@ -95,21 +93,29 @@ typedef unsigned char       byte;   /* used everywhere */
 #define IDE_HCYL_OFFSET                (5)
 #define IDE_SELECT_OFFSET      (6)
 #define IDE_STATUS_OFFSET      (7)
+#define IDE_CONTROL_OFFSET     (8)
+#define IDE_IRQ_OFFSET         (9)
+
 #define IDE_FEATURE_OFFSET     IDE_ERROR_OFFSET
 #define IDE_COMMAND_OFFSET     IDE_STATUS_OFFSET
 
-#define IDE_DATA_REG           (HWIF(drive)->io_base+IDE_DATA_OFFSET)
-#define IDE_ERROR_REG          (HWIF(drive)->io_base+IDE_ERROR_OFFSET)
-#define IDE_NSECTOR_REG                (HWIF(drive)->io_base+IDE_NSECTOR_OFFSET)
-#define IDE_SECTOR_REG         (HWIF(drive)->io_base+IDE_SECTOR_OFFSET)
-#define IDE_LCYL_REG           (HWIF(drive)->io_base+IDE_LCYL_OFFSET)
-#define IDE_HCYL_REG           (HWIF(drive)->io_base+IDE_HCYL_OFFSET)
-#define IDE_SELECT_REG         (HWIF(drive)->io_base+IDE_SELECT_OFFSET)
-#define IDE_STATUS_REG         (HWIF(drive)->io_base+IDE_STATUS_OFFSET)
-#define IDE_CONTROL_REG                (HWIF(drive)->ctl_port)
+#define IDE_DATA_REG           (HWIF(drive)->io_ports[IDE_DATA_OFFSET])
+#define IDE_ERROR_REG          (HWIF(drive)->io_ports[IDE_ERROR_OFFSET])
+#define IDE_NSECTOR_REG                (HWIF(drive)->io_ports[IDE_NSECTOR_OFFSET])
+#define IDE_SECTOR_REG         (HWIF(drive)->io_ports[IDE_SECTOR_OFFSET])
+#define IDE_LCYL_REG           (HWIF(drive)->io_ports[IDE_LCYL_OFFSET])
+#define IDE_HCYL_REG           (HWIF(drive)->io_ports[IDE_HCYL_OFFSET])
+#define IDE_SELECT_REG         (HWIF(drive)->io_ports[IDE_SELECT_OFFSET])
+#define IDE_STATUS_REG         (HWIF(drive)->io_ports[IDE_STATUS_OFFSET])
+#define IDE_CONTROL_REG                (HWIF(drive)->io_ports[IDE_CONTROL_OFFSET])
+#define IDE_IRQ_REG            (HWIF(drive)->io_ports[IDE_IRQ_OFFSET])
+
 #define IDE_FEATURE_REG                IDE_ERROR_REG
 #define IDE_COMMAND_REG                IDE_STATUS_REG
 #define IDE_ALTSTATUS_REG      IDE_CONTROL_REG
+#define IDE_IREASON_REG                IDE_NSECTOR_REG
+#define IDE_BCOUNTL_REG                IDE_LCYL_REG
+#define IDE_BCOUNTH_REG                IDE_HCYL_REG
 
 #ifdef REALLY_FAST_IO
 #define OUT_BYTE(b,p)          outb((b),(p))
@@ -136,9 +142,6 @@ typedef unsigned char       byte;   /* used everywhere */
 #define PARTN_BITS     6       /* number of minor dev bits for partitions */
 #define PARTN_MASK     ((1<<PARTN_BITS)-1)     /* a useful bit mask */
 #define MAX_DRIVES     2       /* per interface; 2 assumed by lots of code */
-#ifndef MAX_HWIFS
-#define MAX_HWIFS      4       /* an arbitrary, but realistic limit */
-#endif
 #define SECTOR_WORDS   (512 / 4)       /* number of 32bit words per sector */
 
 /*
@@ -160,140 +163,20 @@ typedef unsigned char    byte;   /* used everywhere */
        if (hwif->selectproc)                                   \
                hwif->selectproc(drive);                        \
        else                                                    \
-               OUT_BYTE((drive)->select.all, hwif->io_base+IDE_SELECT_OFFSET); \
+               OUT_BYTE((drive)->select.all, hwif->io_ports[IDE_SELECT_OFFSET]); \
 }
 #else
-#define SELECT_DRIVE(hwif,drive)  OUT_BYTE((drive)->select.all, hwif->io_base+IDE_SELECT_OFFSET);
+#define SELECT_DRIVE(hwif,drive)  OUT_BYTE((drive)->select.all, hwif->io_ports[IDE_SELECT_OFFSET]);
 #endif /* CONFIG_BLK_DEV_HT6560B || CONFIG_BLK_DEV_PROMISE */
                
-#ifdef CONFIG_BLK_DEV_IDETAPE
-#include "ide-tape.h"
-#endif /* CONFIG_BLK_DEV_IDETAPE */
-
-#ifdef CONFIG_BLK_DEV_IDECD
-
-struct atapi_request_sense {
-       unsigned char error_code : 7;
-       unsigned char valid      : 1;
-       byte reserved1;
-       unsigned char sense_key  : 4;
-       unsigned char reserved2  : 1;
-       unsigned char ili        : 1;
-       unsigned char reserved3  : 2;
-       byte info[4];
-       byte sense_len;
-       byte command_info[4];
-       byte asc;
-       byte ascq;
-       byte fru;
-       byte sense_key_specific[3];
-};
-
-struct packet_command {
-       char *buffer;
-       int buflen;
-       int stat;
-       struct atapi_request_sense *sense_data;
-       unsigned char c[12];
-};
-
-
-/* Structure of a MSF cdrom address. */
-struct atapi_msf {
-       byte reserved;
-       byte minute;
-       byte second;
-       byte frame;
-};
-
-
-/* Space to hold the disk TOC. */
-
-#define MAX_TRACKS 99
-struct atapi_toc_header {
-       unsigned short toc_length;
-       byte first_track;
-       byte last_track;
-};
-
-struct atapi_toc_entry {
-       byte reserved1;
-       unsigned control : 4;
-       unsigned adr     : 4;
-       byte track;
-       byte reserved2;
-       union {
-               unsigned lba;
-               struct atapi_msf msf;
-       } addr;
-};
-
-struct atapi_toc {
-       int    last_session_lba;
-       int    xa_flag;
-       unsigned capacity;
-       struct atapi_toc_header hdr;
-       struct atapi_toc_entry  ent[MAX_TRACKS+1];
-         /* One extra for the leadout. */
-};
-
-
-/* This structure is annoyingly close to, but not identical with,
-   the cdrom_subchnl structure from cdrom.h. */
-struct atapi_cdrom_subchnl 
-{
-       u_char  acdsc_reserved;
-       u_char  acdsc_audiostatus;
-       u_short acdsc_length;
-       u_char  acdsc_format;
-
-       u_char  acdsc_adr:      4;
-       u_char  acdsc_ctrl:     4;
-       u_char  acdsc_trk;
-       u_char  acdsc_ind;
-       union {
-               struct atapi_msf msf;
-               int     lba;
-       } acdsc_absaddr;
-       union {
-               struct atapi_msf msf;
-               int     lba;
-       } acdsc_reladdr;
-};
-
-
-/* Extra per-device info for cdrom drives. */
-struct cdrom_info {
-
-       /* Buffer for table of contents.  NULL if we haven't allocated
-          a TOC buffer for this device yet. */
-
-       struct atapi_toc *toc;
-
-       /* Sector buffer.  If a read request wants only the first part
-          of a cdrom block, we cache the rest of the block here,
-          in the expectation that that data is going to be wanted soon.
-          SECTOR_BUFFERED is the number of the first buffered sector,
-          and NSECTORS_BUFFERED is the number of sectors in the buffer.
-          Before the buffer is allocated, we should have
-          SECTOR_BUFFER == NULL and NSECTORS_BUFFERED == 0. */
-
-       unsigned long sector_buffered;
-       unsigned long nsectors_buffered;
-       char *sector_buffer;
-
-       /* The result of the last successful request sense command
-          on this device. */
-       struct atapi_request_sense sense_data;
-};
-
-#endif /* CONFIG_BLK_DEV_IDECD */
-
 /*
  * Now for the data we need to maintain per-drive:  ide_drive_t
  */
 
-typedef enum {ide_disk, ide_cdrom, ide_tape} ide_media_t;
+#define ide_disk       0x20
+#define ide_cdrom      0x5
+#define ide_tape       0x1
+#define ide_floppy     0x0
 
 typedef union {
        unsigned all                    : 8;    /* all of the bits together */
@@ -306,17 +189,6 @@ typedef union {
                } b;
        } special_t;
 
-typedef union {
-       unsigned all                    : 8;    /* all of the bits together */
-       struct {
-               unsigned head           : 4;    /* always zeros here */
-               unsigned unit           : 1;    /* drive select number, 0 or 1 */
-               unsigned bit5           : 1;    /* always 1 */
-               unsigned lba            : 1;    /* using LBA instead of CHS */
-               unsigned bit7           : 1;    /* always 1 */
-       } b;
-       } select_t;
-
 typedef struct ide_drive_s {
        special_t       special;        /* special action flags */
        unsigned present        : 1;    /* drive is physically present */
@@ -332,10 +204,12 @@ typedef struct ide_drive_s {
        unsigned nobios         : 1;    /* flag: do not probe bios for drive */
        unsigned slow           : 1;    /* flag: slow data port */
        unsigned autotune       : 2;    /* 1=autotune, 2=noautotune, 0=default */
+       unsigned revalidate     : 1;    /* request revalidation */
+       unsigned bswap          : 1;    /* flag: byte swap data */
 #if FAKE_FDISK_FOR_EZDRIVE
        unsigned remap_0_to_1   : 1;    /* flag: partitioned with ezdrive */
 #endif /* FAKE_FDISK_FOR_EZDRIVE */
-       ide_media_t     media;          /* disk, cdrom, tape */
+       byte            media;          /* disk, cdrom, tape, floppy, ... */
        select_t        select;         /* basic drive/head select reg value */
        byte            ctl;            /* "normal" value for IDE_CONTROL_REG */
        byte            ready_stat;     /* min status value for drive ready */
@@ -357,12 +231,8 @@ typedef struct ide_drive_s {
        struct hd_driveid *id;          /* drive model identification info */
        struct hd_struct  *part;        /* drive partition table */
        char            name[4];        /* drive name, such as "hda" */
-#ifdef CONFIG_BLK_DEV_IDECD
-       struct cdrom_info cdrom_info;   /* for ide-cd.c */
-#endif /* CONFIG_BLK_DEV_IDECD */
-#ifdef CONFIG_BLK_DEV_IDETAPE
-       idetape_tape_t  tape;           /* for ide-tape.c */
-#endif /* CONFIG_BLK_DEV_IDETAPE */
+       void            *driver;        /* (ide_driver_t *) */
+       void            *driver_data;   /* extra driver data */
        } ide_drive_t;
 
 /*
@@ -416,8 +286,7 @@ typedef enum {      ide_unknown,    ide_generic,    ide_triton,
 typedef struct hwif_s {
        struct hwif_s   *next;          /* for linked-list in ide_hwgroup_t */
        void            *hwgroup;       /* actually (ide_hwgroup_t *) */
-       unsigned short  io_base;        /* base io port addr */
-       unsigned short  ctl_port;       /* usually io_base+0x206 */
+       ide_ioreg_t     io_ports[IDE_NR_PORTS]; /* task file registers */
        ide_drive_t     drives[MAX_DRIVES];     /* drive info */
        struct gendisk  *gd;            /* gendisk structure */
        ide_tuneproc_t  *tuneproc;      /* routine to tune PIO mode for drives */
@@ -427,9 +296,9 @@ typedef struct hwif_s {
        ide_dmaproc_t   *dmaproc;       /* dma read/write/abort routine */
        unsigned long   *dmatable;      /* dma physical region descriptor table */
        unsigned short  dma_base;       /* base addr for dma ports (triton) */
-       byte            irq;            /* our irq number */
+       int             irq;            /* our irq number */
        byte            major;          /* our major number */
-       char            name[5];        /* name of interface, eg. "ide0" */
+       char            name[6];        /* name of interface, eg. "ide0" */
        byte            index;          /* 0 for ide0; 1 for ide1; ... */
        hwif_chipset_t  chipset;        /* sub-module for tuning.. */
        unsigned        noprobe    : 1; /* don't probe for this interface */
@@ -439,16 +308,10 @@ typedef struct hwif_s {
 #ifdef CONFIG_BLK_DEV_PROMISE
        unsigned        is_promise2: 1; /* 2nd i/f on promise DC4030 */
 #endif /* CONFIG_BLK_DEV_PROMISE */
+       unsigned        reset      : 1; /* reset after probe */
 #if (DISK_RECOVERY_TIME > 0)
        unsigned long   last_time;      /* time when previous rq was done */
 #endif
-#ifdef CONFIG_BLK_DEV_IDECD
-       struct request request_sense_request;   /* from ide-cd.c */
-       struct packet_command request_sense_pc; /* from ide-cd.c */
-#endif /* CONFIG_BLK_DEV_IDECD */
-#ifdef CONFIG_BLK_DEV_IDETAPE
-       ide_drive_t     *tape_drive;    /* Pointer to the tape on this interface */
-#endif /* CONFIG_BLK_DEV_IDETAPE */
        } ide_hwif_t;
 
 /*
@@ -467,6 +330,55 @@ typedef struct hwgroup_s {
        unsigned long           poll_timeout;   /* timeout value during long polls */
        } ide_hwgroup_t;
 
+/*
+ * Subdrivers support.
+ */
+#define IDE_SUBDRIVER_VERSION  0
+
+typedef int    (ide_cleanup_proc)(ide_drive_t *);
+typedef void   (ide_do_request_proc)(ide_drive_t *, struct request *, unsigned long);
+typedef void   (ide_end_request_proc)(byte, ide_hwgroup_t *);
+typedef int    (ide_ioctl_proc)(ide_drive_t *, struct inode *, struct file *, unsigned int, unsigned long);
+typedef int    (ide_open_proc)(struct inode *, struct file *, ide_drive_t *);
+typedef void   (ide_release_proc)(struct inode *, struct file *, ide_drive_t *);
+typedef int    (ide_check_media_change_proc)(ide_drive_t *);
+typedef void   (ide_pre_reset_proc)(ide_drive_t *);
+typedef unsigned long (ide_capacity_proc)(ide_drive_t *);
+typedef void   (ide_special_proc)(ide_drive_t *);
+
+typedef struct ide_driver_s {
+       byte                            media;
+       unsigned busy                   : 1;
+       unsigned supports_dma           : 1;
+       ide_cleanup_proc                *cleanup;
+       ide_do_request_proc             *do_request;
+       ide_end_request_proc            *end_request;
+       ide_ioctl_proc                  *ioctl;
+       ide_open_proc                   *open;
+       ide_release_proc                *release;
+       ide_check_media_change_proc     *media_change;
+       ide_pre_reset_proc              *pre_reset;
+       ide_capacity_proc               *capacity;
+       ide_special_proc                *special;
+       } ide_driver_t;
+
+#define DRIVER(drive)          ((ide_driver_t *)((drive)->driver))
+
+/*
+ * IDE modules.
+ */
+#define IDE_CHIPSET_MODULE             0       /* not supported yet */
+#define IDE_PROBE_MODULE               1
+#define IDE_DRIVER_MODULE              2
+
+typedef int    (ide_module_init_proc)(void);
+
+typedef struct ide_module_s {
+       int                             type;
+       ide_module_init_proc            *init;
+       struct ide_module_s             *next;
+} ide_module_t;
+
 /*
  * ide_hwifs[] is the master data structure used to keep track
  * of just about everything in ide.c.  Whenever possible, routines
@@ -485,22 +397,17 @@ extern    ide_hwif_t      ide_hwifs[];            /* master data repository */
 #define IDE_DRIVER     /* "parameter" for blk.h */
 #include <linux/blk.h>
 
-#if (DISK_RECOVERY_TIME > 0)
-void ide_set_recovery_timer (ide_hwif_t *);
-#define SET_RECOVERY_TIMER(drive) ide_set_recovery_timer (drive)
-#else
-#define SET_RECOVERY_TIMER(drive)
-#endif
-
 /*
- * This is used for (nearly) all data transfers from the IDE interface
+ * This is used for (nearly) all data transfers from/to the IDE interface
  */
 void ide_input_data (ide_drive_t *drive, void *buffer, unsigned int wcount);
+void ide_output_data (ide_drive_t *drive, void *buffer, unsigned int wcount);
 
 /*
- * This is used for (nearly) all data transfers to the IDE interface
+ * This is used for (nearly) all ATAPI data transfers from/to the IDE interface
  */
-void ide_output_data (ide_drive_t *drive, void *buffer, unsigned int wcount);
+void atapi_input_bytes (ide_drive_t *drive, void *buffer, unsigned int bytecount);
+void atapi_output_bytes (ide_drive_t *drive, void *buffer, unsigned int bytecount);
 
 /*
  * This is used on exit from the driver, to designate the next irq handler
@@ -519,6 +426,12 @@ byte ide_dump_status (ide_drive_t *drive, const char *msg, byte stat);
  */
 void ide_error (ide_drive_t *drive, const char *msg, byte stat);
 
+/*
+ * Issue a simple drive command
+ * The drive must be selected beforehand.
+ */
+void ide_cmd(ide_drive_t *drive, byte cmd, byte nsect, ide_handler_t *handler);
+
 /*
  * ide_fixstring() cleans up and (optionally) byte-swaps a text string,
  * removing leading/trailing blanks and compressing internal blanks.
@@ -611,7 +524,7 @@ void ide_end_drive_cmd (ide_drive_t *drive, byte stat, byte err);
 
 /*
  * ide_system_bus_speed() returns what we think is the system VESA/PCI
- * bus speed (in Mhz).  This is used for calculating interface PIO timings.
+ * bus speed (in MHz).  This is used for calculating interface PIO timings.
  * The default is 40 for known PCI systems, 50 otherwise.
  * The "idebus=xx" parameter can be used to override this value.
  */
@@ -623,76 +536,61 @@ int ide_system_bus_speed (void);
  */
 void ide_multwrite (ide_drive_t *drive, unsigned int mcount);
 
-#ifdef CONFIG_BLK_DEV_IDECD
-/*
- * These are routines in ide-cd.c invoked from ide.c
- */
-void ide_do_rw_cdrom (ide_drive_t *, unsigned long);
-int ide_cdrom_ioctl (ide_drive_t *, struct inode *, struct file *, unsigned int, unsigned long);
-int ide_cdrom_check_media_change (ide_drive_t *);
-int ide_cdrom_open (struct inode *, struct file *, ide_drive_t *);
-void ide_cdrom_release (struct inode *, struct file *, ide_drive_t *);
-void ide_cdrom_setup (ide_drive_t *);
-#endif /* CONFIG_BLK_DEV_IDECD */
-
-#ifdef CONFIG_BLK_DEV_IDETAPE
-
-/*
- *     Functions in ide-tape.c which are invoked from ide.c:
- */
-
-/*
- *     idetape_identify_device is called during device probing stage to
- *     probe for an ide atapi tape drive and to initialize global variables
- *     in ide-tape.c which provide the link between the character device
- *     and the corresponding block device.
- *
- *     Returns 1 if an ide tape was detected and is supported.
- *     Returns 0 otherwise.
- */
-int idetape_identify_device (ide_drive_t *drive,struct hd_driveid *id);
-
-/*
- *     idetape_setup is called a bit later than idetape_identify_device,
- *     during the search for disk partitions, to initialize various tape
- *     state variables in ide_drive_t *drive.
- */
-void idetape_setup (ide_drive_t *drive);
-
-/*
- *     idetape_do_request is our request function. It is called by ide.c
- *     to process a new request.
- */
-
-void idetape_do_request (ide_drive_t *drive, struct request *rq, unsigned long block);
-
-/*
- *     idetape_end_request is used to finish servicing a request, and to
- *     insert a pending pipeline request into the main device queue.
- */
-void idetape_end_request (byte uptodate, ide_hwgroup_t *hwgroup);
+void ide_revalidate_drives (void);
 
-/*
- *     Block device interface functions.
- */
-  
-int idetape_blkdev_ioctl (ide_drive_t *drive, struct inode *inode, struct file *file,
-                       unsigned int cmd, unsigned long arg);
-int idetape_blkdev_open (struct inode *inode, struct file *filp, ide_drive_t *drive);
-void idetape_blkdev_release (struct inode *inode, struct file *filp, ide_drive_t *drive);
+void ide_timer_expiry (unsigned long data);
+void ide_intr (int irq, void *dev_id, struct pt_regs *regs);
+void ide_geninit (struct gendisk *gd);
+void do_ide0_request (void);
+#if MAX_HWIFS > 1
+void do_ide1_request (void);
+#endif
+#if MAX_HWIFS > 2
+void do_ide2_request (void);
+#endif
+#if MAX_HWIFS > 3
+void do_ide3_request (void);
+#endif
+void ide_init_subdrivers (void);
 
-/*
- *     idetape_register_chrdev initializes the character device interface to
- *     the ide tape drive.
- */
-void idetape_register_chrdev (void);
+#ifndef _IDE_C
+extern struct file_operations ide_fops[];
+#endif
 
+#ifdef CONFIG_BLK_DEV_IDECD
+int ide_cdrom_init (void);
+#endif /* CONFIG_BLK_DEV_IDECD */
+#ifdef CONFIG_BLK_DEV_IDETAPE
+int idetape_init (void);
 #endif /* CONFIG_BLK_DEV_IDETAPE */
+#ifdef CONFIG_BLK_DEV_IDEFLOPPY
+int idefloppy_init (void);
+#endif /* CONFIG_BLK_DEV_IDEFLOPPY */
+#ifdef CONFIG_BLK_DEV_IDEDISK
+int idedisk_init (void);
+#endif /* CONFIG_BLK_DEV_IDEDISK */
+
+int ide_register_module (ide_module_t *module);
+void ide_unregister_module (ide_module_t *module);
+ide_drive_t *ide_scan_devices (byte media, ide_driver_t *driver, int n);
+int ide_register_subdriver (ide_drive_t *drive, ide_driver_t *driver, int version);
+int ide_unregister_subdriver (ide_drive_t *drive);
 
 #ifdef CONFIG_BLK_DEV_TRITON
 void ide_init_triton (byte, byte);
 #endif /* CONFIG_BLK_DEV_TRITON */
+
+#ifdef CONFIG_BLK_DEV_OPTI621
+void ide_init_opti621 (byte, byte);
+#endif /* CONFIG_BLK_DEV_OPTI621 */
+
+#ifdef CONFIG_BLK_DEV_IDE
+int ideprobe_init (void);
+#endif /* CONFIG_BLK_DEV_IDE */
+
+#ifdef CONFIG_BLK_DEV_PROMISE
+#include "promise.h"
+#define IS_PROMISE_DRIVE (HWIF(drive)->chipset == ide_promise)
+#else
+#define IS_PROMISE_DRIVE (0)   /* auto-NULLs out Promise code */
+#endif /* CONFIG_BLK_DEV_PROMISE */
diff --git a/drivers/block/opti621.c b/drivers/block/opti621.c
new file mode 100644 (file)
index 0000000..2e7bd73
--- /dev/null
@@ -0,0 +1,337 @@
+/*
+ *  linux/drivers/block/opti621.c       Version 0.1  Oct 26, 1996
+ *
+ *  Copyright (C) 1996  Linus Torvalds & author (see below)
+ */
+
+/*
+ * OPTi 82C621 chipset EIDE controller driver
+ * Author: Jaromir Koutek (E-mail: Jaromir.Koutek@st.mff.cuni.cz)
+ *
+ * Some parts of code are from ali14xx.c and from rz1000.c.
+ * I used docs from OPTi databook, from ftp.opti.com, file 9123-0002.ps
+ * and disassembled/traced setupvic.exe (DOS program).
+ * It increases kernel code about 2 kB.
+ * My card is Octek PIDE 1.01 (on card) or OPTiViC (program).
+ * It has a place for a secondary connector in circuit, but nothing
+ * is there. It cost about $25. Also BIOS says no address for
+ * secondary controller (see bellow in ide_init_opti621).
+ * I've only tested this on my system, which only has one disk.
+ * It's Western Digital WDAC2850, with PIO mode 3. The PCI bus
+ * is at 20 MHz (I have DX2/80, I tried PCI at 40, but I got random
+ * lockups). I tried the OCTEK double speed CD-ROM and
+ * it does not work! But I can't boot DOS also, so it's probably
+ * hardware fault. I have connected Conner 80MB, the Seagate 850MB (no
+ * problems) and Seagate 1GB (as slave, WD as master). My experiences
+ * with the third, 1GB drive: I got 3MB/s (hdparm), but sometimes
+ * it slows to about 100kB/s! I don't know why and I have
+ * not this drive now, so I can't try it again.
+ * If you have two disk, please boot in single mode and carefully
+ * (you can boot on read-only fs) try to set PIO mode 0 etc.
+ * The main problem with OPTi is that some timings for master
+ * and slave must be the same. For example, if you have master
+ * PIO 3 and slave PIO 0, driver have to set some timings of
+ * master for PIO 0. Second problem is that opti621_tune_drive
+ * got only one drive to set, but have to set both drives.
+ * This is solved in opti621_compute_pios. If you don't set
+ * the second drive, opti621_compute_pios use ide_get_best_pio_mode
+ * for autoselect mode (you can change it to PIO 0, if you want).
+ * If you then set the second drive to another PIO, the old value
+ * (automatically selected) will be overrided by yours.
+ * I don't know what there is a 25/33MHz switch in configuration
+ * register, driver is written for use at any frequency which get
+ * (use idebus=xx to select PCI bus speed).
+ * Use ide0=autotune for automatical tune of the PIO modes.
+ * If you get strange results, do not use this and set PIO manually
+ * by hdparm.
+ * I write this driver because I lost the paper ("manual") with
+ * settings of jumpers on the card and I have to boot Linux with
+ * Loadlin except LILO, cause I have to run the setupvic.exe program
+ * already or I get disk errors (my test: rpm -Vf
+ * /usr/X11R6/bin/XF86_SVGA - or any big file). 
+ * Some numbers from hdparm -t /dev/hda:
+ * Timing buffer-cache reads:   32 MB in  3.02 seconds =10.60 MB/sec
+ * Timing buffered disk reads:  16 MB in  5.52 seconds = 2.90 MB/sec
+ * I have 4 Megs/s before, but I don't know why (maybe bad hdparm).
+ * If you tried this driver, please send me a E-mail of your experiences.
+ * My E-mail address is Jaromir.Koutek@st.mff.cuni.cz (I hope
+ * till 30. 6. 2000), otherwise you can try miri@atrey.karlin.mff.cuni.cz.
+ * I think OPTi is trademark of OPTi, Octek is trademark of Octek and so on.
+ */
+
+#undef REALLY_SLOW_IO  /* most systems can safely undef this */
+#define OPTI621_DEBUG          /* define for debug messages */
+
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/delay.h>
+#include <linux/timer.h>
+#include <linux/mm.h>
+#include <linux/ioport.h>
+#include <linux/blkdev.h>
+#include <linux/hdreg.h>
+#include <asm/io.h>
+#include "ide.h"
+#include "ide_modes.h"
+#include <linux/pci.h>
+#include <linux/bios32.h>
+
+#define OPTI621_MAX_PIO 3
+/* In fact, I do not have any PIO 4 drive
+ * (address: 25 ns, data: 70 ns, recovery: 35 ns),
+ * but OPTi 82C621 is programmable and it can do (minimal values):
+ * on 40MHz PCI bus (pulse 25 ns):
+ *  address: 25 ns, data: 25 ns, recovery: 50 ns;
+ * on 20MHz PCI bus (pulse 50 ns):
+ *  address: 50 ns, data: 50 ns, recovery: 100 ns.
+ */ 
+
+/* #define READ_PREFETCH 0 */
+/* Uncommnent for disable read prefetch.
+ * There is some readprefetch capatibility in hdparm,
+ * but when I type hdparm -P 1 /dev/hda, I got errors
+ * and till reset drive is inacessible.
+ * This (hw) read prefetch is safe on my drive.
+ */
+
+#ifndef READ_PREFETCH
+#define READ_PREFETCH 0x40 /* read prefetch is enabled */
+#endif /* else read prefetch is disabled */
+
+#define READ_REG 0     /* index of Read cycle timing register */
+#define WRITE_REG 1    /* index of Write cycle timing register */
+#define MISC_REG 6     /* index of Miscellaneous register */
+#define CNTRL_REG 3    /* index of Control register */
+int reg_base;
+int opti621_primary_base, opti621_secondary_base;
+
+#define PIO_NOT_EXIST 254
+#define PIO_DONT_KNOW 255
+int opti621_drive_pio_modes[4];
+/* there are stored pio numbers from other calls of opti621_tune_drive */
+
+void opti621_compute_pios(ide_hwif_t *drv, int second_contr, int slave_drive, byte pio)
+/* Store values into opti621_drive_pio_modes:
+ *     second_contr - 0 for primary controller, 1 for secondary
+ *     slave_drive - 0 -> pio is for master, 1 -> pio is for slave
+ *     pio - PIO mode for selected drive (for other we don't know)     
+ */     
+{
+       ide_drive_t *p1, *p2, *drive;
+       int i;
+       
+       i = 2*second_contr;
+       p1 = &drv->drives[0];
+       p2 = &drv->drives[1];
+       drive = &drv->drives[slave_drive]; 
+       pio = ide_get_best_pio_mode(drive, pio, OPTI621_MAX_PIO, NULL);
+       opti621_drive_pio_modes[i+slave_drive]=pio;
+
+       if (p1->present) {
+               if (opti621_drive_pio_modes[i]==PIO_DONT_KNOW)
+                       opti621_drive_pio_modes[i]=ide_get_best_pio_mode(p1,
+                               255, OPTI621_MAX_PIO, NULL);
+               /* we don't know the selected PIO mode, so we have to autoselect */
+       } else
+               opti621_drive_pio_modes[i]=PIO_NOT_EXIST;
+       if (p2->present) {
+               if (opti621_drive_pio_modes[i+1]==PIO_DONT_KNOW)
+                       opti621_drive_pio_modes[i+1]=ide_get_best_pio_mode(p2,
+                               255, OPTI621_MAX_PIO, NULL);
+               /* we don't know the selected PIO mode, so we have to autoselect */
+       } else
+               opti621_drive_pio_modes[i+1]=PIO_NOT_EXIST;
+       /* in opti621_drive_pio_modes[i] and [i+1] are valid PIO modes (or PIO_NOT_EXIST,
+               if drive is not connected), we can continue */
+#ifdef OPTI621_DEBUG
+       printk("%s: (master): ", p1->name);
+       if (p1->present)
+               printk("PIO mode %d\n", opti621_drive_pio_modes[i]);
+       else
+               printk("not present\n");
+       printk("%s: (slave): ", p2->name);
+       if (p2->present)
+               printk("PIO mode %d\n", opti621_drive_pio_modes[i+1]);
+       else
+               printk("not present\n");
+#endif
+}
+
+int cmpt_clk(int time, int bus_speed)
+/* Returns (rounded up) time in clocks for time in ns,
+ * with bus_speed in MHz.
+ * Example: bus_speed = 40 MHz, time = 80 ns
+ * 1000/40 = 25 ns (clk value),
+ * 80/25 = 3.2, rounded up to 4 (I hope ;-)).
+ * Use idebus=xx to select right frequency.
+ */
+{
+       return ((time*bus_speed+999)/1000);
+}
+
+void write_reg(byte value, int reg)
+/* Write value to register reg, base of register
+ * is at reg_base (0x1f0 primary, 0x170 secondary,
+ * if not changed by PCI configuration).
+ * This is from setupvic.exe program.
+ */
+{
+       inw(reg_base+1);
+       inw(reg_base+1);
+       outb(3, reg_base+2);
+       outb(value, reg_base+reg);
+       outb(0x83, reg_base+2); 
+}
+
+byte read_reg(int reg)
+/* Read value from register reg, base of register
+ * is at reg_base (0x1f0 primary, 0x170 secondary, 
+ * if not changed by PCI configuration).
+ * This is from setupvic.exe program.
+ */
+{
+       byte ret;
+       inw(reg_base+1);
+       inw(reg_base+1);
+       outb(3, reg_base+2);
+       ret=inb(reg_base+reg);
+       outb(0x83, reg_base+2); 
+       return ret;
+}
+
+typedef struct pio_clocks_s {
+       int     address_time;   /* Address setup (clocks) */
+       int     data_time;      /* Active/data pulse (clocks) */
+       int     recovery_time;  /* Recovery time (clocks) */
+} pio_clocks_t;
+
+void compute_clocks(int pio, pio_clocks_t *clks)
+{
+        if (pio!=PIO_NOT_EXIST) {
+               int adr_setup, data_pls, bus_speed;
+               bus_speed = ide_system_bus_speed();
+               adr_setup = ide_pio_timings[pio].setup_time;
+               data_pls = ide_pio_timings[pio].active_time;
+               clks->address_time = cmpt_clk(adr_setup, bus_speed);
+               clks->data_time = cmpt_clk(data_pls, bus_speed);
+               clks->recovery_time = cmpt_clk(ide_pio_timings[pio].cycle_time
+                       -adr_setup-data_pls, bus_speed);
+               if (clks->address_time<1) clks->address_time = 1;
+               if (clks->address_time>4) clks->address_time = 4;
+               if (clks->data_time<1) clks->data_time = 1;
+               if (clks->data_time>16) clks->data_time = 16;
+               if (clks->recovery_time<2) clks->recovery_time = 2;
+               if (clks->recovery_time>17) clks->recovery_time = 17;
+       } else {
+               clks->address_time = 1;
+               clks->data_time = 1;
+               clks->recovery_time = 2;
+               /* minimal values */
+       }
+}
+
+static void opti621_tune_drive (ide_drive_t *drive, byte pio)
+/* Main tune procedure, hooked by tuneproc. */
+{
+       /* primary and secondary drives share some (but not same) registers,
+       so we have to program both drives */
+       unsigned long flags;
+       byte pio1, pio2;
+       int second_contr, slave_drive;
+       pio_clocks_t first, second;
+       int ax, drdy;
+       byte cycle1, cycle2, misc;
+               
+       second_contr=HWIF(drive)->index;
+       if ((second_contr!=0) && (second_contr!=1))
+               return; /* invalid controller number */
+       if (((second_contr==0) && (opti621_primary_base==0)) ||
+               ((second_contr==1) && (opti621_secondary_base==0)))
+               return; /* controller is unaccessible/not exist */
+       slave_drive = drive->select.b.unit;
+       /* set opti621_drive_pio_modes[] */
+       opti621_compute_pios(HWIF(drive), second_contr, slave_drive, pio);
+       
+       reg_base = second_contr ? opti621_primary_base : opti621_secondary_base;
+
+       pio1 = opti621_drive_pio_modes[second_contr*2];
+       pio2 = opti621_drive_pio_modes[second_contr*2+1];
+       
+       compute_clocks(pio1, &first);
+       compute_clocks(pio2, &second);
+       
+       ax = (first.address_time<second.address_time) ?
+               (second.address_time) : (first.address_time); /* in ax is max(a1,a2) */
+       drdy = 2; /* DRDY is default 2 (by OPTi Databook) */
+
+       cycle1 = ((first.data_time-1)<<4) | (first.recovery_time-2);
+       cycle2 = ((second.data_time-1)<<4) | (second.recovery_time-2);
+       misc = READ_PREFETCH | ((ax-1)<<4) | ((drdy-2)<<1);
+       
+#ifdef OPTI621_DEBUG
+       printk("%s: master: address: %d, data: %d, recovery: %d, drdy: %d [clk]\n",
+               HWIF(drive)->name, ax, first.data_time, first.recovery_time, drdy);
+       printk("%s: slave:  address: %d, data: %d, recovery: %d, drdy: %d [clk]\n",
+               HWIF(drive)->name, ax, second.data_time, second.recovery_time, drdy);
+#endif
+
+       save_flags(flags);
+       cli();
+       
+       outb(0xc0, reg_base+CNTRL_REG); /* allow Register-B */
+       outb(0xff, reg_base+5);         /* hmm, setupvic.exe does this ;-) */
+       inb(reg_base+CNTRL_REG);        /* if reads 0xff, adapter not exist? */
+       read_reg(CNTRL_REG);            /* if reads 0xc0, no interface exist? */
+       read_reg(5);                    /* read version, probably 0 */
+       
+       /* programming primary drive - 0 or 2 */
+       write_reg(0, MISC_REG);         /* select Index-0 for Register-A */
+       write_reg(cycle1, READ_REG);    /* set read cycle timings */
+       write_reg(cycle1, WRITE_REG);   /* set write cycle timings */
+
+       /* programming secondary drive - 1 or 3 */
+       write_reg(1, MISC_REG); /* select Index-1 for Register-B */
+       write_reg(cycle2, READ_REG); /* set read cycle timings */
+       write_reg(cycle2, WRITE_REG); /* set write cycle timings */
+       
+       write_reg(0x85, CNTRL_REG); /* use Register-A for drive 0 (or 2) and
+               Register-B for drive 1 (or 3) */ 
+               
+       write_reg(misc, MISC_REG); /* set address setup, DRDY timings
+               and read prefetch for both drives */
+               
+       restore_flags(flags);
+}
+
+void ide_init_opti621 (byte bus, byte fn)
+/* Init controller. Called on kernel boot. */
+{
+       int rc, i;
+       unsigned char sreg;
+       unsigned short reg;
+       unsigned int dreg;
+       unsigned char revision;
+       for (i=0; i<4; i++)
+               opti621_drive_pio_modes[i] = PIO_DONT_KNOW;
+       printk("ide: OPTi 82C621 on PCI bus %d function %d\n", bus, fn);
+       if ((rc = pcibios_read_config_byte (bus, fn, 0x08, &sreg)))
+               goto quit;
+       revision = sreg;
+       if ((rc = pcibios_read_config_dword (bus, fn, 0x10, &dreg)))
+               goto quit;
+       opti621_primary_base = ((dreg==0) || (dreg>0xffff)) ? 0 : dreg-1;
+       if ((rc = pcibios_read_config_dword (bus, fn, 0x18, &dreg)))
+               goto quit;
+       opti621_secondary_base = ((dreg==0) || (dreg>0xffff)) ? 0 : dreg-1;
+       printk("ide: revision %d, primary: 0x%04x, secondary: 0x%04x\n",
+               revision, opti621_primary_base, opti621_secondary_base);
+       if ((rc = pcibios_read_config_word (bus, fn, PCI_COMMAND, &reg)))
+               goto quit;
+       if (!(reg & 1)) {
+               printk("ide: ports are not enabled (BIOS)\n");
+       } else {
+               ide_hwifs[0].tuneproc = &opti621_tune_drive;
+               ide_hwifs[1].tuneproc = &opti621_tune_drive;
+       }
+  quit: if (rc) printk("ide: pcibios access failed - %s\n", pcibios_strerror(rc));
+}
index 06442636c281e36c3b2a459fa9e5e5f139516ee3..f3a8d5529394c09bff86c8fde3ac3bb0d7b05c5c 100644 (file)
@@ -172,14 +172,16 @@ int init_dc4030 (void)
        hwif->selectproc = second_hwif->selectproc = &promise_selectproc;
 /* Shift the remaining interfaces down by one */
        for (i=MAX_HWIFS-1 ; i > hwif->index+1 ; i--) {
+               ide_hwif_t *h = &ide_hwifs[i];
+
                printk("Shifting i/f %d values to i/f %d\n",i-1,i);
-               ide_hwifs[i].io_base = ide_hwifs[i-1].io_base;
-               ide_hwifs[i].ctl_port = ide_hwifs[i-1].ctl_port;
-               ide_hwifs[i].noprobe = ide_hwifs[i-1].noprobe;
+               ide_init_hwif_ports(h->io_ports, (h-1)->io_ports[IDE_DATA_OFFSET], NULL);
+               h->io_ports[IDE_CONTROL_OFFSET] = (h-1)->io_ports[IDE_CONTROL_OFFSET];
+               h->noprobe = (h-1)->noprobe;
        }
        second_hwif->is_promise2 = 1;
-       second_hwif->io_base = hwif->io_base;
-       second_hwif->ctl_port = hwif->ctl_port; 
+       ide_init_hwif_ports(second_hwif->io_ports, hwif->io_ports[IDE_DATA_OFFSET], NULL);
+       second_hwif->io_ports[IDE_CONTROL_OFFSET] = hwif->io_ports[IDE_CONTROL_OFFSET];
        second_hwif->irq = hwif->irq;
        for (i=0; i<2 ; i++) {
             hwif->drives[i].io_32bit = 3;
@@ -309,12 +311,11 @@ static void promise_write (ide_drive_t *drive)
 void do_promise_io (ide_drive_t *drive, struct request *rq)
 {
        unsigned long timeout;
-       unsigned short io_base = HWIF(drive)->io_base;
        byte stat;
 
        if (rq->cmd == READ) {
            ide_set_handler(drive, &promise_read_intr, WAIT_CMD);
-           OUT_BYTE(PROMISE_READ, io_base+IDE_COMMAND_OFFSET);
+           OUT_BYTE(PROMISE_READ, IDE_COMMAND_REG);
 /* The card's behaviour is odd at this point. If the data is
    available, DRQ will be true, and no interrupt will be
    generated by the card. If this is the case, we need to simulate
@@ -336,7 +337,7 @@ void do_promise_io (ide_drive_t *drive, struct request *rq)
 */
                    return;
                }
-               if(IN_BYTE(io_base+IDE_SELECT_OFFSET) & 0x01)
+               if(IN_BYTE(IDE_SELECT_REG) & 0x01)
                    return;
                udelay(1);
            } while (jiffies < timeout);
@@ -345,7 +346,7 @@ void do_promise_io (ide_drive_t *drive, struct request *rq)
            return;
        }
        if (rq->cmd == WRITE) {
-           OUT_BYTE(PROMISE_WRITE, io_base+IDE_COMMAND_OFFSET);
+           OUT_BYTE(PROMISE_WRITE, IDE_COMMAND_REG);
            if (ide_wait_stat(drive, DATA_READY, drive->bad_wstat, WAIT_DRQ)) {
                printk("%s: no DRQ after issuing PROMISE_WRITE\n", drive->name);
                return;
index bbbeec8c7cb3e0c7ed309010eca33674dc32556c..4faeae8087283c323ac5f3bd6c1487e67e105178 100644 (file)
  * Pretty much the same code will work for the OPTi "Viper" chipset.
  * Look for DMA support for this in linux kernel 2.1.xx, when it appears.
  *
- * DMA is currently supported only for hard disk drives (not cdroms).
- *
- * Support for cdroms will likely be added at a later date,
- * after broader experience has been obtained with hard disks.
- *
  * Up to four drives may be enabled for DMA, and the Triton chipset will
  * (hopefully) arbitrate the PCI bus among them.  Note that the i82371 chip
  * provides a single "line buffer" for the BM IDE function, so performance of
  *
  * And, yes, Intel Zappa boards really *do* use the Triton IDE ports.
  */
-#include <linux/config.h>
 #include <linux/types.h>
 #include <linux/kernel.h>
 #include <linux/timer.h>
@@ -198,13 +192,8 @@ static int build_dmatable (ide_drive_t *drive)
                 * is always composed of two adjacent physical 4kB pages rather
                 * than two possibly non-adjacent physical 4kB pages.
                 */
-               if (bh == NULL) {  /* paging and tape requests have (rq->bh == NULL) */
+               if (bh == NULL) {  /* paging requests have (rq->bh == NULL) */
                        addr = virt_to_bus (rq->buffer);
-#ifdef CONFIG_BLK_DEV_IDETAPE
-                       if (drive->media == ide_tape)
-                               size = drive->tape.pc->request_transfer;
-                       else
-#endif /* CONFIG_BLK_DEV_IDETAPE */    
                        size = rq->nr_sectors << 9;
                } else {
                        /* group sequential buffers into one large buffer */
@@ -317,10 +306,8 @@ static int triton_dmaproc (ide_dma_action_t func, ide_drive_t *drive)
        outl(virt_to_bus (HWIF(drive)->dmatable), dma_base + 4); /* PRD table */
        outb(reading, dma_base);                        /* specify r/w */
        outb(inb(dma_base+2)|0x06, dma_base+2);         /* clear status bits */
-#ifdef CONFIG_BLK_DEV_IDEATAPI
        if (drive->media != ide_disk)
                return 0;
-#endif /* CONFIG_BLK_DEV_IDEATAPI */   
        ide_set_handler(drive, &dma_intr, WAIT_CMD);    /* issue cmd to drive */
        OUT_BYTE(reading ? WIN_READDMA : WIN_WRITEDMA, IDE_COMMAND_REG);
        outb(inb(dma_base)|1, dma_base);                /* begin DMA */
@@ -449,14 +436,14 @@ void ide_init_triton (byte bus, byte fn)
 #endif /* DISPLAY_TRITON_TIMINGS */
                ide_hwif_t *hwif = &ide_hwifs[h];
                unsigned short time;
-               if (hwif->io_base == 0x1f0) {
+               if (hwif->io_ports[IDE_DATA_OFFSET] == 0x1f0) {
                        time = timings & 0xffff;
                        if ((time & 0x8000) == 0)       /* interface enabled? */
                                continue;
                        hwif->chipset = ide_triton;
                        if (dma_enabled)
                                init_triton_dma(hwif, bmiba);
-               } else if (hwif->io_base == 0x170) {
+               } else if (hwif->io_ports[IDE_DATA_OFFSET] == 0x170) {
                        time = timings >> 16;
                        if ((time & 0x8000) == 0)       /* interface enabled? */
                                continue;
@@ -475,7 +462,7 @@ void ide_init_triton (byte bus, byte fn)
                {
                        byte stime;
                        if (pcibios_read_config_byte(bus, fn, 0x44, &stime)) {
-                               if (hwif->io_base == 0x1f0) {
+                               if (hwif->io_ports[IDE_DATA_OFFSET] == 0x1f0) {
                                        s_clks = ~stime >> 6;
                                        r_clks = ~stime >> 4;
                                } else {
index 837641cfca8291062fed37908420ea71a8565fe5..227e951eb23887f928c0a5daf444189cdbca7ffe 100644 (file)
@@ -64,6 +64,7 @@ static const char *mcdx_c_version
 #include <linux/mm.h>
 #include <linux/malloc.h>
 #include <asm/io.h>
+#include <asm/uaccess.h>
 
 #include <linux/major.h>
 #define MAJOR_NR MITSUMI_X_CDROM_MAJOR
index 266480bb4521d9300a0b69638728d620d70ea1ad..08fd06e2530afd74b94c30b7e9188eddc07b37cd 100644 (file)
@@ -1,6 +1,41 @@
+Fri Nov  8 20:19:50 1996  Theodore Ts'o  <tytso@rsts-11.mit.edu>
+
+       * n_tty.c (n_tty_flush_buffer): Only call driver->unthrottle() if
+               the tty was previous throttled.
+               (n_tty_set_termios, write_chan): Add changes suggested by
+                       Simon P. Allen to allow hardware cooking.
+
+       * tty_ioctl.c (set_termios): If we get a signal while waiting for
+               the tty to drain, return -EINTR.
+       
+       * serial.c (change_speed): Add support for CREAD, as required by
+               POSIX.
+
+Sat Nov  2 20:43:10 1996  Theodore Ts'o  <tytso@rsts-11.mit.edu>
+
+       * serial.c: Wholesale changes.  Added support for the Startech
+               16650 and 16650V2 chips.  (WARNING: the new startech
+               16650A may or may not work!)  Added support for the
+               TI16750 (not yet tested).  Split async_struct into a
+               transient part (async_struct) and a permanent part
+               (serial_state) which contains the configuration
+               information for the ports.  Added new driver routines
+               wait_until_sent() and send_xchar() to help with POSIX
+               compliance.  Added support for radio clocks which waggle
+               the carrier detect line (CONFIG_HARD_PPS).
+       
+       * tty_ioctl.c (tty_wait_until_sent): Added call to new driver
+               function tty->driver.wait_until_sent(), which returns when
+               the tty's device xmit buffers are drained.  Needed for
+               full POSIX compliance.
+
+               (send_prio_char): New function, called by the ioctl's
+               TCIOFF and TCION; uses the new driver call send_xchar(),
+               which will send the XON or XOFF character at high priority
+               (and even if tty output is stopped).
+
 Wed Jun  5 18:52:04 1996  Theodore Ts'o  <tytso@rsts-11.mit.edu>
 
-       * tty_io.c (do_tty_hangup): 
        * pty.c (pty_close): When closing a pty, make sure packet mode is
                cleared.
 
index accc35015308042dc5d8b16d5d511de4e45d714d..b9e72dba7f737f81e3a3eefb5b170d37dfcb2688 100644 (file)
@@ -1459,7 +1459,8 @@ static int do_con_write(struct tty_struct * tty, int from_user,
                 ok = tc && (c >= 32 ||
                             (!utf && !(((disp_ctrl ? CTRL_ALWAYS
                                          : CTRL_ACTION) >> c) & 1)))
-                        && (c != 127 || disp_ctrl);
+                        && (c != 127 || disp_ctrl)
+                       && (c != 128+27);
 
                if (vc_state == ESnormal && ok) {
                        /* Now try to find out how to display it */
@@ -1499,6 +1500,8 @@ static int do_con_write(struct tty_struct * tty, int from_user,
                 *  of an escape sequence.
                 */
                switch (c) {
+                       case 0:
+                               continue;
                        case 7:
                                if (bell_duration)
                                        kd_mksound(bell_pitch, bell_duration);
index a0531f08c9aaf5c20d8d9b09f5cf7b249fe20bbd..446456a2d4bded26a9d0415ff60bcf6f993c8e93 100644 (file)
@@ -508,8 +508,9 @@ static void do_self(unsigned char value, char up_flag)
 #define A_CFLEX  '^'
 #define A_TILDE  '~'
 #define A_DIAER  '"'
-static unsigned char ret_diacr[] =
-        {A_GRAVE, A_ACUTE, A_CFLEX, A_TILDE, A_DIAER };
+#define A_CEDIL  ','
+static unsigned char ret_diacr[NR_DEAD] =
+        {A_GRAVE, A_ACUTE, A_CFLEX, A_TILDE, A_DIAER, A_CEDIL };
 
 /* If a dead key pressed twice, output a character corresponding to it, */
 /* otherwise just remember the dead key.                               */
index 3fee4013d615395b8a255b77f109607ea5c30f9b..87b9811b4f92450038b119599b0925d3da0b9803 100644 (file)
@@ -822,8 +822,9 @@ static void do_self(unsigned char value, char up_flag)
 #define A_CFLEX  '^'
 #define A_TILDE  '~'
 #define A_DIAER  '"'
-static unsigned char ret_diacr[] =
-       {A_GRAVE, A_ACUTE, A_CFLEX, A_TILDE, A_DIAER };
+#define A_CEDIL  ','
+static unsigned char ret_diacr[NR_DEAD] =
+       {A_GRAVE, A_ACUTE, A_CFLEX, A_TILDE, A_DIAER, A_CEDIL };
 
 /* If a dead key pressed twice, output a character corresponding to it,        */
 /* otherwise just remember the dead key.                               */
index 59357a4ef1e50307272fc7c3ec957cb1bb6b441e..e77685dbd8c54936d7637e9f3cfacdb26f893919 100644 (file)
@@ -67,6 +67,7 @@ extern int ms_bus_mouse_init(void);
 extern int atixl_busmouse_init(void);
 extern int sun_mouse_init(void);
 extern void watchdog_init(void);
+extern void wdt_init(void);
 extern void pcwatchdog_init(void);
 extern int rtc_init(void);
 
@@ -222,7 +223,10 @@ int misc_init(void)
 #endif
 #ifdef CONFIG_SOFT_WATCHDOG
        watchdog_init();
-#endif 
+#endif
+#ifdef CONFIG_WDT
+       wdt_init();
+#endif
 #ifdef CONFIG_PCWATCHDOG
        pcwatchdog_init();
 #endif
index 143d9e0a82de938bb84381540454b5a5b7863c32..0c0e057f8c3d233c2dbac56a58cbbd2f86588400 100644 (file)
@@ -76,8 +76,9 @@ void n_tty_flush_buffer(struct tty_struct * tty)
        if (!tty->link)
                return;
 
-       if (tty->driver.unthrottle)
-               (tty->driver.unthrottle)(tty);
+       if (tty->driver.unthrottle &&
+           clear_bit(TTY_THROTTLED, &tty->flags))
+               tty->driver.unthrottle(tty);
        if (tty->link->packet) {
                tty->ctrl_status |= TIOCPKT_FLUSHREAD;
                wake_up_interruptible(&tty->link->read_wait);
@@ -629,6 +630,11 @@ static void n_tty_set_termios(struct tty_struct *tty, struct termios * old)
                return;
        
        tty->icanon = (L_ICANON(tty) != 0);
+       if (tty->flags & (1<<TTY_HW_COOK_IN)) {
+               tty->raw = 1;
+               tty->real_raw = 1;
+               return;
+       }
        if (I_ISTRIP(tty) || I_IUCLC(tty) || I_IGNCR(tty) ||
            I_ICRNL(tty) || I_INLCR(tty) || L_ICANON(tty) ||
            I_IXON(tty) || L_ISIG(tty) || L_ECHO(tty) ||
@@ -948,7 +954,7 @@ static int write_chan(struct tty_struct * tty, struct file * file,
                        retval = -EIO;
                        break;
                }
-               if (O_OPOST(tty)) {
+               if (O_OPOST(tty) && !(tty->flags & (1<<TTY_HW_COOK_OUT))) {
                        while (nr > 0) {
                                get_user(c, b);
                                if (opost(c, tty) < 0)
index 6655f7b285a941ce497a5232f27e29712eb9d393..40860ac727aa8a31d3c3f436217a149b2a03cbbc 100644 (file)
@@ -42,6 +42,7 @@
 #include <linux/ptrace.h>
 #include <linux/ioport.h>
 #include <linux/mm.h>
+#include <linux/malloc.h>
 
 #include <asm/system.h>
 #include <asm/io.h>
@@ -49,7 +50,7 @@
 #include <asm/bitops.h>
 
 static char *serial_name = "Serial driver";
-static char *serial_version = "4.13";
+static char *serial_version = "4.20";
 
 DECLARE_TASK_QUEUE(tq_serial);
 
@@ -109,8 +110,27 @@ static volatile int rs_irq_triggered;
 static volatile int rs_triggered;
 static int rs_wild_int_mask;
 
-static void autoconfig(struct async_struct * info);
+static void autoconfig(struct serial_state * info);
 static void change_speed(struct async_struct *info);
+static void rs_wait_until_sent(struct tty_struct *tty, int timeout);
+
+/*
+ * Here we define the default xmit fifo size used for each type of
+ * UART
+ */
+static struct serial_uart_config uart_config[] = {
+       { "unknown", 1, 0 }, 
+       { "8250", 1, 0 }, 
+       { "16450", 1, 0 }, 
+       { "16550", 1, 0 }, 
+       { "16550A", 16, UART_CLEAR_FIFO | UART_USE_FIFO }, 
+       { "cirrus", 1, 0 }, 
+       { "ST16650", 1, UART_CLEAR_FIFO |UART_STARTECH }, 
+       { "ST16650V2", 32, UART_CLEAR_FIFO | UART_USE_FIFO |
+                 UART_STARTECH }, 
+       { "TI16750", 64, UART_CLEAR_FIFO | UART_USE_FIFO},
+       { 0, 0}
+};
        
 /*
  * This assumes you have a 1.8432 MHz clock for your UART.
@@ -146,7 +166,7 @@ static void change_speed(struct async_struct *info);
 
 #define C_P(card,port) (((card)<<6|(port)<<3) + 1)
 
-struct async_struct rs_table[] = {
+struct serial_state rs_table[] = {
        /* UART CLK   PORT IRQ     FLAGS        */
        { 0, BASE_BAUD, 0x3F8, 4, STD_COM_FLAGS },      /* ttyS0 */
        { 0, BASE_BAUD, 0x2F8, 3, STD_COM_FLAGS },      /* ttyS1 */
@@ -204,7 +224,7 @@ struct async_struct rs_table[] = {
 #endif
 };
 
-#define NR_PORTS       (sizeof(rs_table)/sizeof(struct async_struct))
+#define NR_PORTS       (sizeof(rs_table)/sizeof(struct serial_state))
 
 static struct tty_struct *serial_table[NR_PORTS];
 static struct termios *serial_termios[NR_PORTS];
@@ -252,7 +272,7 @@ static inline int serial_paranoia_check(struct async_struct *info,
  */
 static int baud_table[] = {
        0, 50, 75, 110, 134, 150, 200, 300, 600, 1200, 1800, 2400, 4800,
-       9600, 19200, 38400, 57600, 115200, 0 };
+       9600, 19200, 38400, 57600, 115200, 230400, 460800, 0 };
 
 static inline unsigned int serial_in(struct async_struct *info, int offset)
 {
@@ -400,11 +420,14 @@ static _INLINE_ void receive_chars(struct async_struct *info,
 
        do {
                ch = serial_inp(info, UART_RX);
+               if (*status & UART_LSR_BI)
+                       *status &= ~(UART_LSR_FE | UART_LSR_PE);
                if (*status & info->ignore_status_mask) {
                        if (++ignored > 100)
                                break;
                        goto ignore_char;
                }
+               *status &= info->read_status_mask;
                if (tty->flip.count >= TTY_FLIPBUF_SIZE)
                        break;
                tty->flip.count++;
@@ -425,7 +448,7 @@ static _INLINE_ void receive_chars(struct async_struct *info,
                        *tty->flip.flag_buf_ptr++ = 0;
                *tty->flip.char_buf_ptr++ = ch;
        ignore_char:
-               *status = serial_inp(info, UART_LSR) & info->read_status_mask;
+               *status = serial_inp(info, UART_LSR);
        } while (*status & UART_LSR_DR);
        queue_task_irq_off(&tty->flip.tqueue, &tq_timer);
 #ifdef SERIAL_DEBUG_INTR
@@ -477,19 +500,27 @@ static _INLINE_ void transmit_chars(struct async_struct *info, int *intr_done)
 static _INLINE_ void check_modem_status(struct async_struct *info)
 {
        int     status;
+       struct  async_icount *icount;
        
        status = serial_in(info, UART_MSR);
 
        if (status & UART_MSR_ANY_DELTA) {
+               icount = &info->state->icount;
                /* update input line counters */
                if (status & UART_MSR_TERI)
-                       info->icount.rng++;
+                       icount->rng++;
                if (status & UART_MSR_DDSR)
-                       info->icount.dsr++;
-               if (status & UART_MSR_DDCD)
-                       info->icount.dcd++;
+                       icount->dsr++;
+               if (status & UART_MSR_DDCD) {
+                       icount->dcd++;
+#ifdef CONFIG_HARD_PPS
+                       if ((info->flags & ASYNC_HARDPPS_CD) &&
+                           (status & UART_MSR_DCD))
+                               hardpps();
+#endif
+               }
                if (status & UART_MSR_DCTS)
-                       info->icount.cts++;
+                       icount->cts++;
                wake_up_interruptible(&info->delta_msr_wait);
        }
 
@@ -569,7 +600,7 @@ static void rs_interrupt(int irq, void *dev_id, struct pt_regs * regs)
 
                info->last_active = jiffies;
 
-               status = serial_inp(info, UART_LSR) & info->read_status_mask;
+               status = serial_inp(info, UART_LSR);
 #ifdef SERIAL_DEBUG_INTR
                printk("status = %x...", status);
 #endif
@@ -594,7 +625,8 @@ static void rs_interrupt(int irq, void *dev_id, struct pt_regs * regs)
        } while (end_mark != info);
        if (multi->port_monitor)
                printk("rs port monitor (normal) irq %d: 0x%x, 0x%x\n",
-                      info->irq, first_multi, inb(multi->port_monitor));
+                      info->state->irq, first_multi,
+                      inb(multi->port_monitor));
 #ifdef SERIAL_DEBUG_INTR
        printk("end.\n");
 #endif
@@ -624,7 +656,7 @@ static void rs_interrupt_single(int irq, void *dev_id, struct pt_regs * regs)
                first_multi = inb(multi->port_monitor);
 
        do {
-               status = serial_inp(info, UART_LSR) & info->read_status_mask;
+               status = serial_inp(info, UART_LSR);
 #ifdef SERIAL_DEBUG_INTR
                printk("status = %x...", status);
 #endif
@@ -643,7 +675,8 @@ static void rs_interrupt_single(int irq, void *dev_id, struct pt_regs * regs)
        info->last_active = jiffies;
        if (multi->port_monitor)
                printk("rs port monitor (single) irq %d: 0x%x, 0x%x\n",
-                      info->irq, first_multi, inb(multi->port_monitor));
+                      info->state->irq, first_multi,
+                      inb(multi->port_monitor));
 #ifdef SERIAL_DEBUG_INTR
        printk("end.\n");
 #endif
@@ -683,7 +716,7 @@ static void rs_interrupt_multi(int irq, void *dev_id, struct pt_regs * regs)
 
                info->last_active = jiffies;
 
-               status = serial_inp(info, UART_LSR) & info->read_status_mask;
+               status = serial_inp(info, UART_LSR);
 #ifdef SERIAL_DEBUG_INTR
                printk("status = %x...", status);
 #endif
@@ -707,7 +740,7 @@ static void rs_interrupt_multi(int irq, void *dev_id, struct pt_regs * regs)
                }
                if (multi->port_monitor)
                        printk("rs port monitor irq %d: 0x%x, 0x%x\n",
-                              info->irq, first_multi,
+                              info->state->irq, first_multi,
                               inb(multi->port_monitor));
                if ((inb(multi->port1) & multi->mask1) != multi->match1)
                        continue;
@@ -910,13 +943,13 @@ static int startup(struct async_struct * info)
        unsigned long flags;
        int     retval;
        void (*handler)(int, void *, struct pt_regs *);
+       struct serial_state *state= info->state;
        unsigned long page;
 
        page = get_free_page(GFP_KERNEL);
        if (!page)
                return -ENOMEM;
 
-       
        save_flags(flags); cli();
 
        if (info->flags & ASYNC_INITIALIZED) {
@@ -925,7 +958,7 @@ static int startup(struct async_struct * info)
                return 0;
        }
 
-       if (!info->port || !info->type) {
+       if (!state->port || !state->type) {
                if (info->tty)
                        set_bit(TTY_IO_ERROR, &info->tty->flags);
                free_page(page);
@@ -938,23 +971,30 @@ static int startup(struct async_struct * info)
                info->xmit_buf = (unsigned char *) page;
 
 #ifdef SERIAL_DEBUG_OPEN
-       printk("starting up ttys%d (irq %d)...", info->line, info->irq);
+       printk("starting up ttys%d (irq %d)...", info->line, state->irq);
 #endif
 
+       if (uart_config[info->state->type].flags & UART_STARTECH) {
+               /* Wake up UART */
+               serial_outp(info, UART_LCR, 0xBF);
+               serial_outp(info, UART_EFR, UART_EFR_ECB);
+               serial_outp(info, UART_IER, 0);
+               serial_outp(info, UART_EFR, 0);
+               serial_outp(info, UART_LCR, 0);
+       }
+
+       if (info->state->type == PORT_16750) {
+               /* Wake up UART */
+               serial_outp(info, UART_IER, 0);
+       }
+
        /*
         * Clear the FIFO buffers and disable them
         * (they will be reenabled in change_speed())
         */
-       if (info->type == PORT_16650) {
-               serial_outp(info, UART_FCR, (UART_FCR_CLEAR_RCVR |
-                                            UART_FCR_CLEAR_XMIT));
-               info->xmit_fifo_size = 1; /* disabled for now */
-       } else if (info->type == PORT_16550A) {
+       if (uart_config[state->type].flags & UART_CLEAR_FIFO)
                serial_outp(info, UART_FCR, (UART_FCR_CLEAR_RCVR |
                                             UART_FCR_CLEAR_XMIT));
-               info->xmit_fifo_size = 16;
-       } else
-               info->xmit_fifo_size = 1;
 
        /*
         * At this point there's no way the LSR could still be 0xFF;
@@ -974,18 +1014,18 @@ static int startup(struct async_struct * info)
        /*
         * Allocate the IRQ if necessary
         */
-       if (info->irq && (!IRQ_ports[info->irq] ||
-                         !IRQ_ports[info->irq]->next_port)) {
-               if (IRQ_ports[info->irq]) {
-                       free_irq(info->irq, NULL);
-                       if (rs_multiport[info->irq].port1)
+       if (state->irq && (!IRQ_ports[state->irq] ||
+                         !IRQ_ports[state->irq]->next_port)) {
+               if (IRQ_ports[state->irq]) {
+                       free_irq(state->irq, NULL);
+                       if (rs_multiport[state->irq].port1)
                                handler = rs_interrupt_multi;
                        else
                                handler = rs_interrupt;
                } else 
                        handler = rs_interrupt_single;
 
-               retval = request_irq(info->irq, handler, IRQ_T(info),
+               retval = request_irq(state->irq, handler, IRQ_T(info),
                                     "serial", NULL);
                if (retval) {
                        restore_flags(flags);
@@ -1022,7 +1062,7 @@ static int startup(struct async_struct * info)
        info->MCR |= UART_MCR_OUT1 | UART_MCR_OUT2;
        info->MCR_noint |= UART_MCR_OUT1 | UART_MCR_OUT2;
 #endif
-       if (info->irq == 0)
+       if (state->irq == 0)
                info->MCR = info->MCR_noint;
        serial_outp(info, UART_MCR, info->MCR);
        
@@ -1055,11 +1095,11 @@ static int startup(struct async_struct * info)
         * Insert serial port into IRQ chain.
         */
        info->prev_port = 0;
-       info->next_port = IRQ_ports[info->irq];
+       info->next_port = IRQ_ports[state->irq];
        if (info->next_port)
                info->next_port->prev_port = info;
-       IRQ_ports[info->irq] = info;
-       figure_IRQ_timeout(info->irq);
+       IRQ_ports[state->irq] = info;
+       figure_IRQ_timeout(state->irq);
 
        /*
         * Set up serial timers...
@@ -1084,14 +1124,17 @@ static int startup(struct async_struct * info)
 static void shutdown(struct async_struct * info)
 {
        unsigned long   flags;
+       struct serial_state *state;
        int             retval;
 
        if (!(info->flags & ASYNC_INITIALIZED))
                return;
 
+       state = info->state;
+
 #ifdef SERIAL_DEBUG_OPEN
        printk("Shutting down serial port %d (irq %d)....", info->line,
-              info->irq);
+              state->irq);
 #endif
        
        save_flags(flags); cli(); /* Disable interrupts */
@@ -1110,24 +1153,24 @@ static void shutdown(struct async_struct * info)
        if (info->prev_port)
                info->prev_port->next_port = info->next_port;
        else
-               IRQ_ports[info->irq] = info->next_port;
-       figure_IRQ_timeout(info->irq);
+               IRQ_ports[state->irq] = info->next_port;
+       figure_IRQ_timeout(state->irq);
        
        /*
         * Free the IRQ, if necessary
         */
-       if (info->irq && (!IRQ_ports[info->irq] ||
-                         !IRQ_ports[info->irq]->next_port)) {
-               if (IRQ_ports[info->irq]) {
-                       free_irq(info->irq, NULL);
-                       retval = request_irq(info->irq, rs_interrupt_single,
+       if (state->irq && (!IRQ_ports[state->irq] ||
+                         !IRQ_ports[state->irq]->next_port)) {
+               if (IRQ_ports[state->irq]) {
+                       free_irq(state->irq, NULL);
+                       retval = request_irq(state->irq, rs_interrupt_single,
                                             IRQ_T(info), "serial", NULL);
                        
                        if (retval)
                                printk("serial shutdown: request_irq: error %d"
                                       "  Couldn't reacquire IRQ.\n", retval);
                } else
-                       free_irq(info->irq, NULL);
+                       free_irq(state->irq, NULL);
        }
 
        if (info->xmit_buf) {
@@ -1155,7 +1198,18 @@ static void shutdown(struct async_struct * info)
        
        if (info->tty)
                set_bit(TTY_IO_ERROR, &info->tty->flags);
-       
+
+       if (uart_config[info->state->type].flags & UART_STARTECH) {
+               /* Arrange to enter sleep mode */
+               serial_outp(info, UART_LCR, 0xBF);
+               serial_outp(info, UART_EFR, UART_EFR_ECB);
+               serial_outp(info, UART_IER, UART_IERX_SLEEP);
+               serial_outp(info, UART_LCR, 0);
+       }
+       if (info->state->type == PORT_16750) {
+               /* Arrange to enter sleep mode */
+               serial_outp(info, UART_IER, UART_IERX_SLEEP);
+       }
        info->flags &= ~ASYNC_INITIALIZED;
        restore_flags(flags);
 }
@@ -1167,8 +1221,8 @@ static void shutdown(struct async_struct * info)
 static void change_speed(struct async_struct *info)
 {
        unsigned short port;
-       int     quot = 0;
-       unsigned cflag,cval,fcr;
+       int     quot = 0, baud_base;
+       unsigned cflag, cval, fcr = 0;
        int     i;
 
        if (!info->tty || !info->tty->termios)
@@ -1179,7 +1233,7 @@ static void change_speed(struct async_struct *info)
        i = cflag & CBAUD;
        if (i & CBAUDEX) {
                i &= ~CBAUDEX;
-               if (i < 1 || i > 2)
+               if (i < 1 || i > 4)
                        info->tty->termios->c_cflag &= ~CBAUDEX;
                else
                        i += 15;
@@ -1189,29 +1243,28 @@ static void change_speed(struct async_struct *info)
                        i += 1;
                if ((info->flags & ASYNC_SPD_MASK) == ASYNC_SPD_VHI)
                        i += 2;
+               if ((info->flags & ASYNC_SPD_MASK) == ASYNC_SPD_SHI)
+                       i += 3;
+               if ((info->flags & ASYNC_SPD_MASK) == ASYNC_SPD_WARP)
+                       i += 4;
                if ((info->flags & ASYNC_SPD_MASK) == ASYNC_SPD_CUST)
-                       quot = info->custom_divisor;
+                       quot = info->state->custom_divisor;
        }
+       baud_base = info->state->baud_base;
        if (quot) {
                info->timeout = ((info->xmit_fifo_size*HZ*15*quot) /
-                                info->baud_base) + 2;
+                                baud_base) + 2;
        } else if (baud_table[i] == 134) {
-               quot = (2*info->baud_base / 269);
+               quot = (2*baud_base / 269);
                info->timeout = (info->xmit_fifo_size*HZ*30/269) + 2;
        } else if (baud_table[i]) {
-               quot = info->baud_base / baud_table[i];
+               quot = baud_base / baud_table[i];
                info->timeout = (info->xmit_fifo_size*HZ*15/baud_table[i]) + 2;
        } else {
                quot = 0;
                info->timeout = 0;
        }
-       if (quot) {
-               info->MCR |= UART_MCR_DTR;
-               info->MCR_noint |= UART_MCR_DTR;
-               cli();
-               serial_out(info, UART_MCR, info->MCR);
-               sti();
-       } else {
+       if (!quot) {
                info->MCR &= ~UART_MCR_DTR;
                info->MCR_noint &= ~UART_MCR_DTR;
                cli();
@@ -1234,28 +1287,19 @@ static void change_speed(struct async_struct *info)
                cval |= UART_LCR_PARITY;
        if (!(cflag & PARODD))
                cval |= UART_LCR_EPAR;
-       if (info->type == PORT_16550A) {
-               if ((info->baud_base / quot) < 2400)
+       if (uart_config[info->state->type].flags & UART_USE_FIFO) {
+               if ((info->state->baud_base / quot) < 2400)
                        fcr = UART_FCR_ENABLE_FIFO | UART_FCR_TRIGGER_1;
                else
                        fcr = UART_FCR_ENABLE_FIFO | UART_FCR_TRIGGER_8;
-       } else if (info->type == PORT_16650) {
-               /*
-                * On the 16650, we disable the FIFOs altogether
-                * because of a design bug in how the implement
-                * things.  We could support it by completely changing
-                * how we handle the interrupt driver, but not today....
-                *
-                * N.B.  Because there's no way to set a FIFO trigger
-                * at 1 char, we'd probably disable at speed below
-                * 2400 baud anyway...
-                */
-               fcr = 0;
-       } else
-               fcr = 0;
+       }
+       if (info->state->type == PORT_16750)
+               fcr |= UART_FCR7_64BYTE;
        
        /* CTS flow control flag and modem status interrupts */
        info->IER &= ~UART_IER_MSI;
+       if (info->flags & ASYNC_HARDPPS_CD)
+               info->IER |= UART_IER_MSI;
        if (cflag & CRTSCTS) {
                info->flags |= ASYNC_CTS_FLOW;
                info->IER |= UART_IER_MSI;
@@ -1280,32 +1324,37 @@ static void change_speed(struct async_struct *info)
        if (I_BRKINT(info->tty) || I_PARMRK(info->tty))
                info->read_status_mask |= UART_LSR_BI;
        
+       /*
+        * Characters to ignore
+        */
        info->ignore_status_mask = 0;
-#if 0
-       /* This should be safe, but for some broken bits of hardware... */
-       if (I_IGNPAR(info->tty)) {
+       if (I_IGNPAR(info->tty))
                info->ignore_status_mask |= UART_LSR_PE | UART_LSR_FE;
-               info->read_status_mask |= UART_LSR_PE | UART_LSR_FE;
-       }
-#endif
        if (I_IGNBRK(info->tty)) {
                info->ignore_status_mask |= UART_LSR_BI;
-               info->read_status_mask |= UART_LSR_BI;
                /*
                 * If we're ignore parity and break indicators, ignore 
                 * overruns too.  (For real raw support).
                 */
-               if (I_IGNPAR(info->tty)) {
-                       info->ignore_status_mask |= UART_LSR_OE | \
-                               UART_LSR_PE | UART_LSR_FE;
-                       info->read_status_mask |= UART_LSR_OE | \
-                               UART_LSR_PE | UART_LSR_FE;
-               }
+               if (I_IGNPAR(info->tty))
+                       info->ignore_status_mask |= UART_LSR_OE;
        }
+       /*
+        * !!! ignore all characters if CREAD is not set
+        */
+       if ((cflag & CREAD) == 0)
+               info->ignore_status_mask |= UART_LSR_DR;
        cli();
+       if (uart_config[info->state->type].flags & UART_STARTECH) {
+               serial_outp(info, UART_LCR, 0xBF);
+               serial_outp(info, UART_EFR,
+                           (cflag & CRTSCTS) ? UART_EFR_CTS : 0);
+       }
        serial_outp(info, UART_LCR, cval | UART_LCR_DLAB);      /* set DLAB */
        serial_outp(info, UART_DLL, quot & 0xff);       /* LS of divisor */
        serial_outp(info, UART_DLM, quot >> 8);         /* MS of divisor */
+       if (info->state->type == PORT_16750)
+               serial_outp(info, UART_FCR, fcr);       /* set fcr */
        serial_outp(info, UART_LCR, cval);              /* reset DLAB */
        serial_outp(info, UART_FCR, fcr);       /* set fcr */
        sti();
@@ -1437,6 +1486,25 @@ static void rs_flush_buffer(struct tty_struct *tty)
                (tty->ldisc.write_wakeup)(tty);
 }
 
+/*
+ * This function is used to send a high-priority XON/XOFF character to
+ * the device
+ */
+void rs_send_xchar(struct tty_struct *tty, char ch)
+{
+       struct async_struct *info = (struct async_struct *)tty->driver_data;
+
+       if (serial_paranoia_check(info, tty->device, "rs_send_char"))
+               return;
+
+       info->x_char = ch;
+       if (ch) {
+               /* Make sure transmit interrupts are on */
+               info->IER |= UART_IER_THRI;
+               serial_out(info, UART_IER, info->IER);
+       }
+}
+
 /*
  * ------------------------------------------------------------
  * rs_throttle()
@@ -1459,10 +1527,12 @@ static void rs_throttle(struct tty_struct * tty)
                return;
        
        if (I_IXOFF(tty))
-               info->x_char = STOP_CHAR(tty);
+               rs_send_xchar(tty, STOP_CHAR(tty));
 
-       info->MCR &= ~UART_MCR_RTS;
-       info->MCR_noint &= ~UART_MCR_RTS;
+       if (tty->termios->c_cflag & CRTSCTS) {
+               info->MCR &= ~UART_MCR_RTS;
+               info->MCR_noint &= ~UART_MCR_RTS;
+       }
        cli();
        serial_out(info, UART_MCR, info->MCR);
        sti();
@@ -1485,10 +1555,12 @@ static void rs_unthrottle(struct tty_struct * tty)
                if (info->x_char)
                        info->x_char = 0;
                else
-                       info->x_char = START_CHAR(tty);
+                       rs_send_xchar(tty, START_CHAR(tty));
+       }
+       if (tty->termios->c_cflag & CRTSCTS) {
+               info->MCR |= UART_MCR_RTS;
+               info->MCR_noint |= UART_MCR_RTS;
        }
-       info->MCR |= UART_MCR_RTS;
-       info->MCR_noint |= UART_MCR_RTS;
        cli();
        serial_out(info, UART_MCR, info->MCR);
        sti();
@@ -1504,20 +1576,22 @@ static int get_serial_info(struct async_struct * info,
                           struct serial_struct * retinfo)
 {
        struct serial_struct tmp;
-  
+       struct serial_state *state = info->state;
+   
        if (!retinfo)
                return -EFAULT;
        memset(&tmp, 0, sizeof(tmp));
-       tmp.type = info->type;
-       tmp.line = info->line;
-       tmp.port = info->port;
-       tmp.irq = info->irq;
-       tmp.flags = info->flags;
-       tmp.baud_base = info->baud_base;
-       tmp.close_delay = info->close_delay;
-       tmp.closing_wait = info->closing_wait;
-       tmp.custom_divisor = info->custom_divisor;
-       tmp.hub6 = info->hub6;
+       tmp.type = state->type;
+       tmp.line = state->line;
+       tmp.port = state->port;
+       tmp.irq = state->irq;
+       tmp.flags = state->flags;
+       tmp.xmit_fifo_size = state->xmit_fifo_size;
+       tmp.baud_base = state->baud_base;
+       tmp.close_delay = state->close_delay;
+       tmp.closing_wait = state->closing_wait;
+       tmp.custom_divisor = state->custom_divisor;
+       tmp.hub6 = state->hub6;
        copy_to_user(retinfo,&tmp,sizeof(*retinfo));
        return 0;
 }
@@ -1526,29 +1600,32 @@ static int set_serial_info(struct async_struct * info,
                           struct serial_struct * new_info)
 {
        struct serial_struct new_serial;
-       struct async_struct old_info;
+       struct serial_state old_state, *state;
        unsigned int            i,change_irq,change_port;
        int                     retval = 0;
 
        if (!new_info)
                return -EFAULT;
        copy_from_user(&new_serial,new_info,sizeof(new_serial));
-       old_info = *info;
-
-       change_irq = new_serial.irq != info->irq;
-       change_port = (new_serial.port != info->port) || (new_serial.hub6 != info->hub6);
-
+       state = info->state;
+       old_state = *state;
+  
+       change_irq = new_serial.irq != state->irq;
+       change_port = (new_serial.port != state->port) ||
+               (new_serial.hub6 != state->hub6);
+  
        if (!suser()) {
                if (change_irq || change_port ||
-                   (new_serial.baud_base != info->baud_base) ||
-                   (new_serial.type != info->type) ||
-                   (new_serial.close_delay != info->close_delay) ||
+                   (new_serial.baud_base != state->baud_base) ||
+                   (new_serial.type != state->type) ||
+                   (new_serial.close_delay != state->close_delay) ||
+                   (new_serial.xmit_fifo_size != state->xmit_fifo_size) ||
                    ((new_serial.flags & ~ASYNC_USR_MASK) !=
-                    (info->flags & ~ASYNC_USR_MASK)))
+                    (state->flags & ~ASYNC_USR_MASK)))
                        return -EPERM;
-               info->flags = ((info->flags & ~ASYNC_USR_MASK) |
+               state->flags = ((state->flags & ~ASYNC_USR_MASK) |
                               (new_serial.flags & ASYNC_USR_MASK));
-               info->custom_divisor = new_serial.custom_divisor;
+               state->custom_divisor = new_serial.custom_divisor;
                goto check_and_exit;
        }
 
@@ -1563,13 +1640,13 @@ static int set_serial_info(struct async_struct * info,
        /* Make sure address is not already in use */
        if (new_serial.type) {
                for (i = 0 ; i < NR_PORTS; i++)
-                       if ((info != &rs_table[i]) &&
+                       if ((state != &rs_table[i]) &&
                            (rs_table[i].port == new_serial.port) &&
                            rs_table[i].type)
                                return -EADDRINUSE;
        }
 
-       if ((change_port || change_irq) && (info->count > 1))
+       if ((change_port || change_irq) && (state->count > 1))
                return -EBUSY;
 
        /*
@@ -1577,36 +1654,43 @@ static int set_serial_info(struct async_struct * info,
         * At this point, we start making changes.....
         */
 
-       info->baud_base = new_serial.baud_base;
-       info->flags = ((info->flags & ~ASYNC_FLAGS) |
+       state->baud_base = new_serial.baud_base;
+       state->flags = ((state->flags & ~ASYNC_FLAGS) |
                        (new_serial.flags & ASYNC_FLAGS));
-       info->custom_divisor = new_serial.custom_divisor;
-       info->type = new_serial.type;
-       info->close_delay = new_serial.close_delay * HZ/100;
-       info->closing_wait = new_serial.closing_wait * HZ/100;
-
-       release_region(info->port,8);
+       info->flags = ((state->flags & ~ASYNC_INTERNAL_FLAGS) |
+                      (info->flags & ASYNC_INTERNAL_FLAGS));
+       state->custom_divisor = new_serial.custom_divisor;
+       state->type = new_serial.type;
+       state->close_delay = new_serial.close_delay * HZ/100;
+       state->closing_wait = new_serial.closing_wait * HZ/100;
+       info->xmit_fifo_size = state->xmit_fifo_size =
+               new_serial.xmit_fifo_size;
+
+       release_region(state->port,8);
        if (change_port || change_irq) {
                /*
                 * We need to shutdown the serial port at the old
                 * port/irq combination.
                 */
                shutdown(info);
-               info->irq = new_serial.irq;
-               info->port = new_serial.port;
-               info->hub6 = new_serial.hub6;
+               state->irq = new_serial.irq;
+               info->port = state->port = new_serial.port;
+               info->hub6 = state->hub6 = new_serial.hub6;
        }
-       if(info->type != PORT_UNKNOWN)
-               request_region(info->port,8,"serial(set)");
+       if (state->type != PORT_UNKNOWN)
+               request_region(state->port,8,"serial(set)");
 
        
 check_and_exit:
-       if (!info->port || !info->type)
+       if (!state->port || !state->type)
                return 0;
-       if (info->flags & ASYNC_INITIALIZED) {
-               if (((old_info.flags & ASYNC_SPD_MASK) !=
-                    (info->flags & ASYNC_SPD_MASK)) ||
-                   (old_info.custom_divisor != info->custom_divisor))
+       if (state->type != old_state.type)
+               state->xmit_fifo_size =
+                       uart_config[state->type].dfl_xmit_fifo_size;
+       if (state->flags & ASYNC_INITIALIZED) {
+               if (((old_state.flags & ASYNC_SPD_MASK) !=
+                    (state->flags & ASYNC_SPD_MASK)) ||
+                   (old_state.custom_divisor != state->custom_divisor))
                        change_speed(info);
        } else
                retval = startup(info);
@@ -1713,13 +1797,13 @@ static int do_autoconfig(struct async_struct * info)
        if (!suser())
                return -EPERM;
        
-       if (info->count > 1)
+       if (info->state->count > 1)
                return -EBUSY;
        
        shutdown(info);
 
        cli();
-       autoconfig(info);
+       autoconfig(info->state);
        sti();
 
        retval = startup(info);
@@ -1795,7 +1879,7 @@ static int get_multiport_struct(struct async_struct * info,
        struct serial_multiport_struct ret;
        struct rs_multiport_struct *multi;
        
-       multi = &rs_multiport[info->irq];
+       multi = &rs_multiport[info->state->irq];
 
        ret.port_monitor = multi->port_monitor;
        
@@ -1815,7 +1899,7 @@ static int get_multiport_struct(struct async_struct * info,
        ret.mask4 = multi->mask4;
        ret.match4 = multi->match4;
 
-       ret.irq = info->irq;
+       ret.irq = info->state->irq;
 
        copy_to_user(retinfo,&ret,sizeof(*retinfo));
        return 0;
@@ -1827,6 +1911,7 @@ static int set_multiport_struct(struct async_struct * info,
 {
        struct serial_multiport_struct new_multi;
        struct rs_multiport_struct *multi;
+       struct serial_state *state;
        int     was_multi, now_multi;
        int     retval;
        void (*handler)(int, void *, struct pt_regs *);
@@ -1835,14 +1920,16 @@ static int set_multiport_struct(struct async_struct * info,
                return -EPERM;
        if (!in_multi)
                return -EFAULT;
+       state = info->state;
+       
        copy_from_user(&new_multi, in_multi,
                      sizeof(struct serial_multiport_struct));
 
-       if (new_multi.irq != info->irq || info->irq == 0 ||
-           !IRQ_ports[info->irq])
+       if (new_multi.irq != state->irq || state->irq == 0 ||
+           !IRQ_ports[state->irq])
                return -EINVAL;
 
-       multi = &rs_multiport[info->irq];
+       multi = &rs_multiport[state->irq];
        was_multi = (multi->port1 != 0);
        
        multi->port_monitor = new_multi.port_monitor;
@@ -1881,15 +1968,15 @@ static int set_multiport_struct(struct async_struct * info,
 
        now_multi = (multi->port1 != 0);
        
-       if (IRQ_ports[info->irq]->next_port &&
+       if (IRQ_ports[state->irq]->next_port &&
            (was_multi != now_multi)) {
-               free_irq(info->irq, NULL);
+               free_irq(state->irq, NULL);
                if (now_multi)
                        handler = rs_interrupt_multi;
                else
                        handler = rs_interrupt;
 
-               retval = request_irq(info->irq, handler, IRQ_T(info),
+               retval = request_irq(state->irq, handler, IRQ_T(info),
                                     "serial", NULL);
                if (retval) {
                        printk("Couldn't reallocate serial interrupt "
@@ -2036,7 +2123,8 @@ static int rs_ioctl(struct tty_struct *tty, struct file * file,
                 */
                 case TIOCMIWAIT:
                        cli();
-                       cprev = info->icount;   /* note the counters on entry */
+                       /* note the counters on entry */
+                       cprev = info->state->icount;
                        sti();
                        while (1) {
                                interruptible_sleep_on(&info->delta_msr_wait);
@@ -2044,7 +2132,7 @@ static int rs_ioctl(struct tty_struct *tty, struct file * file,
                                if (current->signal & ~current->blocked)
                                        return -ERESTARTSYS;
                                cli();
-                               cnow = info->icount;    /* atomic copy */
+                               cnow = info->state->icount; /* atomic copy */
                                sti();
                                if (cnow.rng == cprev.rng && cnow.dsr == cprev.dsr && 
                                    cnow.dcd == cprev.dcd && cnow.cts == cprev.cts)
@@ -2071,7 +2159,7 @@ static int rs_ioctl(struct tty_struct *tty, struct file * file,
                        if (error)
                                return error;
                        cli();
-                       cnow = info->icount;
+                       cnow = info->state->icount;
                        sti();
                        p_cuser = (struct serial_icounter_struct *) arg;
                        put_user(cnow.cts, &p_cuser->cts);
@@ -2097,6 +2185,14 @@ static void rs_set_termios(struct tty_struct *tty, struct termios *old_termios)
 
        change_speed(info);
 
+       if (!(old_termios->c_cflag & CBAUD) &&
+           (tty->termios->c_cflag & CBAUD)) {
+               info->MCR |= UART_MCR_DTR;
+               info->MCR_noint |= UART_MCR_DTR;
+               cli();
+               serial_out(info, UART_MCR, info->MCR);
+               sti();
+       }
        if ((old_termios->c_cflag & CRTSCTS) &&
            !(tty->termios->c_cflag & CRTSCTS)) {
                tty->hw_stopped = 0;
@@ -2129,11 +2225,13 @@ static void rs_set_termios(struct tty_struct *tty, struct termios *old_termios)
 static void rs_close(struct tty_struct *tty, struct file * filp)
 {
        struct async_struct * info = (struct async_struct *)tty->driver_data;
+       struct serial_state *state;
        unsigned long flags;
-       unsigned long timeout;
 
        if (!info || serial_paranoia_check(info, tty->device, "rs_close"))
                return;
+
+       state = info->state;
        
        save_flags(flags); cli();
        
@@ -2145,26 +2243,26 @@ static void rs_close(struct tty_struct *tty, struct file * filp)
        }
        
 #ifdef SERIAL_DEBUG_OPEN
-       printk("rs_close ttys%d, count = %d\n", info->line, info->count);
+       printk("rs_close ttys%d, count = %d\n", info->line, state->count);
 #endif
-       if ((tty->count == 1) && (info->count != 1)) {
+       if ((tty->count == 1) && (state->count != 1)) {
                /*
                 * Uh, oh.  tty->count is 1, which means that the tty
-                * structure will be freed.  Info->count should always
+                * structure will be freed.  state->count should always
                 * be one in these conditions.  If it's greater than
                 * one, we've got real problems, since it means the
                 * serial port won't be shutdown.
                 */
                printk("rs_close: bad serial port count; tty->count is 1, "
-                      "info->count is %d\n", info->count);
-               info->count = 1;
+                      "state->count is %d\n", state->count);
+               state->count = 1;
        }
-       if (--info->count < 0) {
+       if (--state->count < 0) {
                printk("rs_close: bad serial port count for ttys%d: %d\n",
-                      info->line, info->count);
-               info->count = 0;
+                      info->line, state->count);
+               state->count = 0;
        }
-       if (info->count) {
+       if (state->count) {
                DBG_CNT("before DEC-2");
                MOD_DEC_USE_COUNT;
                restore_flags(flags);
@@ -2176,9 +2274,9 @@ static void rs_close(struct tty_struct *tty, struct file * filp)
         * separate termios for callout and dialin.
         */
        if (info->flags & ASYNC_NORMAL_ACTIVE)
-               info->normal_termios = *tty->termios;
+               info->state->normal_termios = *tty->termios;
        if (info->flags & ASYNC_CALLOUT_ACTIVE)
-               info->callout_termios = *tty->termios;
+               info->state->callout_termios = *tty->termios;
        /*
         * Now we wait for the transmit buffer to clear; and we notify 
         * the line discipline to only process XON/XOFF characters.
@@ -2201,14 +2299,7 @@ static void rs_close(struct tty_struct *tty, struct file * filp)
                 * has completely drained; this is especially
                 * important if there is a transmit FIFO!
                 */
-               timeout = jiffies+HZ;
-               while (!(serial_inp(info, UART_LSR) & UART_LSR_TEMT)) {
-                       current->state = TASK_INTERRUPTIBLE;
-                       current->timeout = jiffies + info->timeout;
-                       schedule();
-                       if (jiffies > timeout)
-                               break;
-               }
+               rs_wait_until_sent(tty, HZ);
        }
        shutdown(info);
        if (tty->driver.flush_buffer)
@@ -2233,20 +2324,50 @@ static void rs_close(struct tty_struct *tty, struct file * filp)
        restore_flags(flags);
 }
 
+/*
+ * rs_wait_until_sent() --- wait until the transmitter is empty
+ */
+static void rs_wait_until_sent(struct tty_struct *tty, int timeout)
+{
+       struct async_struct * info = (struct async_struct *)tty->driver_data;
+       unsigned long orig_jiffies;
+       
+       if (serial_paranoia_check(info, tty->device, "rs_wait_until_sent"))
+               return;
+
+       orig_jiffies = jiffies;
+       current->state = TASK_INTERRUPTIBLE;
+       current->counter = 0;   /* make us low-priority */
+       while (!(serial_inp(info, UART_LSR) & UART_LSR_TEMT)) {
+               current->timeout = jiffies + info->timeout;
+               schedule();
+               if (current->signal & ~current->blocked)
+                       break;
+               if (timeout && ((orig_jiffies + timeout) > jiffies))
+                       break;
+               if (jiffies > timeout)
+                       break;
+       }
+       current->state = TASK_RUNNING;
+}
+
 /*
  * rs_hangup() --- called by tty_hangup() when a hangup is signaled.
  */
 void rs_hangup(struct tty_struct *tty)
 {
        struct async_struct * info = (struct async_struct *)tty->driver_data;
+       struct serial_state *state = info->state;
        
        if (serial_paranoia_check(info, tty->device, "rs_hangup"))
                return;
+
+       state = info->state;
        
        rs_flush_buffer(tty);
        shutdown(info);
        info->event = 0;
-       info->count = 0;
+       state->count = 0;
        info->flags &= ~(ASYNC_NORMAL_ACTIVE|ASYNC_CALLOUT_ACTIVE);
        info->tty = 0;
        wake_up_interruptible(&info->open_wait);
@@ -2261,6 +2382,7 @@ static int block_til_ready(struct tty_struct *tty, struct file * filp,
                           struct async_struct *info)
 {
        struct wait_queue wait = { current, NULL };
+       struct serial_state *state = info->state;
        int             retval;
        int             do_clocal = 0;
 
@@ -2314,7 +2436,7 @@ static int block_til_ready(struct tty_struct *tty, struct file * filp,
        }
 
        if (info->flags & ASYNC_CALLOUT_ACTIVE) {
-               if (info->normal_termios.c_cflag & CLOCAL)
+               if (state->normal_termios.c_cflag & CLOCAL)
                        do_clocal = 1;
        } else {
                if (tty->termios->c_cflag & CLOCAL)
@@ -2324,7 +2446,7 @@ static int block_til_ready(struct tty_struct *tty, struct file * filp,
        /*
         * Block waiting for the carrier detect and the line to become
         * free (i.e., not in use by the callout).  While we are in
-        * this loop, info->count is dropped by one, so that
+        * this loop, state->count is dropped by one, so that
         * rs_close() knows when to free things.  We restore it upon
         * exit, either normal or abnormal.
         */
@@ -2332,11 +2454,11 @@ static int block_til_ready(struct tty_struct *tty, struct file * filp,
        add_wait_queue(&info->open_wait, &wait);
 #ifdef SERIAL_DEBUG_OPEN
        printk("block_til_ready before block: ttys%d, count = %d\n",
-              info->line, info->count);
+              state->line, state->count);
 #endif
        cli();
        if (!tty_hung_up_p(filp)) 
-               info->count--;
+               state->count--;
        sti();
        info->blocked_open++;
        while (1) {
@@ -2370,24 +2492,60 @@ static int block_til_ready(struct tty_struct *tty, struct file * filp,
                }
 #ifdef SERIAL_DEBUG_OPEN
                printk("block_til_ready blocking: ttys%d, count = %d\n",
-                      info->line, info->count);
+                      info->line, state->count);
 #endif
                schedule();
        }
        current->state = TASK_RUNNING;
        remove_wait_queue(&info->open_wait, &wait);
        if (!tty_hung_up_p(filp))
-               info->count++;
+               state->count++;
        info->blocked_open--;
 #ifdef SERIAL_DEBUG_OPEN
        printk("block_til_ready after blocking: ttys%d, count = %d\n",
-              info->line, info->count);
+              info->line, state->count);
 #endif
        if (retval)
                return retval;
        info->flags |= ASYNC_NORMAL_ACTIVE;
        return 0;
-}      
+}
+
+int get_async_struct(int line, struct async_struct **ret_info)
+{
+       struct async_struct *info;
+       struct serial_state *sstate;
+
+       sstate = rs_table + line;
+       sstate->count++;
+       if (sstate->info) {
+               *ret_info = sstate->info;
+               return 0;
+       }
+       info = kmalloc(sizeof(struct async_struct), GFP_KERNEL);
+       if (!info) {
+               sstate->count--;
+               return -ENOMEM;
+       }
+       memset(info, 0, sizeof(struct async_struct));
+       info->magic = SERIAL_MAGIC;
+       info->port = sstate->port;
+       info->flags = sstate->flags;
+       info->xmit_fifo_size = sstate->xmit_fifo_size;
+       info->line = line;
+       info->tqueue.routine = do_softint;
+       info->tqueue.data = info;
+       info->tqueue_hangup.routine = do_serial_hangup;
+       info->tqueue_hangup.data = info;
+       info->state = sstate;
+       if (sstate->info) {
+               kfree_s(info, sizeof(struct async_struct));
+               *ret_info = sstate->info;
+               return 0;
+       }
+       *ret_info = sstate->info = info;
+       return 0;
+}
 
 /*
  * This routine is called whenever a serial port is opened.  It
@@ -2404,15 +2562,16 @@ int rs_open(struct tty_struct *tty, struct file * filp)
        line = MINOR(tty->device) - tty->driver.minor_start;
        if ((line < 0) || (line >= NR_PORTS))
                return -ENODEV;
-       info = rs_table + line;
+       retval = get_async_struct(line, &info);
+       if (retval)
+               return retval;
        if (serial_paranoia_check(info, tty->device, "rs_open"))
                return -ENODEV;
 
 #ifdef SERIAL_DEBUG_OPEN
        printk("rs_open %s%d, count = %d\n", tty->driver.name, info->line,
-              info->count);
+              info->state->count);
 #endif
-       info->count++;
        tty->driver_data = info;
        info->tty = tty;
 
@@ -2443,11 +2602,12 @@ int rs_open(struct tty_struct *tty, struct file * filp)
                return retval;
        }
 
-       if ((info->count == 1) && (info->flags & ASYNC_SPLIT_TERMIOS)) {
+       if ((info->state->count == 1) &&
+           (info->flags & ASYNC_SPLIT_TERMIOS)) {
                if (tty->driver.subtype == SERIAL_TYPE_NORMAL)
-                       *tty->termios = info->normal_termios;
+                       *tty->termios = info->state->normal_termios;
                else 
-                       *tty->termios = info->callout_termios;
+                       *tty->termios = info->state->callout_termios;
                change_speed(info);
        }
 
@@ -2587,16 +2747,18 @@ static int do_auto_irq(struct async_struct * info)
  * whether or not this UART is a 16550A or not, since this will
  * determine whether or not we can use its FIFO features or not.
  */
-static void autoconfig(struct async_struct * info)
+static void autoconfig(struct serial_state * state)
 {
        unsigned char status1, status2, scratch, scratch2;
-       unsigned port = info->port;
+       struct async_struct *info, scr_info;
        unsigned long flags;
 
-       info->type = PORT_UNKNOWN;
+       state->type = PORT_UNKNOWN;
        
-       if (!port)
+       if (!state->port)
                return;
+       info = &scr_info;       /* This is just for serial_{in,out} */
+       info->port = state->port; 
 
        save_flags(flags); cli();
        
@@ -2628,7 +2790,7 @@ static void autoconfig(struct async_struct * info)
         * manufacturer would be stupid enough to design a board
         * that conflicts with COM 1-4 --- we hope!
         */
-       if (!(info->flags & ASYNC_SKIP_TEST)) {
+       if (!(state->flags & ASYNC_SKIP_TEST)) {
                scratch = serial_inp(info, UART_MCR);
                serial_outp(info, UART_MCR, UART_MCR_LOOP | scratch);
                scratch2 = serial_inp(info, UART_MSR);
@@ -2646,39 +2808,52 @@ static void autoconfig(struct async_struct * info)
         * If the AUTO_IRQ flag is set, try to do the automatic IRQ
         * detection.
         */
-       if (info->flags & ASYNC_AUTO_IRQ)
-               info->irq = do_auto_irq(info);
+       if (state->flags & ASYNC_AUTO_IRQ)
+               state->irq = do_auto_irq(info);
                
        scratch2 = serial_in(info, UART_LCR);
-       serial_outp(info, UART_LCR, scratch2 | UART_LCR_DLAB);
+       serial_outp(info, UART_LCR, 0xBF); /* set up for StarTech test */
        serial_outp(info, UART_EFR, 0); /* EFR is the same as FCR */
-       serial_outp(info, UART_LCR, scratch2);
+       serial_outp(info, UART_LCR, 0);
        serial_outp(info, UART_FCR, UART_FCR_ENABLE_FIFO);
        scratch = serial_in(info, UART_IIR) >> 6;
-       info->xmit_fifo_size = 1;
        switch (scratch) {
                case 0:
-                       info->type = PORT_16450;
+                       state->type = PORT_16450;
                        break;
                case 1:
-                       info->type = PORT_UNKNOWN;
+                       state->type = PORT_UNKNOWN;
                        break;
                case 2:
-                       info->type = PORT_16550;
+                       state->type = PORT_16550;
                        break;
                case 3:
-                       serial_outp(info, UART_LCR, scratch2 | UART_LCR_DLAB);
-                       if (serial_in(info, UART_EFR) == 0) {
-                               info->type = PORT_16650;
-                               info->xmit_fifo_size = 32;
-                       } else {
-                               info->type = PORT_16550A;
-                               info->xmit_fifo_size = 16;
-                       }
-                       serial_outp(info, UART_LCR, scratch2);
+                       state->type = PORT_16550A;
                        break;
        }
-       if (info->type == PORT_16450) {
+       if (state->type == PORT_16550A) {
+               /* Check for Startech UART's */
+               serial_outp(info, UART_LCR, scratch2 | UART_LCR_DLAB);
+               if (serial_in(info, UART_EFR) == 0) {
+                       state->type = PORT_16650;
+               } else {
+                       serial_outp(info, UART_LCR, 0xBF);
+                       if (serial_in(info, UART_EFR) == 0)
+                               state->type = PORT_16650V2;
+               }
+       }
+       if (state->type == PORT_16550A) {
+               /* Check for TI 16750 */
+               serial_outp(info, UART_LCR, scratch2 | UART_LCR_DLAB);
+               serial_outp(info, UART_FCR,
+                           UART_FCR_ENABLE_FIFO | UART_FCR7_64BYTE);
+               scratch = serial_in(info, UART_IIR) >> 5;
+               if (scratch == 7)
+                       state->type = PORT_16750;
+               serial_outp(info, UART_FCR, UART_FCR_ENABLE_FIFO);
+       }
+       serial_outp(info, UART_LCR, scratch2);
+       if (state->type == PORT_16450) {
                scratch = serial_in(info, UART_SCR);
                serial_outp(info, UART_SCR, 0xa5);
                status1 = serial_in(info, UART_SCR);
@@ -2687,8 +2862,10 @@ static void autoconfig(struct async_struct * info)
                serial_outp(info, UART_SCR, scratch);
 
                if ((status1 != 0xa5) || (status2 != 0x5a))
-                       info->type = PORT_8250;
+                       state->type = PORT_8250;
        }
+       state->xmit_fifo_size = uart_config[state->type].dfl_xmit_fifo_size;
+
        request_region(info->port,8,"serial(auto)");
 
        /*
@@ -2726,7 +2903,7 @@ static struct symbol_table serial_syms = {
 int rs_init(void)
 {
        int i;
-       struct async_struct * info;
+       struct serial_state * state;
        
        init_bh(SERIAL_BH, do_serial_bh);
        timer_table[RS_TIMER].fn = rs_timer;
@@ -2773,10 +2950,12 @@ int rs_init(void)
        serial_driver.ioctl = rs_ioctl;
        serial_driver.throttle = rs_throttle;
        serial_driver.unthrottle = rs_unthrottle;
+       serial_driver.send_xchar = rs_send_xchar;
        serial_driver.set_termios = rs_set_termios;
        serial_driver.stop = rs_stop;
        serial_driver.start = rs_start;
        serial_driver.hangup = rs_hangup;
+       serial_driver.wait_until_sent = rs_wait_until_sent;
 
        /*
         * The callout device is just like normal device except for
@@ -2792,63 +2971,31 @@ int rs_init(void)
        if (tty_register_driver(&callout_driver))
                panic("Couldn't register callout driver\n");
        
-       for (i = 0, info = rs_table; i < NR_PORTS; i++,info++) {
-               info->magic = SERIAL_MAGIC;
-               info->line = i;
-               info->tty = 0;
-               info->type = PORT_UNKNOWN;
-               info->custom_divisor = 0;
-               info->close_delay = 5*HZ/10;
-               info->closing_wait = 30*HZ;
-               info->x_char = 0;
-               info->event = 0;
-               info->count = 0;
-               info->blocked_open = 0;
-               info->tqueue.routine = do_softint;
-               info->tqueue.data = info;
-               info->tqueue_hangup.routine = do_serial_hangup;
-               info->tqueue_hangup.data = info;
-               info->callout_termios =callout_driver.init_termios;
-               info->normal_termios = serial_driver.init_termios;
-               info->open_wait = 0;
-               info->close_wait = 0;
-               info->delta_msr_wait = 0;
-               info->icount.cts = info->icount.dsr = 
-                       info->icount.rng = info->icount.dcd = 0;
-               info->next_port = 0;
-               info->prev_port = 0;
-               if (info->irq == 2)
-                       info->irq = 9;
-               if (info->type == PORT_UNKNOWN) {
-                       if (!(info->flags & ASYNC_BOOT_AUTOCONF))
+       for (i = 0, state = rs_table; i < NR_PORTS; i++,state++) {
+               state->magic = SSTATE_MAGIC;
+               state->line = i;
+               state->type = PORT_UNKNOWN;
+               state->custom_divisor = 0;
+               state->close_delay = 5*HZ/10;
+               state->closing_wait = 30*HZ;
+               state->callout_termios = callout_driver.init_termios;
+               state->normal_termios = serial_driver.init_termios;
+               state->icount.cts = state->icount.dsr = 
+                       state->icount.rng = state->icount.dcd = 0;
+               if (state->irq == 2)
+                       state->irq = 9;
+               if (state->type == PORT_UNKNOWN) {
+                       if (!(state->flags & ASYNC_BOOT_AUTOCONF))
                                continue;
-                       autoconfig(info);
-                       if (info->type == PORT_UNKNOWN)
+                       autoconfig(state);
+                       if (state->type == PORT_UNKNOWN)
                                continue;
                }
-               printk(KERN_INFO "tty%02d%s at 0x%04x (irq = %d)", info->line, 
-                      (info->flags & ASYNC_FOURPORT) ? " FourPort" : "",
-                      info->port, info->irq);
-               switch (info->type) {
-                       case PORT_8250:
-                               printk(" is a 8250\n");
-                               break;
-                       case PORT_16450:
-                               printk(" is a 16450\n");
-                               break;
-                       case PORT_16550:
-                               printk(" is a 16550\n");
-                               break;
-                       case PORT_16550A:
-                               printk(" is a 16550A\n");
-                               break;
-                       case PORT_16650:
-                               printk(" is a 16650\n");
-                               break;
-                       default:
-                               printk("\n");
-                               break;
-               }
+               printk(KERN_INFO "tty%02d%s at 0x%04x (irq = %d) is a %s\n",
+                      state->line,
+                      (state->flags & ASYNC_FOURPORT) ? " FourPort" : "",
+                      state->port, state->irq,
+                      uart_config[state->type].name);
        }
        register_symtab(&serial_syms);
        return 0;
@@ -2862,7 +3009,7 @@ int register_serial(struct serial_struct *req)
 {
        int i;
        unsigned long flags;
-       struct async_struct *info;
+       struct serial_state *state;
 
        save_flags(flags);
        cli();
@@ -2880,51 +3027,40 @@ int register_serial(struct serial_struct *req)
                restore_flags(flags);
                return -1;
        }
-       info = &rs_table[i];
+       state = &rs_table[i];
        if (rs_table[i].count) {
                restore_flags(flags);
                printk("Couldn't configure serial #%d (port=%d,irq=%d): "
                       "device already open\n", i, req->port, req->irq);
                return -1;
        }
-       info->irq = req->irq;
-       info->port = req->port;
-       info->flags = req->flags;
-       autoconfig(info);
-       if (info->type == PORT_UNKNOWN) {
+       state->irq = req->irq;
+       state->port = req->port;
+       state->flags = req->flags;
+       autoconfig(state);
+       if (state->type == PORT_UNKNOWN) {
                restore_flags(flags);
                printk("register_serial(): autoconfig failed\n");
                return -1;
        }
-       printk(KERN_INFO "tty%02d at 0x%04x (irq = %d)", info->line, 
-              info->port, info->irq);
-       switch (info->type) {
-       case PORT_8250:
-               printk(" is a 8250\n"); break;
-       case PORT_16450:
-               printk(" is a 16450\n"); break;
-       case PORT_16550:
-               printk(" is a 16550\n"); break;
-       case PORT_16550A:
-               printk(" is a 16550A\n"); break;
-       default:
-               printk("\n"); break;
-       }
+       printk(KERN_INFO "tty%02d at 0x%04x (irq = %d) is a %s\n",
+              state->line, state->port, state->irq,
+              uart_config[state->type].name);
        restore_flags(flags);
-       return info->line;
+       return state->line;
 }
 
 void unregister_serial(int line)
 {
        unsigned long flags;
-       struct async_struct *info = &rs_table[line];
+       struct serial_state *state = &rs_table[line];
 
        save_flags(flags);
        cli();
-       if (info->tty)
-               tty_hangup(info->tty);
-       info->type = PORT_UNKNOWN;
-       printk(KERN_INFO "tty%02d unloaded\n", info->line);
+       if (state->info && state->info->tty)
+               tty_hangup(state->info->tty);
+       state->type = PORT_UNKNOWN;
+       printk(KERN_INFO "tty%02d unloaded\n", state->line);
        restore_flags(flags);
 }
 
index 55d6d21dc7dcde6a2d3109e6d84e7f831e08a5fc..f4a68812b69cae875f5efc04ded4b19a2be3c6c3 100644 (file)
@@ -62,11 +62,19 @@ void tty_wait_until_sent(struct tty_struct * tty, int timeout)
 #endif
                current->state = TASK_INTERRUPTIBLE;
                if (current->signal & ~current->blocked)
-                       break;
+                       goto stop_waiting;
                if (!tty->driver.chars_in_buffer(tty))
                        break;
                schedule();
        } while (current->timeout);
+       if (tty->driver.wait_until_sent) {
+               if (current->timeout == -1)
+                       timeout = 0;
+               else
+                       timeout = current->timeout - jiffies;
+               tty->driver.wait_until_sent(tty, timeout);
+       }
+stop_waiting:
        current->state = TASK_RUNNING;
        remove_wait_queue(&tty->write_wait, &wait);
 }
@@ -169,8 +177,11 @@ static int set_termios(struct tty_struct * tty, unsigned long arg, int opt)
        if ((opt & TERMIOS_FLUSH) && tty->ldisc.flush_buffer)
                tty->ldisc.flush_buffer(tty);
 
-       if (opt & TERMIOS_WAIT)
+       if (opt & TERMIOS_WAIT) {
                tty_wait_until_sent(tty, 0);
+               if (current->signal & ~current->blocked)
+                       return -EINTR;
+       }
 
        change_termios(tty, &tmp_termios);
        return 0;
@@ -371,6 +382,24 @@ static int set_ltchars(struct tty_struct * tty, struct ltchars * ltchars)
 }
 #endif
 
+/*
+ * Send a high priority character to the tty.
+ */
+void send_prio_char(struct tty_struct *tty, char ch)
+{
+       int     was_stopped = tty->stopped;
+
+       if (tty->driver.send_xchar) {
+               tty->driver.send_xchar(tty, ch);
+               return;
+       }
+       if (was_stopped)
+               start_tty(tty);
+       tty->driver.write(tty, 0, &ch, 1);
+       if (was_stopped)
+               stop_tty(tty);
+}
+
 int n_tty_ioctl(struct tty_struct * tty, struct file * file,
                       unsigned int cmd, unsigned long arg)
 {
@@ -440,13 +469,11 @@ int n_tty_ioctl(struct tty_struct * tty, struct file * file,
                                break;
                        case TCIOFF:
                                if (STOP_CHAR(tty) != __DISABLED_CHAR)
-                                       tty->driver.write(tty, 0,
-                                                         &STOP_CHAR(tty), 1);
+                                       send_prio_char(tty, STOP_CHAR(tty));
                                break;
                        case TCION:
                                if (START_CHAR(tty) != __DISABLED_CHAR)
-                                       tty->driver.write(tty, 0,
-                                                         &START_CHAR(tty), 1);
+                                       send_prio_char(tty, START_CHAR(tty));
                                break;
                        default:
                                return -EINVAL;
@@ -538,6 +565,8 @@ int n_tty_ioctl(struct tty_struct * tty, struct file * file,
                        if (retval)
                                return retval;
                        tty_wait_until_sent(tty, 0);
+                       if (current->signal & ~current->blocked)
+                               return -EINTR;
                        if (!tty->driver.ioctl)
                                return 0;
                        tty->driver.ioctl(tty, file, cmd, arg);
index 4c8e7c940a9938156a8efbaefe7ffdd4c64f0cd7..200b95c79401a4b17f2c4df31b80b2b28ac606df 100644 (file)
@@ -237,8 +237,6 @@ int el1_probe(struct device *dev)
 
 static int el1_probe1(struct device *dev, int ioaddr)
 {
-#ifndef MODULE
-
        const char *mname;              /* Vendor name */
        unsigned char station_addr[6];
        int autoirq = 0;
@@ -344,8 +342,6 @@ static int el1_probe1(struct device *dev, int ioaddr)
 
        ether_setup(dev);
 
-#endif /* !MODULE */
-
        return 0;
 }
 
index 36b58bff9e77cfbe789422804ac5dbcf07b0cfe4..0b6a7f0009ddda3a4901bd17395df69e9aa6eb2f 100644 (file)
@@ -634,7 +634,7 @@ vortex_open(struct device *dev)
        /* Switch to register set 7 for normal use. */
        EL3WINDOW(7);
 
-       /* Set reciever mode: presumably accept b-case and phys addr only. */
+       /* Set receiver mode: presumably accept b-case and phys addr only. */
        set_rx_mode(dev);
        outw(StatsEnable, ioaddr + EL3_CMD); /* Turn on statistics. */
 
index 65de51ed5466e0e000d9dabae5cbdda126037668..2f63ca5c2f7bd582da26a54bbcac9ddba7fb3442 100644 (file)
@@ -136,6 +136,7 @@ extern inline int lock_buffer (register struct ppp_buffer *buf);
 
 static int rcv_proto_ip                (struct ppp *, __u16, __u8 *, int);
 static int rcv_proto_ipx       (struct ppp *, __u16, __u8 *, int);
+static int rcv_proto_ipv6      (struct ppp *, __u16, __u8 *, int);
 static int rcv_proto_vjc_comp  (struct ppp *, __u16, __u8 *, int);
 static int rcv_proto_vjc_uncomp (struct ppp *, __u16, __u8 *, int);
 static int rcv_proto_unknown   (struct ppp *, __u16, __u8 *, int);
@@ -264,6 +265,7 @@ static
 ppp_proto_type proto_list[] = {
        { PPP_IP,         rcv_proto_ip         },
        { PPP_IPX,        rcv_proto_ipx        },
+       { PPP_IPV6,       rcv_proto_ipv6       },
        { PPP_VJC_COMP,   rcv_proto_vjc_comp   },
        { PPP_VJC_UNCOMP, rcv_proto_vjc_uncomp },
        { PPP_LQR,        rcv_proto_lqr        },
@@ -1238,6 +1240,18 @@ rcv_proto_ipx (struct ppp *ppp, __u16 proto, __u8 * data, int count)
        return 0;
 }
 
+/*
+ * Process the receipt of an IPV6 frame
+ */
+
+static int
+rcv_proto_ipv6 (struct ppp *ppp, __u16 proto, __u8 * data, int count)
+{
+       if (((ppp2dev (ppp)->flags & IFF_UP) != 0) && (count > 0))
+               return ppp_rcv_rx (ppp, htons (ETH_P_IPV6), data, count);
+       return 0;
+}
+
 /*
  * Process the receipt of an VJ Compressed frame
  */
@@ -3104,6 +3118,10 @@ ppp_dev_xmit (sk_buff *skb, struct device *dev)
                answer = ppp_dev_xmit_ip (dev, ppp, data);
                break;
 
+       case ETH_P_IPV6:
+               answer = ppp_dev_xmit_ipx (dev, ppp, data, len, PPP_IPV6);
+               break;
+
        default: /* All others have no support at this time. */
                dev_kfree_skb (skb, FREE_WRITE);
                return 0;
index 90ce9555cb591c8b215170dd53f19298d003f9bf..085f31c6b8cae3987901dec0d668ed0004355f65 100644 (file)
@@ -70,6 +70,7 @@ struct pci_dev_info dev_info[] = {
        DEVICE( DEC,            DEC_FDDI,       "DEFPA"),
        DEVICE( DEC,            DEC_TULIP_PLUS, "DC21041"),
        DEVICE( DEC,            DEC_21052_AB,   "DC21052-AB"),
+       DEVICE( DEC,            DEC_21152_AA,   "DC21152-AA"),
        DEVICE( CIRRUS,         CIRRUS_5430,    "GD 5430"),
        DEVICE( CIRRUS,         CIRRUS_5434_4,  "GD 5434"),
        DEVICE( CIRRUS,         CIRRUS_5434_8,  "GD 5434"),
index d52a3b574a00e08420096f4721e658876d28d12e..b5d3c42eada5872c39cd63d9096a1a207fbfb1c6 100644 (file)
@@ -735,8 +735,9 @@ static void do_self(unsigned char value, char up_flag)
 #define A_CFLEX  '^'
 #define A_TILDE  '~'
 #define A_DIAER  '"'
-static unsigned char ret_diacr[] =
-       {A_GRAVE, A_ACUTE, A_CFLEX, A_TILDE, A_DIAER };
+#define A_CEDIL  ','
+static unsigned char ret_diacr[NR_DEAD] =
+       {A_GRAVE, A_ACUTE, A_CFLEX, A_TILDE, A_DIAER, A_CEDIL };
 
 /* If a dead key pressed twice, output a character corresponding to it,        */
 /* otherwise just remember the dead key.                               */
index 983cf8b15b9218a3791734fb349426df1667bb8b..d7372cf56b893a4944272930b460943421d6d40b 100644 (file)
@@ -1,3 +1,21 @@
+Sun Oct 27 22:00 1996 Gerard Roudier (groudier@club-internet.fr)
+       * ncr53c8xx.c ncr53c8xx.h - revision 1.14b
+       Add the following config parameters:
+
+       - CONFIG_SCSI_NCR53C8XX_MAX_TAGS
+         Max number of queued tagged commands.
+         Allow from 2 to 12, default 4.
+
+       - CONFIG_SCSI_NCR53C8XX_SYNC
+         Synchronous transfers frequency in MHz.
+         Allow from 5 to 10, default 5, 0 means asynchronous.
+         (And so remove CONFIG_SCSI_NCR53C8XX_FORCE_ASYNCHRONOUS)
+
+Sun Oct 20 16:00 1996 Gerard Roudier (groudier@club-internet.fr)
+       * ncr53c8xx.c
+       ncr_scatter() rewritten.
+       remove "ncr dead" detection.
 Sun Oct 13 19:00 1996 Gerard Roudier (groudier@club-internet.fr)
        * ncr53c8xx.c ncr53c8xx.h - revision 1.14a
        Enabling some special features makes problems with some hardware.
index 424c73987a8595dda99d74d1d53415985144ebb8..48f91cd22340adcb72155767fded423a84ce2e24 100644 (file)
@@ -48,14 +48,17 @@ if [ "$CONFIG_PCI" = "y" -a "$CONFIG_SCSI_NCR53C7xx" != "y" ]; then
   dep_tristate 'NCR53C8XX SCSI support' CONFIG_SCSI_NCR53C8XX $CONFIG_SCSI
   if [ "$CONFIG_SCSI_NCR53C8XX" != "n" ]; then
     bool '  enable tagged command queueing' CONFIG_SCSI_NCR53C8XX_TAGGED_QUEUE
-    bool '  force normal IO' CONFIG_SCSI_NCR53C8XX_IOMAPPED
-    bool '  not allow targets to disconnect' CONFIG_SCSI_NCR53C8XX_NO_DISCONNECT
-    bool '  force asynchronous transfer mode' CONFIG_SCSI_NCR53C8XX_FORCE_ASYNCHRONOUS
-    bool '  force synchronous negotiation' CONFIG_SCSI_NCR53C8XX_FORCE_SYNC_NEGO
+    bool '  use normal IO' CONFIG_SCSI_NCR53C8XX_IOMAPPED
+    int  '  maximum number of queued commands' CONFIG_SCSI_NCR53C8XX_MAX_TAGS 4
+    int  '  synchronous transfers frequency in MHz' CONFIG_SCSI_NCR53C8XX_SYNC 5
+    if [ "$CONFIG_SCSI_NCR53C8XX_TAGGED_QUEUE" != "y" ]; then
+      bool '  not allow targets to disconnect' CONFIG_SCSI_NCR53C8XX_NO_DISCONNECT
+    fi
   fi
   if [ "$CONFIG_SCSI_NCR53C8XX" != "n" -a "$CONFIG_EXPERIMENTAL" = "y" ]; then
     bool '  disable master parity checking' CONFIG_SCSI_NCR53C8XX_DISABLE_MPARITY_CHECK
     bool '  disable scsi parity checking' CONFIG_SCSI_NCR53C8XX_DISABLE_PARITY_CHECK
+    bool '  force synchronous negotiation' CONFIG_SCSI_NCR53C8XX_FORCE_SYNC_NEGO
   fi
 fi
 dep_tristate 'IOMEGA Parallel Port ZIP drive SCSI support' CONFIG_SCSI_PPA $CONFIG_SCSI
index 6514922818a6f7d1e0ad783b6cd40989bea0ae79..44ca2d0fc46f8fc9c5c626ed3a178cb4749c3374 100644 (file)
@@ -4,7 +4,7 @@ Written by Gerard Roudier <groudier@club-internet.fr>
 21 Rue Carnot
 95170 DEUIL LA BARRE - FRANCE
 
-13 October 1996
+28 October 1996
 ===============================================================================
 
 1.  Introduction
@@ -52,9 +52,7 @@ This short documentation only describes the features of the NCR53C8XX driver,
 configuration parameters and control commands available through the proc SCSI
 file system read / write operations.
 
-This driver has been tested OK with linux/i386  and is currently untested
-under linux/Alpha. If you intend to use this driver under linux/Alpha, just 
-try it first with read-only or mounted read-only devices.
+This driver has been tested OK with linux/i386 and Linux/Alpha.
 
 I am not a native speaker of English and there are probably lots of 
 mistakes in this README file. Any help will be welcome.
@@ -81,7 +79,7 @@ Chip   SDMS BIOS   Wide   Ultra SCSI   the driver      the driver
 815        Y         N        N            Y             Y
 825        Y         Y        N            Y             Y
 825A       Y         Y        N            Y             Not yet
-860        N         Y        N            Y             Y
+860        N         N        Y(1)         Y             Y
 875        Y         Y        Y(1)         Y             Y
 
 (1) Ultra SCSI extensions will be supported in a future release of the 
@@ -356,9 +354,16 @@ CONFIG_SCSI_NCR53C8XX_TAGGED_QUEUE    (default answer: n)
     Answer "y" if you are sure that all your SCSI devices that are able to 
     accept tagged commands will proceed safely.
 
-CONFIG_SCSI_NCR53C8XX_FORCE_ASYNCHRONOUS (default answer: n)
-    This option forces asynchronous transfer mode for all SCSI devices. 
+CONFIG_SCSI_NCR53C8XX_MAX_TAGS         (default answer: 4)
+    This option allows you to specify the maximum number of tagged commands 
+    that can be queued to a device.
+
+CONFIG_SCSI_NCR53C8XX_SYNC            (default answer: 5)
+    This option allows you to specify the frequency in MHz the driver 
+    will use at boot time for synchronous data transfer negotiations.
+    This frequency can be changed later with the "setsync" control command.
+    0 means "asynchronous data transfers".
+
 CONFIG_SCSI_NCR53C8XX_FORCE_SYNC_NEGO (default answer: n)
     Force synchronous negotiation for all SCSI-2 devices.
     Some SCSI-2 devices do not report this feature in byte 7 of inquiry 
@@ -377,9 +382,12 @@ To change other "defines", you must edit the header file.
 Do that only if you know what you are doing.
 
 SCSI_NCR_TRUST_BIOS_SETTING    (default: not defined)
-       If defined, the driver will preserve features bits in 
-       dmode/dcntl/ctest3/ctest4 io register.
-       Else, it will enable features according to chip and revision id.
+       If defined, the driver will preserve features bits from  
+       dmode/dcntl/ctest3/ctest4 io registers.
+
+SCSI_NCR_SPECIAL_FEATURES      (default: not defined)
+       If defined, the driver will enable some special features according  
+       to chip and revision id.
 
 SCSI_NCR_IOMAPPED              (default: not defined)
        If defined, normal I/O is forced.
@@ -391,6 +399,11 @@ SCSI_NCR_MAX_TAGS          (default: 4)
        Maximum number of simultaneous tagged commands to a device.
        Can be changed by "settags <target> <maxtags>"
 
+SCSI_NCR_DEFAULT_SYNC           (default: 5)
+       Frequency in KHz the driver will use at boot time for synchronous 
+       negotiation. 0 means asynchronous.
+       Can be changed by "setsync <target> <period>"
+
 SCSI_NCR_DEFAULT_TAGS          (default: 4)
        Default number of simultaneous tagged commands to a device.
        < 1 means tagged command queuing disabled at start-up.
@@ -399,10 +412,6 @@ SCSI_NCR_ALWAYS_SIMPLE_TAG (default: defined)
        Use SIMPLE TAG for read and write commands.
        Can be changed by "setorder <ordered|simple|default>"
 
-SCSI_NCR_TAGGED_QUEUE_DISABLED (default: defined)
-       If defined, tagged command queuing is disable at start-up.
-       Can be changed by "settags <target> <maxtags>"
-
 SCSI_NCR_NO_DISCONNECT         (default: not defined)
        If defined, targets are not allowed to disconnect.
 
@@ -483,7 +492,7 @@ Prior to installing the driver, you must untar the distribution, as follow:
 
        mkdir ncrB2L
        cd ncrB2L
-       tar zxvf ncrBsd2Linux-1.14a-src.tar.gz
+       tar zxvf ncrBsd2Linux-1.14b-src.tar.gz
 
 
 12. Installation procedure for Linux version 1
index b87017773d12b1df9f8d80c743fd715f3840e422..f4de28b59b5ed79a564d19ab652d2f33bb92823d 100644 (file)
@@ -1035,7 +1035,7 @@ int aha1542_detect(Scsi_Host_Template * tpnt)
                    shpnt->dma_channel = dma_chan;
                    shpnt->irq = irq_level;
                    HOSTDATA(shpnt)->bios_translation  = trans;
-                   if(trans == 2
+                   if(trans == BIOS_TRANSLATION_25563
                      printk("aha1542.c: Using extended bios translation\n");
                    HOSTDATA(shpnt)->aha1542_last_mbi_used  = (2*AHA1542_MAILBOXES - 1);
                    HOSTDATA(shpnt)->aha1542_last_mbo_used  = (AHA1542_MAILBOXES - 1);
@@ -1308,8 +1308,8 @@ int aha1542_biosparam(Scsi_Disk * disk, kdev_t dev, int * ip)
   int size = disk->capacity;
 
   translation_algorithm = HOSTDATA(disk->device->host)->bios_translation;
-  /* Should this be > 1024, or >= 1024?  Enquiring minds want to know. */
-  if((size>>11) > 1024 && translation_algorithm == 2) {
+
+  if((size>>11) > 1024 && translation_algorithm == BIOS_TRANSLATION_25563) {
     /* Please verify that this is the same as what DOS returns */
     ip[0] = 255;
     ip[1] = 63;
@@ -1318,8 +1318,8 @@ int aha1542_biosparam(Scsi_Disk * disk, kdev_t dev, int * ip)
     ip[0] = 64;
     ip[1] = 32;
     ip[2] = size >> 11;
-  };
-/*  if (ip[2] >= 1024) ip[2] = 1024; */
+  }
+
   return 0;
 }
 
index 8e2ef88cd41605233039c1f7c12229cec7669370..4c053a30c41f15297343280bc812a66b5b616a80 100644 (file)
@@ -94,23 +94,21 @@ struct proc_dir_entry proc_scsi_dtc = {
 
 
 static struct override {
-   unsigned char *address;
+   unsigned int address;
    int irq;
 } overrides
 #ifdef OVERRIDE
 [] = OVERRIDE;
 #else
-[4] = {{NULL, IRQ_AUTO}, {NULL, IRQ_AUTO}, {NULL, IRQ_AUTO},
-     {NULL, IRQ_AUTO}};
+[4] = {{0, IRQ_AUTO}, {0, IRQ_AUTO}, {0, IRQ_AUTO}, {0, IRQ_AUTO}};
 #endif
 
 #define NO_OVERRIDES (sizeof(overrides) / sizeof(struct override))
 
 static struct base {
-   unsigned char *address;
+   unsigned int address;
    int noauto;
-} bases[] = {{(unsigned char *) 0xcc000, 0}, {(unsigned char *) 0xc8000, 0},
-{(unsigned char *) 0xdc000, 0}, {(unsigned char *) 0xd8000, 0}};
+} bases[] = {{0xcc000, 0}, {0xc8000, 0}, {0xdc000, 0}, {0xd8000, 0}};
 
 #define NO_BASES (sizeof (bases) / sizeof (struct base))
 
@@ -138,10 +136,10 @@ void dtc_setup(char *str, int *ints) {
       printk("dtc_setup: usage dtc=address,irq\n");
    else
       if (commandline_current < NO_OVERRIDES) {
-      overrides[commandline_current].address = (unsigned char *) ints[1];
+      overrides[commandline_current].address = ints[1];
       overrides[commandline_current].irq = ints[2];
       for (i = 0; i < NO_BASES; ++i)
-        if (bases[i].address == (unsigned char *) ints[1]) {
+        if (bases[i].address == ints[1]) {
         bases[i].noauto = 1;
         break;
       }
@@ -166,25 +164,26 @@ void dtc_setup(char *str, int *ints) {
 int dtc_detect(Scsi_Host_Template * tpnt) {
    static int current_override = 0, current_base = 0;
    struct Scsi_Host *instance;
-   unsigned char *base;
+   unsigned int base;
    int sig, count;
 
    tpnt->proc_dir = &proc_scsi_dtc;
    tpnt->proc_info = &dtc_proc_info;
 
    for (count = 0; current_override < NO_OVERRIDES; ++current_override) {
-      base = NULL;
+      base = 0;
 
       if (overrides[current_override].address)
         base = overrides[current_override].address;
       else
         for (; !base && (current_base < NO_BASES); ++current_base) {
 #if (DTCDEBUG & DTCDEBUG_INIT)
-        printk("scsi : probing address %08x\n", (unsigned int) bases[current_base].address);
+        printk("scsi : probing address %08x\n", bases[current_base].address);
 #endif
         for (sig = 0; sig < NO_SIGNATURES; ++sig)
-           if (!bases[current_base].noauto && !memcmp
-             (bases[current_base].address + signatures[sig].offset,
+           if (!bases[current_base].noauto && 
+               check_signature(bases[current_base].address +
+                               signatures[sig].offset,
              signatures[sig].string, strlen(signatures[sig].string))) {
            base = bases[current_base].address;
 #if (DTCDEBUG & DTCDEBUG_INIT)
@@ -195,14 +194,14 @@ int dtc_detect(Scsi_Host_Template * tpnt) {
       }
 
 #if defined(DTCDEBUG) && (DTCDEBUG & DTCDEBUG_INIT)
-      printk("scsi-dtc : base = %08x\n", (unsigned int) base);
+      printk("scsi-dtc : base = %08x\n", base);
 #endif
 
       if (!base)
         break;
 
       instance = scsi_register (tpnt, sizeof(struct NCR5380_hostdata));
-      instance->base = base;
+      instance->base = (void *)base;
 
       NCR5380_init(instance, 0);
 
@@ -282,6 +281,7 @@ int dtc_biosparam(Disk * disk, kdev_t dev, int * ip)
    return 0;
 }
 
+
 /****************************************************************
  * Function : int NCR5380_pread (struct Scsi_Host *instance, 
  *     unsigned char *dst, int len)
@@ -320,7 +320,7 @@ static inline int NCR5380_pread (struct Scsi_Host *instance,
       while (NCR5380_read(DTC_CONTROL_REG) & CSR_HOST_BUF_NOT_RDY)
         ++i;
       rtrc(3);
-      memcpy(d, (char *)(base + DTC_DATA_BUF), 128);
+      memcpy_fromio(d, base + DTC_DATA_BUF, 128);
       d += 128;
       len -= 128;
       rtrc(7); /*** with int's on, it sometimes hangs after here.
@@ -370,7 +370,7 @@ static inline int NCR5380_pwrite (struct Scsi_Host *instance,
       while (NCR5380_read(DTC_CONTROL_REG) & CSR_HOST_BUF_NOT_RDY)
         ++i;
       rtrc(3);
-      memcpy((char *)(base + DTC_DATA_BUF), src, 128);
+      memcpy_toio(base + DTC_DATA_BUF, src, 128);
       src += 128;
       len -= 128;
    }
index 4c41237b1d5200c0c4e7a65e63c326f2135ae665..0f1fcf0874af75b7febb6850bb4649b87d4217f2 100644 (file)
@@ -6,7 +6,7 @@
  *     drew@colorado.edu
  *      +1 (303) 440-4894
  *
- * DISTRIBUTION RELEASE 1
+ * DISTRIBUTION RELEASE 2
  *
  * For more information, please consult 
  *
@@ -28,7 +28,7 @@
 #ifndef DTC3280_H
 #define DTC3280_H
 
-#define DTC_PUBLIC_RELEASE 1
+#define DTC_PUBLIC_RELEASE 2
 
 /*#define DTCDEBUG 0x1*/
 #define DTCDEBUG_INIT  0x1
@@ -116,40 +116,40 @@ int dtc_proc_info (char *buffer, char **start, off_t offset,
 #ifndef HOSTS_C
 
 #define NCR5380_implementation_fields \
-    volatile unsigned char *base
+    volatile unsigned int base
 
 #define NCR5380_local_declare() \
-    volatile unsigned char *base
+    volatile unsigned int base
 
 #define NCR5380_setup(instance) \
-    base = (volatile unsigned char *) (instance)->base
+    base = (unsigned int)(instance)->base
 
 #define DTC_address(reg) (base + DTC_5380_OFFSET + reg)
 
 #define dbNCR5380_read(reg)                                              \
-    (rval=*(DTC_address(reg)), \
+    (rval=readb(DTC_address(reg)), \
      (((unsigned char) printk("DTC : read register %d at addr %08x is: %02x\n"\
     , (reg), (int)DTC_address(reg), rval)), rval ) )
 
 #define dbNCR5380_write(reg, value) do {                                  \
     printk("DTC : write %02x to register %d at address %08x\n",         \
             (value), (reg), (int)DTC_address(reg));     \
-    *(DTC_address(reg)) = (value);} while(0)
+    writeb(value, DTC_address(reg));} while(0)
 
 
 #if !(DTCDEBUG & DTCDEBUG_TRANSFER) 
-#define NCR5380_read(reg) (*(DTC_address(reg)))
-#define NCR5380_write(reg, value) (*(DTC_address(reg)) = (value))
+#define NCR5380_read(reg) (readb(DTC_address(reg)))
+#define NCR5380_write(reg, value) (writeb(value, DTC_address(reg)))
 #else
-#define NCR5380_read(reg) (*(DTC_address(reg)))
+#define NCR5380_read(reg) (readb(DTC_address(reg)))
 #define xNCR5380_read(reg)                                             \
     (((unsigned char) printk("DTC : read register %d at address %08x\n"\
-    , (reg), DTC_address(reg))), *(DTC_address(reg)))
+    , (reg), DTC_address(reg))), readb(DTC_address(reg)))
 
 #define NCR5380_write(reg, value) do {                                 \
     printk("DTC : write %02x to register %d at address %08x\n",        \
            (value), (reg), (int)DTC_address(reg));     \
-    *(DTC_address(reg)) = (value);             } while(0)
+    writeb(value, DTC_address(reg));} while(0)
 #endif
 
 #define NCR5380_intr dtc_intr
index 9e91f5a3ee08b3666aa430ba1a8f4ccf0ff253b1..146696120360d2244fe3760abd2adcece743fa66 100644 (file)
@@ -469,7 +469,7 @@ struct signature {
    { "IBM F1 P2 BIOS v1.0104/29/93",                        5, 28,  3, -1, 0 },
    { "Future Domain Corp. V1.0008/18/93",                   5, 33,  3,  4, 0 },
    { "Future Domain Corp. V1.0008/18/93",                  26, 33,  3,  4, 1 },
-   { "Adaptec AHA-2920 PCI-SCSI Card",                     42, 31,  3,  0, 1 },
+   { "Adaptec AHA-2920 PCI-SCSI Card",                     42, 31,  3, -1, 1 },
                                /* This next signature may not be a 3.5 bios */
    { "Future Domain Corp. V2.0108/18/93",                   5, 33,  3,  5, 0 },
    { "FUTURE DOMAIN CORP.  V3.5008/18/93",                  5, 34,  3,  5, 0 },
index fc7eef776382ee5965f497fc88ed9b141d0d5187..8cac8c06cada7dbaf98db6b438ff0f791cfd1366 100644 (file)
@@ -159,6 +159,10 @@ typedef u32 u_int32;
 #define SCSI_NCR_MAX_SYNC   (10000)
 #endif
 
+#ifndef SCSI_NCR_DEFAULT_SYNC
+#define SCSI_NCR_DEFAULT_SYNC  SCSI_NCR_MAX_SYNC
+#endif
+
 /*
 **    The maximal bus with (in log2 byte)
 **    (0=8 bit, 1=16 bit)
@@ -1255,9 +1259,9 @@ struct ncb {
        int    chip;                    /* Chip number                       */
        struct timer_list timer;        /* Timer link header                 */
        int     ncr_cache;              /* Cache test variable               */
-       int     release_stage;          /* Synchronisation stage on release  */
        Scsi_Cmnd *waiting_list;        /* Waiting list header for commands  */
                                        /* that we can't put into the squeue */
+       u_char  release_stage;          /* Synchronisation stage on release  */
 
        /*-----------------------------------------------
        **      Added field to support differences
@@ -3883,9 +3887,10 @@ int ncr_queue_command (Scsi_Cmnd *cmd, void (* done)(Scsi_Cmnd *))
        **
        **----------------------------------------------------
        */
-
+#ifdef SCSI_NCR_PROFILE
        bzero (&cp->phys.header.stamp, sizeof (struct tstamp));
        cp->phys.header.stamp.start = jiffies;
+#endif
 
        /*----------------------------------------------------
        **
@@ -5007,10 +5012,10 @@ void ncr_init (ncb_p np, char * msg, u_long code)
 
        usrsync = 255;
 
-#ifndef SCSI_NCR_FORCE_ASYNCHRONOUS
+#if defined(SCSI_NCR_DEFAULT_SYNC) && SCSI_NCR_DEFAULT_SYNC != 0
        if (SCSI_NCR_MAX_SYNC) {
                u_long period;
-               period =1000000/SCSI_NCR_MAX_SYNC; /* ns = 10e6 / kHz */
+               period =1000000/SCSI_NCR_DEFAULT_SYNC; /* ns = 10e6 / kHz */
                if (period <= 11 * np->ns_sync) {
                        if (period < 4 * np->ns_sync)
                                usrsync = np->ns_sync;
@@ -5461,6 +5466,7 @@ static void ncr_timeout (ncb_p np)
                        */
                        OUTB (nc_istat, SIGP);
                }
+#ifdef undef
                if (np->latetime>10) {
                        /*
                        **      Although we tried to wake it up,
@@ -5481,6 +5487,7 @@ static void ncr_timeout (ncb_p np)
 #endif
                        np->heartbeat = thistime;
                }
+#endif /* undef */
 
                /*----------------------------------------------------
                **
@@ -7238,31 +7245,39 @@ if(DEBUG_FLAGS & DEBUG_SCATTER)
 
 static int     ncr_scatter(ccb_p cp, Scsi_Cmnd *cmd)
 {
-       struct dsb *phys = &cp->phys;
-       u_short segment  = 0;
-
-       cp->data_len = 0;
-       bzero (&phys->data, sizeof (phys->data));
-
-       if (!cmd->use_sg) {
-            phys->data[segment].addr = vtophys(cmd->request_buffer);
-            phys->data[segment].size = cmd->request_bufflen;
-            cp->data_len            += phys->data[segment].size;
-            segment++;
-            return segment;
+       struct scr_tblmove *data;
+       int segment     = 0;
+       int use_sg      = (int) cmd->use_sg;
+
+       bzero (cp->phys.data, sizeof (cp->phys.data));
+       data            = cp->phys.data;
+       cp->data_len    = 0;
+
+       if (!use_sg) {
+               if (cmd->request_bufflen) {
+                       data[0].addr    = vtophys(cmd->request_buffer);
+                       data[0].size    = cmd->request_bufflen;
+                       cp->data_len    = data[0].size;
+                       segment = 1;
+               }
        }
-
-       while (segment < cmd->use_sg && segment < MAX_SCATTER) {
-            struct scatterlist *scatter = (struct scatterlist *)cmd->buffer;
-
-            phys->data[segment].addr = vtophys(scatter[segment].address);
-            phys->data[segment].size = scatter[segment].length;
-            cp->data_len            += phys->data[segment].size;
-            ++segment;
+       else if (use_sg < MAX_SCATTER) {
+               struct scatterlist *scatter = (struct scatterlist *)cmd->buffer;
+
+               while (segment < use_sg) {
+                       data[segment].addr = vtophys(scatter[segment].address);
+                       data[segment].size = scatter[segment].length;
+                       cp->data_len       += data[segment].size;
+                       ++segment;
+               }
+       }
+       else {
+               return -1;
        }
 
-       return segment < cmd->use_sg ? -1 : segment;
+       return segment;
 }
+
 #endif /* SCSI_NCR_SEGMENT_SIZE */
 
 /*==========================================================
index c4aa781f3c89318490b6765fa90280a760bfa923..3a17e732e7c473ff599d2c8ef2ebf659f00515ce 100644 (file)
 #ifndef NCR53C8XX_H
 #define NCR53C8XX_H
 
+/*
+**     Name and revision of the driver
+*/
+#define SCSI_NCR_DRIVER_NAME           "ncr53c8xx - revision 1.14b"
 /*
 **     If SCSI_NCR_SPECIAL_FEATURES is defined,
 **     the driver enables or not the following features according to chip id 
 **     Avoid to change these constants, unless you know what you are doing.
 */
 
+#ifdef CONFIG_SCSI_NCR53C8XX_MAX_TAGS
+#if    CONFIG_SCSI_NCR53C8XX_MAX_TAGS < 2
+#define SCSI_NCR_MAX_TAGS      (2)
+#elif  CONFIG_SCSI_NCR53C8XX_MAX_TAGS > 12
+#define SCSI_NCR_MAX_TAGS      (12)
+#else
+#define        SCSI_NCR_MAX_TAGS       CONFIG_SCSI_NCR53C8XX_MAX_TAGS
+#endif
+#else
 #define SCSI_NCR_MAX_TAGS      (4)
+#endif
+
 #define SCSI_NCR_ALWAYS_SIMPLE_TAG
 
+#ifdef CONFIG_SCSI_NCR53C8XX_TAGGED_QUEUE
+#define SCSI_NCR_DEFAULT_TAGS  SCSI_NCR_MAX_TAGS
+#else
+#define SCSI_NCR_DEFAULT_TAGS  (0)
+#endif
+
 #ifdef CONFIG_SCSI_NCR53C8XX_IOMAPPED
 #define        SCSI_NCR_IOMAPPED
 #endif
 
-#ifdef CONFIG_SCSI_NCR53C8XX_TAGGED_QUEUE
-#define SCSI_NCR_DEFAULT_TAGS  SCSI_NCR_MAX_TAGS
+#ifdef CONFIG_SCSI_NCR53C8XX_SYNC
+#if    CONFIG_SCSI_NCR53C8XX_SYNC == 0
+#define        SCSI_NCR_DEFAULT_SYNC   (0)
+#elif  CONFIG_SCSI_NCR53C8XX_SYNC < 5
+#define        SCSI_NCR_DEFAULT_SYNC   (5000)
+#elif  CONFIG_SCSI_NCR53C8XX_SYNC > 10
+#define        SCSI_NCR_DEFAULT_SYNC   (10000)
 #else
-#define SCSI_NCR_DEFAULT_TAGS  (0)
+#define        SCSI_NCR_DEFAULT_SYNC   (CONFIG_SCSI_NCR53C8XX_SYNC * 1000)
+#endif
+#else
+#define        SCSI_NCR_DEFAULT_SYNC   (10000)
 #endif
 
-#ifdef CONFIG_SCSI_NCR53C8XX_NO_DISCONNECT
-#define SCSI_NCR_NO_DISCONNECT
+#ifdef CONFIG_SCSI_FORCE_ASYNCHRONOUS
+#undef SCSI_NCR_DEFAULT_SYNC
+#define SCSI_NCR_DEFAULT_SYNC  (0)
 #endif
 
-#ifdef CONFIG_SCSI_NCR53C8XX_FORCE_ASYNCHRONOUS
-#define SCSI_NCR_FORCE_ASYNCHRONOUS
+#ifdef CONFIG_SCSI_NCR53C8XX_NO_DISCONNECT
+#define SCSI_NCR_NO_DISCONNECT
 #endif
 
 #ifdef CONFIG_SCSI_NCR53C8XX_FORCE_SYNC_NEGO
@@ -230,7 +261,7 @@ int ncr53c8xx_release(struct Scsi_Host *);
 
 #if    LINUX_VERSION_CODE >= LinuxVersionCode(1,3,0)
 
-#define NCR53C8XX {NULL,NULL,NULL,NULL,"ncr53c8xx (rel 1.14a)", ncr53c8xx_detect,\
+#define NCR53C8XX {NULL,NULL,NULL,NULL,SCSI_NCR_DRIVER_NAME, ncr53c8xx_detect,\
        ncr53c8xx_release, /* info */ NULL, /* command, deprecated */ NULL,             \
        ncr53c8xx_queue_command, ncr53c8xx_abort, ncr53c8xx_reset,      \
         NULL /* slave attach */, scsicam_bios_param, /* can queue */ SCSI_NCR_CAN_QUEUE,\
@@ -241,7 +272,7 @@ int ncr53c8xx_release(struct Scsi_Host *);
 #else
 
 
-#define NCR53C8XX {NULL, NULL, "ncr53c8xx (rel 1.14a)", ncr53c8xx_detect,\
+#define NCR53C8XX {NULL, NULL, SCSI_NCR_DRIVER_NAME, ncr53c8xx_detect,\
        ncr53c8xx_release, /* info */ NULL, /* command, deprecated */ NULL,             \
        ncr53c8xx_queue_command, ncr53c8xx_abort, ncr53c8xx_reset,      \
         NULL /* slave attach */, scsicam_bios_param, /* can queue */ SCSI_NCR_CAN_QUEUE,\
index 15247acd0a731c61e07292d2896e1364bad1d7ef..e45d80f4e754724f84275c9e82436dc6a38f2a61 100644 (file)
@@ -54,22 +54,28 @@ int scsicam_bios_param (Disk *disk, /* SCSI disk */
     if (!(bh = bread(MKDEV(MAJOR(dev), MINOR(dev)&~0xf), 0, 1024)))
        return -1;
 
-#ifdef DEBUG
-       printk ("scsicam_bios_param : trying existing mapping\n");
-#endif
+    /* try to infer mapping from partition table */
     ret_code = partsize (bh, (unsigned long) size, (unsigned int *) ip + 2, 
        (unsigned int *) ip + 0, (unsigned int *) ip + 1);
     brelse (bh);
 
     if (ret_code == -1) {
-#ifdef DEBUG
-       printk ("scsicam_bios_param : trying optimal mapping\n");
-#endif
+       /* pick some standard mapping with at most 1024 cylinders,
+          and at most 62 sectors per track - this works up to
+          7905 MB */
        ret_code = setsize ((unsigned long) size, (unsigned int *) ip + 2, 
            (unsigned int *) ip + 0, (unsigned int *) ip + 1);
     }
 
-    return ret_code;
+    /* if something went wrong, then apparently we have to return
+       a geometry with more than 1024 cylinders */
+    if (ret_code || ip[0] > 255 || ip[1] > 63) {
+        ip[0] = 64;
+        ip[1] = 32;
+        ip[2] = size / (ip[0] * ip[1]);
+    }
+
+    return 0;
 }
 
 /*
index 695d92c466d81d66ebb9ad52952bf382905b6e7d..e4e9ad9fbce59326a5d1b7fcbee34a1abe00b046 100644 (file)
@@ -126,7 +126,7 @@ smb_put_inode(struct inode *inode)
        struct smb_inode_info *info = SMB_INOP(inode);
 
        int opened        = finfo->opened;
-       int mtime         = finfo->mtime;
+       int mtime         = inode->i_mtime;
        int file_id       = finfo->fileid;
        int isdir         = S_ISDIR(inode->i_mode);
        unsigned long ino = inode->i_ino;
diff --git a/include/asm-alpha/ide.h b/include/asm-alpha/ide.h
new file mode 100644 (file)
index 0000000..6ad91b1
--- /dev/null
@@ -0,0 +1,119 @@
+/*
+ *  linux/include/asm-alpha/ide.h
+ *
+ *  Copyright (C) 1994-1996  Linus Torvalds & authors
+ */
+
+/*
+ *  This file contains the alpha architecture specific IDE code.
+ */
+
+#ifndef __ASMalpha_IDE_H
+#define __ASMalpha_IDE_H
+
+#ifdef __KERNEL__
+
+typedef unsigned short ide_ioreg_t;
+
+#ifndef MAX_HWIFS
+#define MAX_HWIFS      4
+#endif
+
+#define ide_sti()      sti()
+
+static __inline__ int ide_default_irq(ide_ioreg_t base)
+{
+       switch (base) {
+               case 0x1f0: return 14;
+               case 0x170: return 15;
+               case 0x1e8: return 11;
+               case 0x168: return 10;
+               default:
+                       return 0;
+       }
+}
+
+static __inline__ ide_ioreg_t ide_default_io_base(int index)
+{
+       switch (index) {
+               case 0: return 0x1f0;
+               case 1: return 0x170;
+               case 2: return 0x1e8;
+               case 3: return 0x168;
+               default:
+                       return 0;
+       }
+}
+
+static __inline__ void ide_init_hwif_ports (ide_ioreg_t *p, ide_ioreg_t base, int *irq)
+{
+       ide_ioreg_t port = base;
+       int i = 8;
+
+       while (i--)
+               *p++ = port++;
+       *p++ = base + 0x206;
+       if (irq != NULL)
+               *irq = 0;
+}
+
+typedef union {
+       unsigned all                    : 8;    /* all of the bits together */
+       struct {
+               unsigned head           : 4;    /* always zeros here */
+               unsigned unit           : 1;    /* drive select number, 0 or 1 */
+               unsigned bit5           : 1;    /* always 1 */
+               unsigned lba            : 1;    /* using LBA instead of CHS */
+               unsigned bit7           : 1;    /* always 1 */
+       } b;
+       } select_t;
+
+static __inline__ int ide_request_irq(unsigned int irq, void (*handler)(int, void *, struct pt_regs *),
+                       unsigned long flags, const char *device, void *dev_id)
+{
+       return request_irq(irq, handler, flags, device, dev_id);
+}                      
+
+static __inline__ void ide_free_irq(unsigned int irq, void *dev_id)
+{
+       free_irq(irq, dev_id);
+}
+
+static __inline__ int ide_check_region (ide_ioreg_t from, unsigned int extent)
+{
+       return check_region(from, extent);
+}
+
+static __inline__ void ide_request_region (ide_ioreg_t from, unsigned int extent, const char *name)
+{
+       request_region(from, extent, name);
+}
+
+static __inline__ void ide_release_region (ide_ioreg_t from, unsigned int extent)
+{
+       release_region(from, extent);
+}
+
+/*
+ * The following are not needed for the non-m68k ports
+ */
+static __inline__ int ide_ack_intr (ide_ioreg_t base_port, ide_ioreg_t irq_port)
+{
+       return(1);
+}
+
+static __inline__ void ide_fix_driveid(struct hd_driveid *id)
+{
+}
+
+static __inline__ void ide_release_lock (int *ide_lock)
+{
+}
+
+static __inline__ void ide_get_lock (int *ide_lock, void (*handler)(int, void *, struct pt_regs *), void *data)
+{
+}
+
+#endif /* __KERNEL__ */
+
+#endif /* __ASMalpha_IDE_H */
index 5fb77913292be8e074a5fa47103c4d5ce1ad3f05..72ba38307bf1fcbef2cad1f4656d4c2e50506d70 100644 (file)
@@ -115,4 +115,31 @@ static inline unsigned short ip_compute_csum(unsigned char * buff, int len) {
     return csum_fold (csum_partial(buff, len, 0));
 }
 
+#define _HAVE_ARCH_IPV6_CSUM
+static __inline__ unsigned short int csum_ipv6_magic(struct in6_addr *saddr,
+                                                    struct in6_addr *daddr,
+                                                    __u16 len,
+                                                    unsigned short proto,
+                                                    unsigned int sum) 
+{
+       __asm__("
+               addl 0(%1), %0
+               adcl 4(%1), %0
+               adcl 8(%1), %0
+               adcl 12(%1), %0
+               adcl 0(%2), %0
+               adcl 4(%2), %0
+               adcl 8(%2), %0
+               adcl 12(%2), %0
+               adcl %3, %0
+               adcl %4, %0
+               adcl $0, %0
+               "
+               : "=&r" (sum)
+               : "r" (saddr), "r" (daddr), 
+                 "r"(htonl((__u32) (len))), "r"(htonl(proto)), "0"(sum));
+
+       return csum_fold(sum);
+}
+
 #endif
diff --git a/include/asm-i386/ide.h b/include/asm-i386/ide.h
new file mode 100644 (file)
index 0000000..a2d7970
--- /dev/null
@@ -0,0 +1,119 @@
+/*
+ *  linux/include/asm-i386/ide.h
+ *
+ *  Copyright (C) 1994-1996  Linus Torvalds & authors
+ */
+
+/*
+ *  This file contains the i386 architecture specific IDE code.
+ */
+
+#ifndef __ASMi386_IDE_H
+#define __ASMi386_IDE_H
+
+#ifdef __KERNEL__
+
+typedef unsigned short ide_ioreg_t;
+
+#ifndef MAX_HWIFS
+#define MAX_HWIFS      4
+#endif
+
+#define ide_sti()      sti()
+
+static __inline__ int ide_default_irq(ide_ioreg_t base)
+{
+       switch (base) {
+               case 0x1f0: return 14;
+               case 0x170: return 15;
+               case 0x1e8: return 11;
+               case 0x168: return 10;
+               default:
+                       return 0;
+       }
+}
+
+static __inline__ ide_ioreg_t ide_default_io_base(int index)
+{
+       switch (index) {
+               case 0: return 0x1f0;
+               case 1: return 0x170;
+               case 2: return 0x1e8;
+               case 3: return 0x168;
+               default:
+                       return 0;
+       }
+}
+
+static __inline__ void ide_init_hwif_ports (ide_ioreg_t *p, ide_ioreg_t base, int *irq)
+{
+       ide_ioreg_t port = base;
+       int i = 8;
+
+       while (i--)
+               *p++ = port++;
+       *p++ = base + 0x206;
+       if (irq != NULL)
+               *irq = 0;
+}
+
+typedef union {
+       unsigned all                    : 8;    /* all of the bits together */
+       struct {
+               unsigned head           : 4;    /* always zeros here */
+               unsigned unit           : 1;    /* drive select number, 0 or 1 */
+               unsigned bit5           : 1;    /* always 1 */
+               unsigned lba            : 1;    /* using LBA instead of CHS */
+               unsigned bit7           : 1;    /* always 1 */
+       } b;
+       } select_t;
+
+static __inline__ int ide_request_irq(unsigned int irq, void (*handler)(int, void *, struct pt_regs *),
+                       unsigned long flags, const char *device, void *dev_id)
+{
+       return request_irq(irq, handler, flags, device, dev_id);
+}                      
+
+static __inline__ void ide_free_irq(unsigned int irq, void *dev_id)
+{
+       free_irq(irq, dev_id);
+}
+
+static __inline__ int ide_check_region (ide_ioreg_t from, unsigned int extent)
+{
+       return check_region(from, extent);
+}
+
+static __inline__ void ide_request_region (ide_ioreg_t from, unsigned int extent, const char *name)
+{
+       request_region(from, extent, name);
+}
+
+static __inline__ void ide_release_region (ide_ioreg_t from, unsigned int extent)
+{
+       release_region(from, extent);
+}
+
+/*
+ * The following are not needed for the non-m68k ports
+ */
+static __inline__ int ide_ack_intr (ide_ioreg_t base_port, ide_ioreg_t irq_port)
+{
+       return(1);
+}
+
+static __inline__ void ide_fix_driveid(struct hd_driveid *id)
+{
+}
+
+static __inline__ void ide_release_lock (int *ide_lock)
+{
+}
+
+static __inline__ void ide_get_lock (int *ide_lock, void (*handler)(int, void *, struct pt_regs *), void *data)
+{
+}
+
+#endif /* __KERNEL__ */
+
+#endif /* __ASMi386_IDE_H */
index 9c61412f4d4ec2240eeb7c1d849ce824d1c9954c..ab5aaf2610c65242ca88f4a2a06d1ecedf132690 100644 (file)
@@ -1,8 +1,35 @@
+/*
+ *  linux/include/asm-m68k/ide.h
+ *
+ *  Copyright (C) 1994-1996  Linus Torvalds & authors
+ */
+/* Copyright(c) 1996 Kars de Jong */
+/* Based on the ide driver from 1.2.13pl8 */
+
+/*
+ * Credits (alphabetical):
+ *
+ *  - Bjoern Brauel
+ *  - Kars de Jong
+ *  - Torsten Ebeling
+ *  - Dwight Engen
+ *  - Thorsten Floeck
+ *  - Roman Hodek
+ *  - Guenther Kelleter
+ *  - Chris Lawrence
+ *  - Michael Rausch
+ *  - Christian Sauer
+ *  - Michael Schmitz
+ *  - Jes Soerensen
+ *  - Michael Thurm
+ *  - Geert Uytterhoeven
+ */
+
 #ifndef _M68K_IDE_H
 #define _M68K_IDE_H
 
-/* Copyright(c) 1996 Kars de Jong */
-/* Based on the ide driver from 1.2.13pl8 */
+#ifdef __KERNEL__
 
 #include <linux/config.h>
 
 #include <asm/atari_stdma.h>
 #endif /* CONFIG_ATARI */
 
-#include <asm/bootinfo.h>
+#include <asm/setup.h>
+#include <asm/io.h>
+#include <asm/irq.h>
 
-struct hd_regs_struct {
-  unsigned int hd_error,
-  hd_nsector,
-  hd_sector,
-  hd_lcyl,
-  hd_hcyl,
-  hd_select,
-  hd_status;
-};
+typedef unsigned char * ide_ioreg_t;
+
+#ifndef MAX_HWIFS
+#define MAX_HWIFS      1
+#endif
+
+static __inline int ide_default_irq (ide_ioreg_t base)
+{
+       return 0;
+}
+
+static __inline__ ide_ioreg_t ide_default_io_base (int index)
+{
+       if (index)
+               return NULL;
+#ifdef CONFIG_AMIGA
+       if (MACH_IS_AMIGA) {
+               if (AMIGAHW_PRESENT(A4000_IDE)) {
+                       printk("Gayle IDE interface (A%d style)\n", 4000);
+                       return ((ide_ioreg_t)ZTWO_VADDR(HD_BASE_A4000));
+               }
+               if (AMIGAHW_PRESENT(A1200_IDE)) {
+                       printk("Gayle IDE interface (A%d style)\n", 1200);
+                       return ((ide_ioreg_t)ZTWO_VADDR(HD_BASE_A1200));
+               }
+       }
+#endif /* CONFIG_AMIGA */
+#ifdef CONFIG_ATARI
+       if (MACH_IS_ATARI) {
+               if (ATARIHW_PRESENT(IDE)) {
+                       printk("Falcon IDE interface\n");
+                       return ((ide_ioreg_t) ATA_HD_BASE);
+               }
+       }
+#endif /* CONFIG_ATARI */
+       return NULL;
+}
+
+static __inline__ void ide_init_hwif_ports (ide_ioreg_t *p, ide_ioreg_t base, int *irq)
+{
+       *p++ = base;
+#ifdef CONFIG_AMIGA
+       if (MACH_IS_AMIGA) {
+               *p++ = base + AMI_HD_ERROR;
+               *p++ = base + AMI_HD_NSECTOR;
+               *p++ = base + AMI_HD_SECTOR;
+               *p++ = base + AMI_HD_LCYL;
+               *p++ = base + AMI_HD_HCYL;
+               *p++ = base + AMI_HD_SELECT;
+               *p++ = base + AMI_HD_STATUS;
+               *p++ = base + AMI_HD_CMD;
+               if (AMIGAHW_PRESENT(A4000_IDE))
+                       *p++ = (ide_ioreg_t) ZTWO_VADDR(HD_A4000_IRQ);
+               else if (AMIGAHW_PRESENT(A1200_IDE))
+                       *p++ = (ide_ioreg_t) ZTWO_VADDR(HD_A1200_IRQ);
+               if (irq != NULL)
+                       *irq = IRQ_AMIGA_PORTS;
+       }
+#endif /* CONFIG_AMIGA */
+#ifdef CONFIG_ATARI
+       if (MACH_IS_ATARI) {
+               *p++ = base + ATA_HD_ERROR;
+               *p++ = base + ATA_HD_NSECTOR;
+               *p++ = base + ATA_HD_SECTOR;
+               *p++ = base + ATA_HD_LCYL;
+               *p++ = base + ATA_HD_HCYL;
+               *p++ = base + ATA_HD_CURRENT;
+               *p++ = base + ATA_HD_STATUS;
+               *p++ = base + ATA_HD_CMD;
+               if (irq != NULL)
+                       *irq = IRQ_MFP_IDE;
+       }
+#endif /* CONFIG_ATARI */
+}
+
+typedef union {
+       unsigned all                    : 8;    /* all of the bits together */
+       struct {
+               unsigned bit7           : 1;    /* always 1 */
+               unsigned lba            : 1;    /* using LBA instead of CHS */
+               unsigned bit5           : 1;    /* always 1 */
+               unsigned unit           : 1;    /* drive select number, 0 or 1 */
+               unsigned head           : 4;    /* always zeros here */
+       } b;
+       } select_t;
 
-static struct hd_regs_struct hd_regs;
-static void probe_m68k_ide (void);
-
-/* Undefine these again, they were defined for the PC. */
-#undef IDE_ERROR_OFFSET
-#undef IDE_NSECTOR_OFFSET
-#undef IDE_SECTOR_OFFSET
-#undef IDE_LCYL_OFFSET
-#undef IDE_HCYL_OFFSET
-#undef IDE_SELECT_OFFSET
-#undef IDE_STATUS_OFFSET
-#undef IDE_FEATURE_OFFSET
-#undef IDE_COMMAND_OFFSET
-#undef SELECT_DRIVE
-
-#define IDE_ERROR_OFFSET       hd_regs.hd_error
-#define IDE_NSECTOR_OFFSET     hd_regs.hd_nsector
-#define IDE_SECTOR_OFFSET      hd_regs.hd_sector
-#define IDE_LCYL_OFFSET                hd_regs.hd_lcyl
-#define IDE_HCYL_OFFSET                hd_regs.hd_hcyl
-#define IDE_SELECT_OFFSET      hd_regs.hd_select
-#define IDE_STATUS_OFFSET      hd_regs.hd_status
-#define IDE_FEATURE_OFFSET     IDE_ERROR_OFFSET
-#define IDE_COMMAND_OFFSET     IDE_STATUS_OFFSET
+static __inline__ int ide_request_irq(unsigned int irq, void (*handler)(int, void *, struct pt_regs *),
+                       unsigned long flags, const char *device, void *dev_id)
+{
+#ifdef CONFIG_AMIGA
+       if (MACH_IS_AMIGA)
+               return request_irq(irq, handler, 0, device, dev_id);
+#endif /* CONFIG_AMIGA */
+       return 0;
+}
+
+static __inline__ void ide_free_irq(unsigned int irq, void *dev_id)
+{
+#ifdef CONFIG_AMIGA
+       if (MACH_IS_AMIGA)
+               free_irq(irq, dev_id);
+#endif /* CONFIG_AMIGA */
+}
+
+/*
+ * We should really implement those some day.
+ */
+static __inline__ int ide_check_region (ide_ioreg_t from, unsigned int extent)
+{
+       return 0;
+}
+
+static __inline__ void ide_request_region (ide_ioreg_t from, unsigned int extent, const char *name)
+{
+}
+
+static __inline__ void ide_release_region (ide_ioreg_t from, unsigned int extent)
+{
+}
+
+#undef SUPPORT_SLOW_DATA_PORTS
+#define SUPPORT_SLOW_DATA_PORTS 0
 
 #undef SUPPORT_VLB_SYNC
 #define SUPPORT_VLB_SYNC 0
@@ -62,38 +179,11 @@ static void probe_m68k_ide (void);
 #undef HD_DATA
 #define HD_DATA NULL
 
-/* MSch: changed sti() to STI() wherever possible in ide.c; moved STI() def. 
- * to asm/ide.h 
- */
-/* The Atari interrupt structure strictly requires that the IPL isn't lowered
- * uncontrolled in an interrupt handler. In the concrete case, the IDE
- * interrupt is already a slow int, so the irq is already disabled at the time
- * the handler is called, and the IPL has been lowered to the minimum value
- * possible. To avoid going below that, STI() checks for being called inside
- * an interrupt, and in that case it does nothing. Hope that is reasonable and
- * works. (Roman)
- */
-#if defined(CONFIG_ATARI) && !defined(CONFIG_AMIGA)
-#define        STI()                                   \
-    do {                                       \
-       if (!intr_count) sti();                 \
-    } while(0)
-#elif defined(CONFIG_ATARI)
-#define        STI()                                           \
-    do {                                               \
-       if (!MACH_IS_ATARI || !intr_count) sti();       \
-    } while(0)
-#else /* !defined(CONFIG_ATARI) */
-#define        STI()   sti()
-#endif
-
-#define SELECT_DRIVE(hwif,drive)  OUT_BYTE((drive)->select.all, hwif->io_base+IDE_SELECT_OFFSET);
-
-#define insl(data_reg, buffer, wcount) insw(data_reg, buffer, wcount<<1)
-#define outsl(data_reg, buffer, wcount) outsw(data_reg, buffer, wcount<<1)
+#define insl(data_reg, buffer, wcount) insw(data_reg, buffer, (wcount)<<1)
+#define outsl(data_reg, buffer, wcount) outsw(data_reg, buffer, (wcount)<<1)
 
 #define insw(port, buf, nr) \
-    if (nr % 16) \
+    if ((nr) % 16) \
        __asm__ __volatile__ \
               ("movel %0,%/a0; \
                 movel %1,%/a1; \
@@ -128,10 +218,10 @@ static void probe_m68k_ide (void);
                 movew %/a0@,%/a1@+; \
                 dbra %/d6,1b" : \
                : "g" (port), "g" (buf), "g" (nr) \
-               : "a0", "a1", "d6");
+               : "a0", "a1", "d6")
 
 #define outsw(port, buf, nr) \
-    if (nr % 16) \
+    if ((nr) % 16) \
        __asm__ __volatile__ \
               ("movel %0,%/a0; \
                 movel %1,%/a1; \
@@ -166,7 +256,128 @@ static void probe_m68k_ide (void);
                 movew %/a1@+,%/a0@; \
                 dbra %/d6,1b" : \
                : "g" (port), "g" (buf), "g" (nr) \
-               : "a0", "a1", "d6");
+               : "a0", "a1", "d6")
+
+#ifdef CONFIG_ATARI
+#define insl_swapw(data_reg, buffer, wcount) \
+    insw_swapw(data_reg, buffer, (wcount)<<1)
+#define outsl_swapw(data_reg, buffer, wcount) \
+    outsw_swapw(data_reg, buffer, (wcount)<<1)
+
+#define insw_swapw(port, buf, nr) \
+    if ((nr) % 8) \
+       __asm__ __volatile__ \
+              ("movel %0,%/a0; \
+                movel %1,%/a1; \
+                movel %2,%/d6; \
+                subql #1,%/d6; \
+              1:movew %/a0@,%/d0; \
+                rolw  #8,%/d0; \
+                movew %/d0,%/a1@+; \
+                dbra %/d6,1b" : \
+               : "g" (port), "g" (buf), "g" (nr) \
+               : "d0", "a0", "a1", "d6"); \
+    else \
+       __asm__ __volatile__ \
+              ("movel %0,%/a0; \
+                movel %1,%/a1; \
+                movel %2,%/d6; \
+                lsrl  #3,%/d6; \
+                subql #1,%/d6; \
+              1:movew %/a0@,%/d0; \
+                rolw  #8,%/d0; \
+                movew %/d0,%/a1@+; \
+                movew %/a0@,%/d0; \
+                rolw  #8,%/d0; \
+                movew %/d0,%/a1@+; \
+                movew %/a0@,%/d0; \
+                rolw  #8,%/d0; \
+                movew %/d0,%/a1@+; \
+                movew %/a0@,%/d0; \
+                rolw  #8,%/d0; \
+                movew %/d0,%/a1@+; \
+                movew %/a0@,%/d0; \
+                rolw  #8,%/d0; \
+                movew %/d0,%/a1@+; \
+                movew %/a0@,%/d0; \
+                rolw  #8,%/d0; \
+                movew %/d0,%/a1@+; \
+                movew %/a0@,%/d0; \
+                rolw  #8,%/d0; \
+                movew %/d0,%/a1@+; \
+                movew %/a0@,%/d0; \
+                rolw  #8,%/d0; \
+                movew %/d0,%/a1@+; \
+                dbra %/d6,1b" : \
+               : "g" (port), "g" (buf), "g" (nr) \
+               : "d0", "a0", "a1", "d6")
+
+#define outsw_swapw(port, buf, nr) \
+    if ((nr) % 8) \
+       __asm__ __volatile__ \
+              ("movel %0,%/a0; \
+                movel %1,%/a1; \
+                movel %2,%/d6; \
+                subql #1,%/d6; \
+              1:movew %/a1@+,%/d0; \
+                rolw  #8,%/d0; \
+                movew %/d0,%/a0@; \
+                dbra %/d6,1b" : \
+               : "g" (port), "g" (buf), "g" (nr) \
+               : "d0", "a0", "a1", "d6"); \
+    else \
+       __asm__ __volatile__ \
+              ("movel %0,%/a0; \
+                movel %1,%/a1; \
+                movel %2,%/d6; \
+                lsrl  #3,%/d6; \
+                subql #1,%/d6; \
+              1:movew %/a1@+,%/d0; \
+                rolw  #8,%/d0; \
+                movew %/d0,%/a0@; \
+                movew %/a1@+,%/d0; \
+                rolw  #8,%/d0; \
+                movew %/d0,%/a0@; \
+                movew %/a1@+,%/d0; \
+                rolw  #8,%/d0; \
+                movew %/d0,%/a0@; \
+                movew %/a1@+,%/d0; \
+                rolw  #8,%/d0; \
+                movew %/d0,%/a0@; \
+                movew %/a1@+,%/d0; \
+                rolw  #8,%/d0; \
+                movew %/d0,%/a0@; \
+                movew %/a1@+,%/d0; \
+                rolw  #8,%/d0; \
+                movew %/d0,%/a0@; \
+                movew %/a1@+,%/d0; \
+                rolw  #8,%/d0; \
+                movew %/d0,%/a0@; \
+                movew %/a1@+,%/d0; \
+                rolw  #8,%/d0; \
+                movew %/d0,%/a0@; \
+                dbra %/d6,1b" : \
+               : "g" (port), "g" (buf), "g" (nr) \
+               : "d0", "a0", "a1", "d6")
+
+#endif /* CONFIG_ATARI */
+
+static __inline__ int ide_ack_intr (ide_ioreg_t base_port, ide_ioreg_t irq_port)
+{
+#ifdef CONFIG_AMIGA
+       if (MACH_IS_AMIGA) {
+               unsigned char ch;
+               ch = inb(irq_port);
+               if (!(ch & 0x80))
+                       return(0);
+               if (AMIGAHW_PRESENT(A1200_IDE)) {
+                       (void) inb(base_port);
+                       outb(0x7c | (ch & 0x03), irq_port);
+               }
+       }
+#endif /* CONFIG_AMIGA */
+       return(1);
+}
 
 #define T_CHAR          (0x0000)        /* char:  don't touch  */
 #define T_SHORT         (0x4000)        /* short: 12 -> 21     */
@@ -181,6 +392,7 @@ static void probe_m68k_ide (void);
 #define D_INT(cnt)      (T_INT   | (cnt))
 #define D_TEXT(cnt)     (T_TEXT  | (cnt))
 
+#ifdef CONFIG_AMIGA
 static u_short driveid_types[] = {
        D_SHORT(10),    /* config - vendor2 */
        D_TEXT(20),     /* serial_no */
@@ -199,13 +411,17 @@ static u_short driveid_types[] = {
 };
 
 #define num_driveid_types       (sizeof(driveid_types)/sizeof(*driveid_types))
+#endif /* CONFIG_AMIGA */
 
-static __inline__ void big_endianize_driveid(struct hd_driveid *id)
+static __inline__ void ide_fix_driveid(struct hd_driveid *id)
 {
+#ifdef CONFIG_AMIGA
    u_char *p = (u_char *)id;
    int i, j, cnt;
    u_char t;
 
+   if (!MACH_IS_AMIGA)
+       return;
    for (i = 0; i < num_driveid_types; i++) {
       cnt = driveid_types[i] & T_MASK_COUNT;
       switch (driveid_types[i] & T_MASK_TYPE) {
@@ -241,6 +457,66 @@ static __inline__ void big_endianize_driveid(struct hd_driveid *id)
             break;
       }
    }
+#endif /* CONFIG_AMIGA */
+}
+
+static __inline__ void ide_release_lock (int *ide_lock)
+{
+#ifdef CONFIG_ATARI
+       if (MACH_IS_ATARI) {
+               if (*ide_lock == 0) {
+                       printk("ide_release_lock: bug\n");
+                       return;
+               }
+               *ide_lock = 0;
+               stdma_release();
+       }
+#endif /* CONFIG_ATARI */
 }
 
+static __inline__ void ide_get_lock (int *ide_lock, void (*handler)(int, void *, struct pt_regs *), void *data)
+{
+#ifdef CONFIG_ATARI
+       if (MACH_IS_ATARI) {
+               if (*ide_lock == 0) {
+                       if (intr_count > 0)
+                               panic( "Falcon IDE hasn't ST-DMA lock in interrupt" );
+                       stdma_lock(handler, data);
+                       *ide_lock = 1;
+               }
+       }
+#endif /* CONFIG_ATARI */
+}
+
+/*
+ * On the Atari, we sometimes can't enable interrupts:
+ */
+
+/* MSch: changed sti() to STI() wherever possible in ide.c; moved STI() def. 
+ * to asm/ide.h 
+ */
+/* The Atari interrupt structure strictly requires that the IPL isn't lowered
+ * uncontrolled in an interrupt handler. In the concrete case, the IDE
+ * interrupt is already a slow int, so the irq is already disabled at the time
+ * the handler is called, and the IPL has been lowered to the minimum value
+ * possible. To avoid going below that, STI() checks for being called inside
+ * an interrupt, and in that case it does nothing. Hope that is reasonable and
+ * works. (Roman)
+ */
+#if defined(CONFIG_ATARI) && !defined(CONFIG_AMIGA)
+#define        ide_sti()                                       \
+    do {                                               \
+       if (!intr_count) sti();                         \
+    } while(0)
+#elif defined(CONFIG_ATARI)
+#define        ide_sti()                                       \
+    do {                                               \
+       if (!MACH_IS_ATARI || !intr_count) sti();       \
+    } while(0)
+#else /* !defined(CONFIG_ATARI) */
+#define        ide_sti()       sti()
+#endif
+
+#endif /* __KERNEL__ */
+
 #endif /* _M68K_IDE_H */
index d9aff22d6bdf1acaf724464a80ee7326211339ba..034aa3d0bf6417f857a4b0faae37e0c2fa24a3f2 100644 (file)
@@ -176,4 +176,76 @@ static inline unsigned short ip_compute_csum(unsigned char * buff, int len) {
        return sum;
 }
 
+#define _HAVE_ARCH_IPV6_CSUM
+static __inline__ unsigned short int csum_ipv6_magic(struct in6_addr *saddr,
+                                                    struct in6_addr *daddr,
+                                                    __u16 len,
+                                                    unsigned short proto,
+                                                    unsigned int sum) 
+{
+       unsigned long scratch;
+
+        __asm__("
+               .set    noreorder
+               .set    noat
+               addu    %0,%5           # proto (long in network byte order)
+               sltu    $1,%0,%5
+               addu    %0,$1
+
+               addu    %0,%6           # csum
+               sltu    $1,%0,%6
+               lw      %1,0(%2)        # four words source address
+               addu    %0,$1
+               addu    %0,%1
+               sltu    $1,%0,$1
+
+               lw      %1,4(%2)
+               addu    %0,$1
+               addu    %0,%1
+               sltu    $1,%0,$1
+
+               lw      %1,8(%2)
+               addu    %0,$1
+               addu    %0,%1
+               sltu    $1,%0,$1
+
+               lw      %1,12(%2)
+               addu    %0,$1
+               addu    %0,%1
+               sltu    $1,%0,$1
+
+               lw      %1,0(%3)
+               addu    %0,$1
+               addu    %0,%1
+               sltu    $1,%0,$1
+
+               lw      %1,4(%3)
+               addu    %0,$1
+               addu    %0,%1
+               sltu    $1,%0,$1
+
+               lw      %1,8(%3)
+               addu    %0,$1
+               addu    %0,%1
+               sltu    $1,%0,$1
+
+               lw      %1,12(%3)
+               addu    %0,$1
+               addu    %0,%1
+               sltu    $1,%0,$1
+               .set    noat
+               .set    noreorder
+                "
+                : "=r" (sum),
+                 "=r" (scratch)
+                : "r" (saddr),
+                 "r" (daddr),
+                  "0" (htonl((__u32) (len))),
+                 "r" (htonl(proto)),
+                 "r"(sum)
+               : "$1");
+
+       return csum_fold(sum);
+}
+
 #endif /* __ASM_MIPS_CHECKSUM_H */
index c893af2f6ddaa805e8dbcfed54590cf8057bd9a2..e443123d381eada6a415f8632dbc8e592f51eea5 100644 (file)
  * derived from:
  *     Alpha checksum c-code
  *      ix86 inline assembly
+ *      RFC1071 Computing the Internet Checksum
  */
 
 /*
- * computes the checksum of the TCP/UDP pseudo-header
- * returns a 16-bit checksum, already complemented
+ * computes the checksum of a memory block at buff, length len,
+ * and adds in "sum" (32-bit)
+ *
+ * returns a 32-bit number suitable for feeding into itself
+ * or csum_tcpudp_magic
+ *
+ * this function must be called with even lengths, except
+ * for the last fragment, which may be odd
+ *
+ * it's best to have buff aligned on a 32-bit boundary
  */
+extern unsigned int csum_partial(unsigned char * buff, int len, unsigned int sum);
 
-extern inline unsigned short csum_tcpudp_magic(unsigned long saddr,
-                                              unsigned long daddr,
-                                              unsigned short len,
-                                              unsigned short proto,
-                                              unsigned int sum)
-{
-       __asm__ __volatile__("
-               addcc   %0, %1, %0
-               addxcc  %0, %4, %0
-               addxcc  %0, %5, %0
-               addx    %0, %%g0, %0
-
-               ! We need the carry from the addition of 16-bit
-               ! significant addition, so we zap out the low bits
-               ! in one half, zap out the high bits in another,
-               ! shift them both up to the top 16-bits of a word
-               ! and do the carry producing addition, finally
-               ! shift the result back down to the low 16-bits.
-
-               ! Actually, we can further optimize away two shifts
-               ! because we know the low bits of the original
-               ! value will be added to zero-only bits so cannot
-               ! affect the addition result nor the final carry
-               ! bit.
-
-               sll     %0, 16, %1
-               addcc   %0, %1, %0              ! add and set carry, neat eh?
-               srl     %0, 16, %0              ! shift back down the result
-               addx    %0, %%g0, %0            ! get remaining carry bit
-               xnor    %%g0, %0, %0            ! negate, sparc is cool
-               "
-               : "=&r" (sum), "=&r" (saddr)
-               : "0" (daddr), "1" (saddr), "r" (len+proto), "r" (sum));
-               return ((unsigned short) sum); 
-}
-
-extern inline unsigned short from32to16(unsigned long x)
-{
-       __asm__ __volatile__("
-               addcc   %0, %1, %0
-               srl     %0, 16, %0
-               addx    %%g0, %0, %0
-               "
-               : "=r" (x)
-               : "r" (x << 16), "0" (x));
-       return x;
-}
-
-extern inline unsigned long do_csum(unsigned char * buff, int len)
-{
-       int odd, count;
-       unsigned long result = 0;
+/*
+ * the same as csum_partial, but copies from fs:src while it
+ * checksums
+ *
+ * here even more important to align src and dst on a 32-bit (or even
+ * better 64-bit) boundary
+ */
+extern unsigned int csum_partial_copy(char *src, char *dst, int len, int sum);
 
-       if (len <= 0)
-               goto out;
-       odd = 1 & (unsigned long) buff;
-       if (odd) {
-               result = *buff;
-               len--;
-               buff++;
-       }
-       count = len >> 1;               /* nr of 16-bit words.. */
-       if (count) {
-               if (2 & (unsigned long) buff) {
-                       result += *(unsigned short *) buff;
-                       count--;
-                       len -= 2;
-                       buff += 2;
-               }
-               count >>= 1;            /* nr of 32-bit words.. */
-               if (count) {
-                       unsigned long carry = 0;
-                       do {
-                               unsigned long w = *(unsigned long *) buff;
-                               count--;
-                               buff += 4;
-                               result += carry;
-                               result += w;
-                               carry = (w > result);
-                       } while (count);
-                       result += carry;
-                       result = (result & 0xffff) + (result >> 16);
-               }
-               if (len & 2) {
-                       result += *(unsigned short *) buff;
-                       buff += 2;
-               }
-       }
-       if (len & 1)
-               result += (*buff << 8);
-       result = from32to16(result);
-       if (odd)
-               result = ((result >> 8) & 0xff) | ((result & 0xff) << 8);
-out:
-       return result;
-}
+#define csum_partial_copy_fromuser(s, d, l, w)  \
+                       csum_partial_copy((char *) (s), (d), (l), (w))
 
 /* ihl is always 5 or greater, almost always is 5, iph is always word
  * aligned but can fail to be dword aligned very often.
  */
 extern inline unsigned short ip_fast_csum(const unsigned char *iph, unsigned int ihl)
 {
-       unsigned int sum;
+       unsigned long tmp1, tmp2;
+       unsigned short sum;
 
        __asm__ __volatile__("
-               ld      [%1], %0
+               ld      [%1 + 0x00], %0
+               ld      [%1 + 0x04], %3
                sub     %2, 4, %2
-               ld      [%1 + 0x4], %%g1
-               ld      [%1 + 0x8], %%g2
-               addcc   %%g1, %0, %0
-               addxcc  %%g2, %0, %0
-               ld      [%1 + 0xc], %%g1
-               ld      [%1 + 0x10], %%g2
-               addxcc  %%g1, %0, %0
-               addxcc  %0, %%g0, %0
-1:
-               addcc   %%g2, %0, %0
-               add     %1, 0x4, %1
+               addcc   %3, %0, %0
+               ld      [%1 + 0x08], %4
+               addxcc  %4, %0, %0
+               ld      [%1 + 0x0c], %3
+               addxcc  %3, %0, %0
+               ld      [%1 + 0x10], %4
+               addx    %0, %%g0, %0
+       1:
+               addcc   %4, %0, %0
+               add     %1, 4, %1
                addxcc  %0, %%g0, %0
-               subcc   %2, 0x1, %2
-               bne,a   1b
-                ld     [%1 + 0x10], %%g2
+               subcc   %2, 1, %2
+               be,a    2f
+                sll    %0, 16, %3
+
+               b       1b
+                ld     [%1 + 0x10], %4
+       2:
+               addcc   %0, %3, %3
+               srl     %3, 16, %0
+               addx    %0, %%g0, %0
+               xnor    %%g0, %0, %0
+       " : "=r" (sum), "=&r" (iph), "=&r" (ihl), "=r" (tmp1), "=r" (tmp2)
+         : "1" (iph), "2" (ihl));
 
-               sll     %0, 16, %2
-               addcc   %0, %2, %2
-               srl     %2, 16, %0
-               addx    %0, %%g0, %2
-               xnor    %%g0, %2, %0
-2:
-               "
-               : "=&r" (sum), "=&r" (iph), "=&r" (ihl)
-               : "1" (iph), "2" (ihl)
-               : "g1", "g2");
        return sum;
 }
 
 /*
- * computes the checksum of a memory block at buff, length len,
- * and adds in "sum" (32-bit)
- *
- * returns a 32-bit number suitable for feeding into itself
- * or csum_tcpudp_magic
- *
- * this function must be called with even lengths, except
- * for the last fragment, which may be odd
- *
- * it's best to have buff aligned on a 32-bit boundary
+ * computes the checksum of the TCP/UDP pseudo-header
+ * returns a 16-bit checksum, already complemented
  */
-extern inline unsigned int csum_partial(unsigned char * buff, int len, unsigned int sum)
+extern inline unsigned short csum_tcpudp_magic(unsigned long saddr, unsigned long daddr,
+                                              unsigned int len, unsigned short proto,
+                                              unsigned int sum)
 {
        __asm__ __volatile__("
-               mov     0, %%g5                 ! g5 = result
-               cmp     %1, 0
-               bgu,a   1f
-                andcc  %0, 1, %%g7             ! g7 = odd
-
-               b,a     9f
-
-1:
-               be,a    1f
-                srl    %1, 1, %%g6             ! g6 = count = (len >> 1)
-
-               sub     %1, 1, %1       ! if(odd) { result = *buff;
-               ldub    [%0], %%g5      !           len--;
-               add     %0, 1, %0       !           buff++ }
-
-               srl     %1, 1, %%g6
-1:
-               cmp     %%g6, 0         ! if (count) {
-               be,a    8f
-                andcc  %1, 1, %%g0
-
-               andcc   %0, 2, %%g0     ! if (2 & buff) {
-               be,a    1f
-                srl    %%g6, 1, %%g6
-
-               sub     %1, 2, %1       !       result += *(unsigned short *) buff;
-               lduh    [%0], %%g1      !       count--; 
-               sub     %%g6, 1, %%g6   !       len -= 2;
-               add     %%g1, %%g5, %%g5!       buff += 2; 
-               add     %0, 2, %0       ! }
-
-               srl     %%g6, 1, %%g6
-1:
-               cmp     %%g6, 0         ! if (count) {
-               be,a    2f
-                andcc  %1, 2, %%g0
-
-               ld      [%0], %%g1              ! csum aligned 32bit words
-1:
-               add     %0, 4, %0
-               addcc   %%g1, %%g5, %%g5
-               addx    %%g5, %%g0, %%g5
-               subcc   %%g6, 1, %%g6
-               bne,a   1b
-                ld     [%0], %%g1
-
-               sethi   %%hi(0xffff), %%g3
-               srl     %%g5, 16, %%g2
-               or      %%g3, %%lo(0xffff), %%g3
-               and     %%g5, %%g3, %%g5
-               add     %%g2, %%g5, %%g5! }
-
-               andcc   %1, 2, %%g0
-2:
-               be,a    8f              ! if (len & 2) {
-                andcc  %1, 1, %%g0
-
-               lduh    [%0], %%g1      !       result += *(unsigned short *) buff; 
-               add     %%g5, %%g1, %%g5!       buff += 2; 
-               add     %0, 2, %0       ! }
-
-
-               andcc   %1, 1, %%g0
-8:
-               be,a    1f              ! if (len & 1) {
-                sll    %%g5, 16, %%g1
-
-               ldub    [%0], %%g1
-               sll     %%g1, 8, %%g1   !       result += (*buff << 8); 
-               add     %%g5, %%g1, %%g5! }
-
-               sll     %%g5, 16, %%g1
-1:
-               addcc   %%g1, %%g5, %%g5! result = from32to16(result);
-               srl     %%g5, 16, %%g1
-               addx    %%g0, %%g1, %%g5
-
-               orcc    %%g7, %%g0, %%g0! if(odd) {
-               be      9f
-                srl    %%g5, 8, %%g1
-
-               and     %%g5, 0xff, %%g2!       result = ((result >> 8) & 0xff) |
-               and     %%g1, 0xff, %%g1!               ((result & 0xff) << 8);
-               sll     %%g2, 8, %%g2
-               or      %%g2, %%g1, %%g5! }
-9:
-               addcc   %2, %%g5, %2    ! add result and sum with carry
-               addx    %%g0, %2, %2
-       " :
-        "=&r" (buff), "=&r" (len), "=&r" (sum) :
-        "0" (buff), "1" (len), "2" (sum) :
-       "g1", "g2", "g3", "g5", "g6", "g7"); 
+               addcc   %1, %0, %0
+               addxcc  %2, %0, %0
+               addxcc  %3, %0, %0
+               addx    %0, %%g0, %0
+               sll     %0, 16, %1
+               addcc   %1, %0, %0
+               srl     %0, 16, %0
+               addx    %0, %%g0, %0
+               xnor    %%g0, %0, %0
+       " : "=r" (sum), "=r" (saddr)
+         : "r" (daddr), "r" ((proto<<16)+len), "0" (sum), "1" (saddr));
 
        return sum;
 }
 
 /*
- * the same as csum_partial, but copies from fs:src while it
- * checksums
- *
- * here even more important to align src and dst on a 32-bit (or even
- * better 64-bit) boundary
+ *     Fold a partial checksum without adding pseudo headers
  */
-extern inline unsigned int csum_partial_copy(char *src, char *dst, int len, int sum)
+extern inline unsigned int csum_fold(unsigned int sum)
 {
-       /*
-        * The whole idea is to do the copy and the checksum at
-        * the same time, but we do it the easy way now.
-        *
-        * At least csum on the source, not destination, for cache
-        * reasons..
-        */
-       sum = csum_partial(src, len, sum);
-       memcpy(dst, src, len);
+       unsigned int tmp;
+
+       __asm__ __volatile__("
+               addcc   %0, %1, %1
+               srl     %1, 16, %1
+               addx    %1, %%g0, %1
+               xnor    %%g0, %1, %0
+       " : "=&r" (sum), "=r" (tmp)
+         : "0" (sum), "1" (sum<<16));
+
        return sum;
 }
 
-/*
- * this routine is used for miscellaneous IP-like checksums, mainly
- * in icmp.c
- */
-extern inline unsigned short ip_compute_csum(unsigned char * buff, int len)
+#define _HAVE_ARCH_IPV6_CSUM
+
+static __inline__ unsigned short int csum_ipv6_magic(struct in6_addr *saddr,
+                                                    struct in6_addr *daddr,
+                                                    __u16 len,
+                                                    unsigned short proto,
+                                                    unsigned int sum) 
 {
-       return ~from32to16(do_csum(buff,len));
-}
+       __asm__ __volatile__ ("
+               addcc   %3, %4, %%g4
+               addxcc  %5, %%g4, %%g4
+               ld      [%2 + 0x0c], %%g2
+               ld      [%2 + 0x08], %%g3
+               addxcc  %%g2, %%g4, %%g4
+               ld      [%2 + 0x04], %%g2
+               addxcc  %%g3, %%g4, %%g4
+               ld      [%2 + 0x00], %%g3
+               addxcc  %%g2, %%g4, %%g4
+               ld      [%1 + 0x0c], %%g2
+               addxcc  %%g3, %%g4, %%g4
+               ld      [%1 + 0x08], %%g3
+               addxcc  %%g2, %%g4, %%g4
+               ld      [%1 + 0x04], %%g2
+               addxcc  %%g3, %%g4, %%g4
+               ld      [%1 + 0x00], %%g3
+               addxcc  %%g2, %%g4, %%g4
+               addxcc  %%g3, %%g4, %0
+               addx    0, %0, %0
+               "
+               : "=&r" (sum)
+               : "r" (saddr), "r" (daddr), 
+                 "r"(htonl((__u32) (len))), "r"(htonl(proto)), "r"(sum)
+               : "g2", "g3", "g4");
 
-#define csum_partial_copy_fromuser(s, d, l, w)  \
-                       csum_partial_copy((char *) (s), (d), (l), (w))
+       return csum_fold(sum);
+}
 
 /*
- *     Fold a partial checksum without adding pseudo headers
+ * this routine is used for miscellaneous IP-like checksums, mainly
+ * in icmp.c
  */
-extern inline unsigned int csum_fold(unsigned int sum)
+extern inline unsigned short ip_compute_csum(unsigned char * buff, int len)
 {
-       __asm__ __volatile__("
-               addcc   %0, %1, %0
-               srl     %0, 16, %0
-               addx    %%g0, %0, %0
-               xnor    %%g0, %0, %0
-               "
-               : "=r" (sum)
-               : "r" (sum << 16), "0" (sum)); 
-       return sum;
+       return csum_fold(csum_partial(buff, len, 0));
 }
 
 #endif /* !(__SPARC_CHECKSUM_H) */
index 4d33990c1ed160f4e043400419266e5494f4fef6..ba4b626480be348dafc7c58facd0742e7b1907fd 100644 (file)
@@ -101,6 +101,7 @@ struct hd_geometry {
 #define HDIO_SET_NOWERR                0x0325  /* change ignore-write-error flag */
 #define HDIO_SET_DMA           0x0326  /* change use-dma flag */
 #define HDIO_SET_PIO_MODE      0x0327  /* reconfig interface to new speed */
+#define HDIO_SCAN_HWIF         0x0328  /* register and (re)scan interface */
 
 /* structure returned by HDIO_GET_IDENTITY, as per ANSI ATA2 rev.2f spec */
 struct hd_driveid {
@@ -162,15 +163,15 @@ struct hd_driveid {
 #ifdef CONFIG_BLK_DEV_HD
 void hd_setup(char *, int *);
 #endif /* CONFIG_BLK_DEV_HD */
+
 #ifdef CONFIG_BLK_DEV_IDE
 void ide_setup(char *);
+#endif /* CONFIG_BLK_DEV_IDE */
 
-#ifdef CONFIG_BLK_DEV_IDE_PCMCIA
+#if defined(CONFIG_BLK_DEV_IDE) || defined(CONFIG_BLK_DEV_IDE_MODULE)
 int ide_register(int io_port, int ctl_port, int irq);
 void ide_unregister(unsigned int);
-#endif  /* CONFIG_BLK_DEV_IDE_PCMCIA */
-
-#endif /* CONFIG_BLK_DEV_IDE */
+#endif /* CONFIG_BLK_DEV_IDE || CONFIG_BLK_DEV_IDE_MODULE */
 
 #endif  /* __KERNEL__ */
 
diff --git a/include/linux/icmpv6.h b/include/linux/icmpv6.h
new file mode 100644 (file)
index 0000000..0a099ff
--- /dev/null
@@ -0,0 +1,143 @@
+#ifndef _LINUX_ICMPV6_H
+#define _LINUX_ICMPV6_H
+
+#include <asm/byteorder.h>
+
+struct icmpv6hdr {
+
+       __u8            type;
+       __u8            code;
+       __u16           checksum;
+
+
+       union {
+               struct icmpv6_echo {
+                       __u16           identifier;
+                       __u16           sequence;
+               } u_echo;
+               __u32                   pointer;
+               __u32                   mtu;
+               __u32                   unused;
+
+                struct icmpv6_nd_advt {
+#if defined(__LITTLE_ENDIAN_BITFIELD)
+                        __u32          reserved:5,
+                                       override:1,
+                                       solicited:1,
+                                       router:1,
+                                       reserved2:24;
+#elif defined(__BIG_ENDIAN_BITFIELD)
+                        __u32          router:1,
+                                       solicited:1,
+                                       override:1,
+                                       reserved:29;
+#else
+#error "Please fix <asm/byteorder.h>"
+#endif                                         
+                } u_nd_advt;
+
+                struct icmpv6_nd_ra {
+                       __u8            hop_limit;
+#if defined(__LITTLE_ENDIAN_BITFIELD)
+                       __u8            reserved:6,
+                                       other:1,
+                                       managed:1;
+
+#elif defined(__BIG_ENDIAN_BITFIELD)
+                       __u8            managed:1,
+                                       other:1,
+                                       reserved:6;
+#else
+#error "Please fix <asm/byteorder.h>"
+#endif
+                       __u16           rt_lifetime;
+                } u_nd_ra;
+
+       } u;
+
+#define icmp6_identifier       u.u_echo.identifier
+#define icmp6_sequence         u.u_echo.sequence
+#define icmp6_pointer          u.pointer
+#define icmp6_mtu              u.mtu
+#define icmp6_unused           u.unused
+#define icmp6_router           u.u_nd_advt.router
+#define icmp6_solicited                u.u_nd_advt.solicited
+#define icmp6_override         u.u_nd_advt.override
+#define icmp6_ndiscreserved    u.u_nd_advt.reserved
+#define icmp6_hop_limit                u.u_nd_ra.hop_limit
+#define icmp6_addrconf_managed u.u_nd_ra.managed
+#define icmp6_addrconf_other   u.u_nd_ra.other
+#define icmp6_rt_lifetime      u.u_nd_ra.rt_lifetime
+};
+
+
+#define ICMPV6_DEST_UNREACH            1
+#define ICMPV6_PKT_TOOBIG              2
+#define ICMPV6_TIME_EXCEEDED           3
+#define ICMPV6_PARAMETER_PROB          4
+
+#define ICMPV6_ECHO_REQUEST            128
+#define ICMPV6_ECHO_REPLY              129
+#define ICMPV6_MEMBERSHIP_QUERY                130
+#define ICMPV6_MEMBERSHIP_REPORT               131
+#define ICMPV6_MEMBERSHIP_REDUCTION            132
+
+/*
+ *     Codes for Destination Unreachable
+ */
+#define ICMPV6_NOROUTE                 0
+#define ICMPV6_ADM_PROHIBITED          1
+#define ICMPV6_NOT_NEIGHBOUR           2
+#define ICMPV6_ADDR_UNREACH            3
+#define ICMPV6_PORT_UNREACH            4
+
+/*
+ *     Codes for Time Exceeded
+ */
+#define ICMPV6_EXC_HOPLIMIT            0
+#define ICMPV6_EXC_FRAGTIME            1
+
+/*
+ *     Codes for Parameter Problem
+ */
+#define ICMPV6_HDR_FIELD               0
+#define ICMPV6_UNK_NEXTHDR             1
+#define ICMPV6_UNK_OPTION              2
+
+/*
+ *     constants for (set|get)sockopt
+ */
+
+#define RAW_CHECKSUM                   1
+#define ICMPV6_FILTER                  256
+
+/*
+ *     ICMPV6 filter
+ */
+
+#define ICMPV6_FILTER_BLOCK            1
+#define ICMPV6_FILTER_PASS             2
+#define ICMPV6_FILTER_BLOCKOTHERS      3
+#define ICMPV6_FILTER_PASSONLY         4
+
+struct icmp6_filter {
+       __u32           data[8];
+};
+
+#ifdef __KERNEL__
+
+#include <linux/netdevice.h>
+#include <linux/skbuff.h>
+
+
+extern void                            icmpv6_send(struct sk_buff *skb,
+                                                   int type, int code,
+                                                   __u32 info, 
+                                                   struct device *dev);
+
+extern void                            icmpv6_init(struct proto_ops *ops);
+extern int                             icmpv6_err_convert(int type, int code,
+                                                          int *err);
+#endif
+
+#endif
index ae89faa525878219310d0c94b5ac660df593abdc..6e21bcd25291ce25db0691967d9e21c0bcdd1206 100644 (file)
@@ -52,6 +52,7 @@
 #define ARPHRD_SKIP    771             /* SKIP vif                     */
 #define ARPHRD_LOOPBACK        772             /* Loopback device              */
 #define ARPHRD_LOCALTLK 773            /* Localtalk device             */
+#define ARPHRD_SIT     774             /* sit0 device - IPv6-in-IPv4   */
 
 /* ARP protocol opcodes. */
 #define        ARPOP_REQUEST   1               /* ARP request                  */
index 0a594a3f0197a2b45e4fc135509ba39558c9957f..9da6c900f533c159647cf097ba16969114ada1aa 100644 (file)
@@ -32,7 +32,10 @@ enum {
   IPPROTO_UDP = 17,            /* User Datagram Protocol               */
   IPPROTO_IDP = 22,            /* XNS IDP protocol                     */
 
-  IPPROTO_RAW = 255,           /* Raw IP packets                       */
+  IPPROTO_IPV6  = 41,          /* IPv6-in-IPv4 tunnelling              */
+  IPPROTO_ICMPV6 = 58,         /* ICMPv6                               */
+
+  IPPROTO_RAW   = 255,         /* Raw IP packets                       */
   IPPROTO_MAX
 };
 
@@ -54,7 +57,7 @@ struct ip_mreq
 /* Structure describing an Internet (IP) socket address. */
 #define __SOCK_SIZE__  16              /* sizeof(struct sockaddr)      */
 struct sockaddr_in {
-  short int            sin_family;     /* Address family               */
+  unsigned short int   sin_family;     /* Address family               */
   unsigned short int   sin_port;       /* Port number                  */
   struct in_addr       sin_addr;       /* Internet address             */
 
@@ -126,23 +129,4 @@ struct sockaddr_in {
 
 #endif
 
-/*
- *     IPv6 definitions as we start to include them. This is just
- *     a beginning -- don't get excited 8)
- */
-struct in_addr6
-{
-       unsigned char s6_addr[16];
-};
-
-struct sockaddr_in6
-{
-       unsigned short sin6_family;
-       unsigned short sin6_port;
-       unsigned long sin6_flowinfo;
-       struct in_addr6 sin6_addr;
-};
-
-
 #endif /* _LINUX_IN_H */
diff --git a/include/linux/in6.h b/include/linux/in6.h
new file mode 100644 (file)
index 0000000..ebcd91e
--- /dev/null
@@ -0,0 +1,99 @@
+/*
+ *     Types and definitions for AF_INET6 
+ *     Linux INET6 implementation 
+ *
+ *     Authors:
+ *     Pedro Roque             <roque@di.fc.ul.pt>     
+ *
+ *     Source:
+ *     IPv6 Program Interfaces for BSD Systems
+ *      <draft-ietf-ipngwg-bsd-api-05.txt>
+ *
+ *     This program is free software; you can redistribute it and/or
+ *      modify it under the terms of the GNU General Public License
+ *      as published by the Free Software Foundation; either version
+ *      2 of the License, or (at your option) any later version.
+ */
+
+#ifndef _LINUX_IN6_H
+#define _LINUX_IN6_H
+
+
+/*
+ *     IPv6 address structure
+ */
+
+struct in6_addr
+{
+       union 
+       {
+               unsigned char   u6_addr8[16];
+               __u32           u6_addr32[4];
+       } in6_u;
+#define s6_addr32              in6_u.u6_addr32
+#define s6_addr                        in6_u.u6_addr8
+};
+
+struct sockaddr_in6 {
+       unsigned short int      sin6_family;    /* AF_INET6 */
+       __u16                   sin6_port;      /* Transport layer port # */
+       __u32                   sin6_flowinfo;  /* IPv6 flow information */
+       struct in6_addr         sin6_addr;      /* IPv6 address */
+};
+
+
+struct ipv6_mreq {
+       /* IPv6 multicast address of group */
+       struct in6_addr ipv6mr_multiaddr;
+
+       /* local IPv6 address of interface */
+       struct in6_addr ipv6mr_interface;
+};
+
+/*
+ *     Bitmask constant declarations to help applications select out the 
+ *     flow label and priority fields.
+ *
+ *     Note that this are in host byte order while the flowinfo field of
+ *     sockaddr_in6 is in network byte order.
+ */
+
+#define IPV6_FLOWINFO_FLOWLABEL                0x00ff
+#define IPV6_FLOWINFO_PRIORITY         0x0f00
+
+#define IPV6_PRIORITY_UNCHARACTERIZED  0x0000
+#define IPV6_PRIORITY_FILLER           0x0100
+#define IPV6_PRIORITY_UNATTENDED       0x0200
+#define IPV6_PRIORITY_RESERVED1                0x0300
+#define IPV6_PRIORITY_BULK             0x0400
+#define IPV6_PRIORITY_RESERVED2                0x0500
+#define IPV6_PRIORITY_INTERACTIVE      0x0600
+#define IPV6_PRIORITY_CONTROL          0x0700
+#define IPV6_PRIORITY_8                        0x0800
+#define IPV6_PRIORITY_9                        0x0900
+#define IPV6_PRIORITY_10               0x0a00
+#define IPV6_PRIORITY_11               0x0b00
+#define IPV6_PRIORITY_12               0x0c00
+#define IPV6_PRIORITY_13               0x0d00
+#define IPV6_PRIORITY_14               0x0e00
+#define IPV6_PRIORITY_15               0x0f00
+
+/*
+ *     IPV6 socket options
+ */
+
+#define IPV6_ADDRFORM          1
+#define IPV6_RXINFO            2
+#define IPV6_TXINFO            IPV6_RXINFO
+#define SCM_SRCINFO            IPV6_TXINFO
+#define SCM_SRCRT              4
+#define IPV6_UNICAST_HOPS      5
+
+
+#define IPV6_MULTICAST_IF      17
+#define IPV6_MULTICAST_HOPS    18
+#define IPV6_MULTICAST_LOOP    19
+#define IPV6_ADD_MEMBERSHIP    20
+#define IPV6_DROP_MEMBERSHIP   21
+
+#endif
diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h
new file mode 100644 (file)
index 0000000..78bb922
--- /dev/null
@@ -0,0 +1,105 @@
+#ifndef _IPV6_H
+#define _IPV6_H
+
+#include <linux/in6.h>
+#include <asm/byteorder.h>
+
+/*
+ *     IPv6 fixed header
+ */
+
+struct ipv6hdr {
+#if defined(__LITTLE_ENDIAN_BITFIELD)
+       __u8                    priority:4,
+                               version:4;
+#elif defined(__BIG_ENDIAN_BITFIELD)
+       __u8                    version:4,
+                               priority:4;
+#else
+#error "Please fix <asm/byteorder.h>"
+#endif                                         
+       __u8                    flow_lbl[3];
+
+       __u16                   payload_len;
+       __u8                    nexthdr;
+       __u8                    hop_limit;
+
+       struct  in6_addr        saddr;
+       struct  in6_addr        daddr;
+};
+
+struct in6_ifreq {
+       struct in6_addr addr;
+       __u32           prefix_len;
+       char            devname[8]; 
+};
+
+/*
+ *     Advanced API
+ *     source interface/address selection, source routing, etc...
+ *     *under construction*
+ */
+
+
+struct in6_pktinfo {
+       int             ipi6_ifindex;
+       struct in6_addr ipi6_addr;
+};
+
+#define IPV6_SRCRT_STRICT      0x01    /* this hop must be a neighbor  */
+#define IPV6_SRCRT_TYPE_0      0       /* IPv6 type 0 Routing Header   */
+
+/*
+ *     routing header
+ */
+struct ipv6_rt_hdr {
+       __u8            nexthdr;
+       __u8            hdrlen;
+       __u8            type;
+       __u8            segments_left;
+
+       /*
+        *      type specific data
+        *      variable length field
+        */
+};
+
+/*
+ *     routing header type 0 (used in cmsghdr struct)
+ */
+
+struct rt0_hdr {
+       struct ipv6_rt_hdr      rt_hdr;
+       __u32                   bitmap;         /* strict/loose bit map */
+       struct in6_addr         addr[0];
+
+#define rt0_type               rt_hdr.type;
+};
+
+#ifdef __KERNEL__
+
+/*
+ *     The length of this struct cannot be greater than the length of
+ *     the proto_priv field in a sk_buff which is currently
+ *     defined to be 16 bytes.
+ *     Pointers take upto 8 bytes (sizeof(void *) is 8 on the alpha).
+ */
+struct ipv6_options 
+{
+       /* length of extension headers   */
+
+       __u16                   opt_flen;       /* after fragment hdr */
+       __u16                   opt_nflen;      /* before fragment hdr */
+
+       /* 
+        * protocol options 
+        * usualy carried in IPv6 extension headers
+        */
+
+       struct ipv6_rt_hdr              *srcrt; /* Routing Header */
+
+};
+
+#endif
+
+#endif
diff --git a/include/linux/ipv6_route.h b/include/linux/ipv6_route.h
new file mode 100644 (file)
index 0000000..0d70912
--- /dev/null
@@ -0,0 +1,41 @@
+/*
+ *     Linux INET6 implementation 
+ *
+ *     Authors:
+ *     Pedro Roque             <roque@di.fc.ul.pt>     
+ *
+ *     This program is free software; you can redistribute it and/or
+ *      modify it under the terms of the GNU General Public License
+ *      as published by the Free Software Foundation; either version
+ *      2 of the License, or (at your option) any later version.
+ */
+
+#ifndef _LINUX_IPV6_ROUTE_H
+#define _LINUX_IPV6_ROUTE_H
+
+#include <linux/route.h>
+
+#define RTI_DEVRT      0x00010000      /* route lookup, dev must match */
+#define RTI_ALLONLINK  0x00020000      /* all destinations on link     */
+#define RTI_DCACHE     RTF_DCACHE      /* rt6_info is a dcache entry   */
+#define RTI_INVALID    RTF_INVALID     /* invalid route/dcache entry   */
+
+#define RTI_DYNAMIC    RTF_DYNAMIC     /* rt6_info created dynamicly   */
+#define RTI_GATEWAY    RTF_GATEWAY
+#define RTI_DYNMOD     RTF_MODIFIED    /* more specific route may exist*/
+
+#define DCF_PMTU       RTF_MSS         /* dest cache has valid PMTU    */
+#define DCF_INVALID    RTF_INVALID
+
+struct in6_rtmsg {
+       __u32                   rtmsg_type;
+       struct in6_addr         rtmsg_dst;
+       struct in6_addr         rtmsg_gateway;
+       __u16                   rtmsg_prefixlen;
+       __u16                   rtmsg_metric;
+       char                    rtmsg_device[16];
+        __u16                  rtmsg_flags;
+       unsigned long           rtmsg_info;
+};
+
+#endif
index 3757ddb69631da4fa2aaf797be98090218e208f5..57a5323e150761d7db3bcfb8cdef5525e28c8d9d 100644 (file)
@@ -352,8 +352,9 @@ extern unsigned short plain_map[NR_KEYS];
 #define K_DCIRCM       K(KT_DEAD,2)
 #define K_DTILDE       K(KT_DEAD,3)
 #define K_DDIERE       K(KT_DEAD,4)
+#define K_DCEDIL       K(KT_DEAD,5)
 
-#define NR_DEAD                5
+#define NR_DEAD                6
 
 #define K_DOWN         K(KT_CUR,0)
 #define K_LEFT         K(KT_CUR,1)
index d0f300c4f8607ffe3ce7137c89e3f3638b6038ed..14428592c4f63df9e70003a68c5c4f5b14b06118 100644 (file)
@@ -11,7 +11,7 @@
 #define MAX_CANON        255   /* size of the canonical input queue */
 #define MAX_INPUT        255   /* size of the type-ahead buffer */
 #define NAME_MAX         255   /* # chars in a file name */
-#define PATH_MAX        1024   /* # chars in a path name */
+#define PATH_MAX        4095   /* # chars in a path name */
 #define PIPE_BUF        4096   /* # bytes in atomic write to a pipe */
 
 #endif
index cff2d9769ee31465fc0dfe1297752d4a2d233e1d..9735bc630ee9b9db83e18a756bb24376ca16cba3 100644 (file)
@@ -28,7 +28,7 @@ extern inline void wait_on_buffer(struct buffer_head * bh)
 
 extern inline void lock_buffer(struct buffer_head * bh)
 {
-       if (set_bit(BH_Lock, &bh->b_state))
+       while (set_bit(BH_Lock, &bh->b_state))
                __wait_on_buffer(bh);
 }
 
index c442b1138949f7b1aa11f0748f92c712e9b9ea3b..1bf540e936157b35fa8835ee1eec9b2fac3bb184 100644 (file)
@@ -58,6 +58,10 @@ struct symbol_table { /* received from "insmod" */
 /*
  * Note: The string table follows immediately after the symbol table in memory!
  */
+struct _exceptinfo{
+       struct exception_table_entry *start;
+       struct exception_table_entry *stop;
+};
 
 struct module {
        struct module *next;
@@ -68,11 +72,26 @@ struct module {
        void* addr;                     /* address of module */
        int state;
        void (*cleanup)(void);          /* cleanup routine */
+       struct _exceptinfo exceptinfo;
 };
 
+/*
+       prior to modules-2.1 there were no real way to identify
+       which insmod is talking to us Now a special signature must
+       be written here.
+
+       The new module utilities knows about older kernel and write
+       the init in the signature and the cleanup in the init.
+       This is to make sure newer utilities work with older kernel
+       so it is simple for people to upgrade.
+*/
+#define MODULE_2_1_7_SIG       ((void*)0x00000217)
+
 struct mod_routines {
+       void *signature;
        int (*init)(void);              /* initialization routine */
        void (*cleanup)(void);          /* cleanup routine */
+       struct _exceptinfo exceptinfo;
 };
 
 /*
@@ -104,6 +123,7 @@ int Using_Versions; /* gcc will handle this global (used as a flag) correctly */
 #define MOD_INC_USE_COUNT      do { } while (0)
 #define MOD_DEC_USE_COUNT      do { } while (0)
 #define MOD_IN_USE             1
+extern struct module *module_list;
 
 #endif
 
index f92295ce3ba05d0a03beb52388a54bd599123630..e4783e28d8fcc6c798b785575f43a93159de4a1f 100644 (file)
 /* for future expansion when we will have different priorities. */
 #define DEV_NUMBUFFS   3
 #define MAX_ADDR_LEN   7
-#ifndef CONFIG_AX25
-#ifndef CONFIG_TR
-#ifndef CONFIG_NET_IPIP
-#define MAX_HEADER     32              /* We really need about 18 worst case .. so 32 is aligned */
+
+#if !defined(CONFIG_AX25) && !defined(CONFIG_TR)
+#define LL_MAX_HEADER  32
 #else
-#define MAX_HEADER     80              /* We need to allow for having tunnel headers */
-#endif  /* IPIP */
+#if defined(CONFIG_AX25)
+#define LL_MAX_HEADER  96
 #else
-#define MAX_HEADER     48              /* Token Ring header needs 40 bytes ... 48 is aligned */ 
-#endif /* TR */
+#define LL_MAX_HEADER  48
+#endif
+#endif
+
+#if !defined(CONFIG_NET_IPIP) && \
+    !defined(CONFIG_IPV6) && !defined(CONFIG_IPV6_MODULE)
+#define MAX_HEADER LL_MAX_HEADER
 #else
-#define MAX_HEADER     96              /* AX.25 + NetROM */
-#endif /* AX25 */
+#define MAX_HEADER (LL_MAX_HEADER + 48)
+#endif
 
 #define IS_MYADDR      1               /* address is (one of) our own  */
 #define IS_LOOPBACK    2               /* address is for LOOPBACK      */
@@ -144,7 +148,13 @@ struct device
   unsigned char                  pad;                          /* make dev_addr aligned to 8 bytes */
   unsigned char                  dev_addr[MAX_ADDR_LEN];       /* hw address   */
   unsigned char                  addr_len;     /* hardware address length      */
+#if 0
+  __u32                          pa_addr_arr[4];
+  __u16                          pa_prefix_len;
+#define pa_addr                  pa_addr_arr[3];
+#else
   unsigned long                  pa_addr;      /* protocol address             */
+#endif
   unsigned long                  pa_brdaddr;   /* protocol broadcast addr      */
   unsigned long                  pa_dstaddr;   /* protocol P-P other side addr */
   unsigned long                  pa_mask;      /* protocol netmask             */
index ef81f7d54faf655020cc35924c7cd73332bba079..124c15349a9fa2a76d0a044c4caa20ef3737fdbd 100644 (file)
 #define PCI_DEVICE_ID_DEC_FDDI         0x000F
 #define PCI_DEVICE_ID_DEC_TULIP_PLUS   0x0014
 #define PCI_DEVICE_ID_DEC_21052_AB     0x0021
+#define PCI_DEVICE_ID_DEC_21152_AA     0x0024
 
 #define PCI_VENDOR_ID_CIRRUS           0x1013
 #define PCI_DEVICE_ID_CIRRUS_5430      0x00a0
index ef8210c6fd34c94dbb6f36455d33c2893f3963f6..1b20ab9e1a531937c0f71d537113e51e82a922c5 100644 (file)
@@ -69,6 +69,7 @@
 #define PPP_IPX                0x2b    /* IPX protocol */
 #define        PPP_VJC_COMP    0x2d    /* VJ compressed TCP */
 #define        PPP_VJC_UNCOMP  0x2f    /* VJ uncompressed TCP */
+#define PPP_IPV6       0x57    /* Internet Protocol Version 6 */
 #define PPP_COMP       0xfd    /* compressed packet */
 #define PPP_IPCP       0x8021  /* IP Control Protocol */
 #define PPP_IPXCP      0x802b  /* IPX Control Protocol */
index b944826603398e7319e87e6d9e1f09906b7f63d8..9816bb72b4da09d971a753f4bb08888146242e71 100644 (file)
@@ -102,6 +102,8 @@ enum net_directory_inos {
        PROC_NET_ALIAS_TYPES,
        PROC_NET_ALIASES,
        PROC_NET_IP_MASQ_APP,
+       PROC_NET_RT6,
+       PROC_NET_RT6_STATS,
        PROC_NET_STRIP_STATUS,
        PROC_NET_STRIP_TRACE,
        PROC_NET_Z8530,
index 5be4853e5a76c9fcab93136bfe9f1d8680e5339c..fbf3cec72af40d1ce6680cddbe26928ecb00acf0 100644 (file)
@@ -53,6 +53,12 @@ struct rtentry
 #define RTF_IRTT       0x0100          /* Initial round trip time        */
 #define RTF_REJECT     0x0200          /* Reject route                   */
 
+#define RTF_ADDRCONF   0x0800          /* announced on link prefix       */
+#define RTF_INVALID    0x1000
+#define RTF_DCACHE     0x2000
+#define RTF_DEFAULT    0x4000          /* Route is a default route       */
+#define RTF_NEXTHOP    0x8000          /* Non gateway route with nexthop */
+
 /*
  *     This structure is passed from the kernel to user space by netlink
  *     routing/device announcements
index 335c74eecb0dbc3da9abfa205f6931bd5b31201d..23b846ad5a2e86ef5b9b643fffbe36a264661b9a 100644 (file)
@@ -44,7 +44,19 @@ struct serial_struct {
 #define PORT_16550A    4
 #define PORT_CIRRUS     5
 #define PORT_16650     6
-#define PORT_MAX       6
+#define PORT_16650V2   7
+#define PORT_16750     8
+#define PORT_MAX       8
+
+struct serial_uart_config {
+       char    *name;
+       int     dfl_xmit_fifo_size;
+       int     flags;
+};
+
+#define UART_CLEAR_FIFO                0x01
+#define UART_USE_FIFO          0x02
+#define UART_STARTECH          0x04
 
 /*
  * Definitions for async_struct (and serial_struct) flags field
@@ -55,7 +67,7 @@ struct serial_struct {
 #define ASYNC_SAK      0x0004  /* Secure Attention Key (Orange book) */
 #define ASYNC_SPLIT_TERMIOS 0x0008 /* Separate termios for dialin/callout */
 
-#define ASYNC_SPD_MASK 0x0030
+#define ASYNC_SPD_MASK 0x1030
 #define ASYNC_SPD_HI   0x0010  /* Use 56000 instead of 38400 bps */
 
 #define ASYNC_SPD_VHI  0x0020  /* Use 115200 instead of 38400 bps */
@@ -67,8 +79,13 @@ struct serial_struct {
 #define ASYNC_PGRP_LOCKOUT    0x0200 /* Lock out cua opens based on pgrp */
 #define ASYNC_CALLOUT_NOHUP   0x0400 /* Don't do hangups for cua device */
 
-#define ASYNC_FLAGS    0x0FFF  /* Possible legal async flags */
-#define ASYNC_USR_MASK 0x0430  /* Legal flags that non-privileged
+#define ASYNC_HARDPPS_CD       0x0800  /* Call hardpps when CD goes high  */
+
+#define ASYNC_SPD_SHI  0x1000  /* Use 230400 instead of 38400 bps */
+#define ASYNC_SPD_WARP 0x1010  /* Use 460800 instead of 38400 bps */
+
+#define ASYNC_FLAGS    0x1FFF  /* Possible legal async flags */
+#define ASYNC_USR_MASK 0x1430  /* Legal flags that non-privileged
                                 * users can set or reset */
 
 /* Internal flags used only by kernel/chr_drv/serial.c */
@@ -81,6 +98,8 @@ struct serial_struct {
 #define ASYNC_CHECK_CD         0x02000000 /* i.e., CLOCAL */
 #define ASYNC_SHARE_IRQ                0x01000000 /* for multifunction cards */
 
+#define ASYNC_INTERNAL_FLAGS   0xFF000000 /* Internal flags */
+
 /*
  * Multiport serial configuration structure --- external structure
  */
@@ -128,20 +147,37 @@ struct async_icount {
        __u32   cts, dsr, rng, dcd;     
 };
 
+struct serial_state {
+       int     magic;
+       int     baud_base;
+       int     port;
+       int     irq;
+       int     flags;
+       int     hub6;
+       int     type;
+       int     line;
+       int     xmit_fifo_size;
+       int     custom_divisor;
+       int     count;
+       unsigned short  close_delay;
+       unsigned short  closing_wait; /* time to wait before closing */
+       struct async_icount     icount; 
+       struct termios          normal_termios;
+       struct termios          callout_termios;
+       struct async_struct *info;
+};
+
 struct async_struct {
        int                     magic;
-       int                     baud_base;
        int                     port;
-       int                     irq;
-       int                     flags;          /* defined in tty.h */
-       int                     hub6;           /* HUB6 plus one */
-       int                     type;           /* UART type */
+       int                     hub6;
+       int                     flags;
+       int                     xmit_fifo_size;
+       struct serial_state     *state;
        struct tty_struct       *tty;
        int                     read_status_mask;
        int                     ignore_status_mask;
        int                     timeout;
-       int                     xmit_fifo_size;
-       int                     custom_divisor;
        int                     x_char; /* xon/xoff character */
        int                     close_delay;
        unsigned short          closing_wait;
@@ -152,7 +188,6 @@ struct async_struct {
        unsigned long           event;
        unsigned long           last_active;
        int                     line;
-       int                     count;      /* # of fd on device */
        int                     blocked_open; /* # of blocked opens */
        long                    session; /* Session of opening process */
        long                    pgrp; /* pgrp of opening process */
@@ -162,17 +197,15 @@ struct async_struct {
        int                     xmit_cnt;
        struct tq_struct        tqueue;
        struct tq_struct        tqueue_hangup;
-       struct termios          normal_termios;
-       struct termios          callout_termios;
        struct wait_queue       *open_wait;
        struct wait_queue       *close_wait;
        struct wait_queue       *delta_msr_wait;
-       struct async_icount     icount; /* kernel counters for the 4 input interrupts */
        struct async_struct     *next_port; /* For the linked list */
        struct async_struct     *prev_port;
 };
 
 #define SERIAL_MAGIC 0x5301
+#define SSTATE_MAGIC 0x5302
 
 /*
  * The size of the serial xmit buffer is 1 page, or 4096 bytes
index 19d7c0ba7a74e5635ed2f2798a71151b55af93ee..c982ae28b4560ced7fa27d4b57b1059a93b18460 100644 (file)
@@ -51,6 +51,8 @@
 #define UART_FCR6_T_TRIGGER_8  0x10 /* Mask for transmit trigger set at 8 */
 #define UART_FCR6_T_TRIGGER_24  0x20 /* Mask for transmit trigger set at 24 */
 #define UART_FCR6_T_TRIGGER_30 0x30 /* Mask for transmit trigger set at 30 */
+/* TI 16750 definitions */
+#define UART_FCR7_64BYTE       0x20 /* Go into 64 byte mode */
 
 /*
  * These are the definitions for the Line Control Register
 #define UART_IER_RLSI  0x04    /* Enable receiver line status interrupt */
 #define UART_IER_THRI  0x02    /* Enable Transmitter holding register int. */
 #define UART_IER_RDI   0x01    /* Enable receiver data interrupt */
+/*
+ * Sleep mode for ST16650 and TI16750.
+ * Note that for 16650, EFR-bit 4 must be selected as well.
+ */
+#define UART_IERX_SLEEP  0x10  /* Enable sleep mode */
 
 /*
  * These are the definitions for the Modem Control Register
 #define UART_EFR_CTS   0x80    /* CTS flow control */
 #define UART_EFR_RTS   0x40    /* RTS flow control */
 #define UART_EFR_SCD   0x20    /* Special character detect */
-#define UART_EFR_ENI   0x10    /* Enhanced Interrupt */
+#define UART_EFR_ECB   0x10    /* Enhanced control bit */
 /*
  * the low four bits control software flow control
  */
index 9c20ef9c496e18a0d8544d422c008a0f5506f903..2d4e769f6440d733fd8b26d4a96ea4d67b3612c1 100644 (file)
@@ -77,6 +77,17 @@ struct sk_buff
        } mac;
   
        struct iphdr    *ip_hdr;                /* For IPPROTO_RAW                              */
+#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
+       struct ipv6hdr  *ipv6_hdr;
+       
+       /* 
+        *      It would be inefficient to store the nexthop address in every
+        *      skb. Instead we store a pointer to the respective neighbour
+        *      cache entry. This might make ndisc cache management harder.
+        */
+
+       struct neighbour *nexthop;
+#endif         
        unsigned long   len;                    /* Length of actual data                        */
        unsigned long   csum;                   /* Checksum                                     */
        __u32           saddr;                  /* IP source address                            */
@@ -100,6 +111,7 @@ struct sk_buff
 #define PACKET_BROADCAST       1               /* To all                                       */
 #define PACKET_MULTICAST       2               /* To group                                     */
 #define PACKET_OTHERHOST       3               /* To someone else                              */
+#define PACKET_NDISC           17              /* Outgoing NDISC packet                        */
        unsigned short  users;                  /* User count - see datagram.c,tcp.c            */
        unsigned short  protocol;               /* Packet protocol from driver.                 */
        unsigned short  truesize;               /* Buffer size                                  */
@@ -112,6 +124,7 @@ struct sk_buff
        unsigned char   *end;                   /* End pointer                                  */
        void            (*destructor)(struct sk_buff *);        /* Destruct function            */
        __u16           redirport;              /* Redirect port                                */
+       __u16           inclone;                /* Inline clone */
 };
 
 #ifdef CONFIG_SKB_LARGE
index d63a9046d9efdd5153994a0523fdb3e9ac238ca0..bb8d583266c17b8a66b40beb25e2e0b6698073c5 100644 (file)
@@ -33,6 +33,44 @@ struct msghdr
        int             msg_flags;      /* 4.4 BSD item we dont use      */
 };
 
+/*
+ *     POSIX 1003.1g - ancillary data object information
+ *     Ancillary data consits of a sequence of pairs of
+ *     (cmsghdr, cmsg_data[])
+ */
+
+struct cmsghdr {
+       size_t          cmsg_len;       /* data byte count, including hdr */
+        int            cmsg_level;     /* originating protocol */
+        int            cmsg_type;      /* protocol-specific type */
+       unsigned char   cmsg_data[0];
+};
+
+/*
+ *     Ancilliary data object information MACROS
+ *     Table 5-14 of POSIX 1003.1g
+ */
+
+#define CMSG_DATA(cmsg)                cmsg->cmsg_data
+#define CMSG_NXTHDR(mhdr, cmsg) cmsg_nxthdr(mhdr, cmsg)
+#define CMSG_FIRST(mhdr)       ((struct cmsghdr *) (mhdr)->msg_control)
+
+extern __inline__ struct cmsghdr * cmsg_nxthdr(struct msghdr *mhdr,
+                                              struct cmsghdr *cmsg)
+{
+       void * ptr;
+
+       if (cmsg->cmsg_len < sizeof(struct cmsghdr))
+       {
+               return NULL;
+       }
+       ptr = ((unsigned char *) cmsg) +  cmsg->cmsg_len;
+       if (ptr >= mhdr->msg_control + mhdr->msg_controllen)
+               return NULL;
+
+       return ptr;
+}
+
 /* Control Messages */
 
 #define SCM_RIGHTS             1
@@ -90,6 +128,9 @@ struct msghdr
 
 /* Setsockoptions(2) level. Thanks to BSD these must match IPPROTO_xxx */
 #define SOL_IP         0
+#define SOL_IPV6       41
+#define SOL_ICMPV6     58
+#define SOL_RAW                255
 #define SOL_IPX                256
 #define SOL_AX25       257
 #define SOL_ATALK      258
@@ -132,6 +173,13 @@ struct msghdr
 
 #ifdef __KERNEL__
 extern void memcpy_fromiovec(unsigned char *kdata, struct iovec *iov, int len);
+extern void memcpy_fromiovecend(unsigned char *kdata, struct iovec *iov, 
+                               int offset, int len);
+extern unsigned int csum_partial_copy_fromiovecend(unsigned char *kdata, 
+                                                  struct iovec *iov, 
+                                                  int offset, 
+                                                  int len, int csum);
+
 extern int verify_iovec(struct msghdr *m, struct iovec *iov, char *address, int mode);
 extern void memcpy_toiovec(struct iovec *v, unsigned char *kdata, int len);
 extern int move_addr_to_user(void *kaddr, int klen, void *uaddr, int *ulen);
index 70b8093c08bc29f9d6c416f626978d34a23c0650..26bedd3f111960e12eb52d7b58f197d14b558378 100644 (file)
@@ -80,6 +80,7 @@ struct __sysctl_args {
 #define NET_NETROM      8
 #define NET_AX25        9
 #define NET_BRIDGE     10
+#define NET_IPV6       11
 
 /* /proc/sys/net/core */
 
@@ -97,7 +98,11 @@ struct __sysctl_args {
 #define NET_IPV4_ARP_CHECK_INTERVAL     5
 #define NET_IPV4_ARP_CONFIRM_INTERVAL   6
 #define NET_IPV4_ARP_CONFIRM_TIMEOUT   7
+#define NET_IPV4_TCP_VEGAS_CONG_AVOID   8
 
+/* /proc/sys/net/ipv6 */
+#define NET_IPV6_FORWARDING            1
+#define NET_IPV6_HOPLIMIT              2
 /* /proc/sys/net/ipx */
 
 /* /proc/sys/net/appletalk */
index 2d6602982d6d4a99ed24bc5c4b5db574fa0f42da..2e4217a21c06f1f0efe2892699e11fb847b32e1f 100644 (file)
@@ -267,6 +267,8 @@ struct tty_struct {
 #define TTY_DO_WRITE_WAKEUP 5
 #define TTY_PUSH 6
 #define TTY_CLOSING 7
+#define TTY_HW_COOK_OUT 14
+#define TTY_HW_COOK_IN 15
 
 #define TTY_WRITE_FLUSH(tty) tty_write_flush((tty))
 
index 3468fa2d73f616919f9c44a76e38f2d2a1047afe..24412204703a062d6ceba86b0ee9933c35ec83e4 100644 (file)
  *
  *     This routine notifies the tty driver that it should hangup the
  *     tty device.
+ *
+ * void (*wait_until_sent)(struct tty_struct *tty, int timeout);
  * 
+ *     This routine waits until the device has written out all of the
+ *     characters in its transmitter FIFO.
+ *
+ * void (*send_xchar)(struct tty_struct *tty, char ch);
+ *
+ *     This routine is used to send a high-priority XON/XOFF
+ *     character to the device.
  */
 
 #include <linux/fs.h>
@@ -139,6 +148,8 @@ struct tty_driver {
        void (*hangup)(struct tty_struct *tty);
        void (*flush_buffer)(struct tty_struct *tty);
        void (*set_ldisc)(struct tty_struct *tty);
+       void (*wait_until_sent)(struct tty_struct *tty, int timeout);
+       void (*send_xchar)(struct tty_struct *tty, char ch);
 
        /*
         * linked list pointers
index 87b54ca307c03433a360e2ce24fd49435d512351..05a33261d1bc47fa93ca46490c040507397893d2 100644 (file)
@@ -2,7 +2,100 @@
 #define _LINUX_TTY_LDISC_H
 
 /*
- * Definitions for the tty line discipline
+ * This structure defines the interface between the tty line discpline
+ * implementation and the tty routines.  The following routines can be
+ * defined; unless noted otherwise, they are optional, and can be
+ * filled in with a null pointer.
+ *
+ * int (*open)(struct tty_struct *);
+ *
+ *     This function is called when the line discpline is associated
+ *     with the tty.  The line discpline can use this as an
+ *     opportunity to initialize any state needed by the ldisc routines.
+ * 
+ * void        (*close)(struct tty_struct *);
+ *
+ *     This function is called when the line discpline is being
+ *     shutdown, either because the tty is being closed or because
+ *     the tty is being changed to use a new line discpline
+ * 
+ * void        (*flush_buffer)(struct tty_struct *tty);
+ *
+ *     This function instructs the line discipline to clear its
+ *     buffers of any input characters it may have queued to be
+ *     delivered to the user mode process.
+ * 
+ * int (*chars_in_buffer)(struct tty_struct *tty);
+ *
+ *     This function returns the number of input characters the line
+ *     iscpline may have queued up to be delivered to the user mode
+ *     process.
+ * 
+ * int (*read)(struct tty_struct * tty, struct file * file,
+ *             unsigned char * buf, unsigned int nr);
+ *
+ *     This function is called when the user requests to read from
+ *     the tty.  The line discpline will return whatever characters
+ *     it has buffered up for the user.  If this function is not
+ *     defined, the user will receive an EIO error.
+ * 
+ * int (*write)(struct tty_struct * tty, struct file * file,
+ *              const unsigned char * buf, unsigned int nr);
+ *
+ *     This function is called when the user requests to write to the
+ *     tty.  The line discpline will deliver the characters to the
+ *     low-level tty device for transmission, optionally performing
+ *     some processing on the characters first.  If this function is
+ *     not defined, the user will receive an EIO error.
+ * 
+ * int (*ioctl)(struct tty_struct * tty, struct file * file,
+ *              unsigned int cmd, unsigned long arg);
+ *
+ *     This function is called when the user requests an ioctl which
+ *     is not handled by the tty layer or the low-level tty driver.
+ *     It is intended for ioctls which affect line discpline
+ *     operation.  Not that the search order for ioctls is (1) tty
+ *     layer, (2) tty low-level driver, (3) line discpline.  So a
+ *     low-level driver can "grab" an ioctl request before the line
+ *     discpline has a chance to see it.
+ * 
+ * void        (*set_termios)(struct tty_struct *tty, struct termios * old);
+ *
+ *     This function notifies the line discpline that a change has
+ *     been made to the termios stucture.
+ * 
+ * int (*select)(struct tty_struct * tty, struct inode * inode,
+ *               struct file * file, int sel_type,
+ *               struct select_table_struct *wait);
+ *
+ *     This function is called when a user attempts to select on a
+ *     tty device.  It is solely the responsibility of the line
+ *     discipline to handle select requests.
+ *
+ * void        (*receive_buf)(struct tty_struct *, const unsigned char *cp,
+ *                    char *fp, int count);
+ *
+ *     This function is called by the low-level tty driver to send
+ *     characters received by the hardware to the line discpline for
+ *     processing.  <cp> is a pointer to the buffer of input
+ *     character received by the device.  <fp> is a pointer to a
+ *     pointer of flag bytes which indicate whether a character was
+ *     received with a parity error, etc.
+ * 
+ * int (*receive_room)(struct tty_struct *);
+ *
+ *     This function is called by the low-level tty driver to
+ *     determine how many characters the line discpline can accept.
+ *     The low-level driver must not send more characters than was
+ *     indicated by receive_room, or the line discpline may drop
+ *     those characters.
+ * 
+ * void        (*write_wakeup)(struct tty_struct *);
+ *
+ *     This function is called by the low-level tty driver to signal
+ *     that line discpline should try to send more characters to the
+ *     low-level driver for transmission.  If the line discpline does
+ *     not have any more data to send, it can just return.
  */
 
 #include <linux/fs.h>
index 4428194ff914d29bcf2274cfbe257eb4c98db6da..e11ad75fe0a21ecfb95bb7b7dd2b4955bbac633b 100644 (file)
@@ -8,11 +8,4 @@ struct sockaddr_un {
        char sun_path[UNIX_PATH_MAX];   /* pathname */
 };
 
-struct cmsghdr {
-       unsigned int cmsg_len;
-       int cmsg_level;
-       int cmsg_type;
-       unsigned char cmsg_data[0];
-};
-
 #endif /* _LINUX_UN_H */
diff --git a/include/net/addrconf.h b/include/net/addrconf.h
new file mode 100644 (file)
index 0000000..1ea3930
--- /dev/null
@@ -0,0 +1,135 @@
+#ifndef _ADDRCONF_H
+#define _ADDRCONF_H
+
+#define RETRANS_TIMER  HZ
+
+#define MAX_RTR_SOLICITATIONS          3
+#define RTR_SOLICITATION_INTERVAL      (4*HZ)
+
+#define ADDR_CHECK_FREQUENCY           (120*HZ)
+
+struct prefix_info {
+       __u8                    type;
+       __u8                    length;
+       __u8                    prefix_len;
+
+#if defined(__BIG_ENDIAN_BITFIELD)
+       __u8                    onlink : 1,
+                               autoconf : 1,
+                               reserved : 6;
+#elif defined(__LITTLE_ENDIAN_BITFIELD)
+       __u8                    reserved : 6,
+                               autoconf : 1,
+                               onlink : 1;
+#else
+#error "Please fix <asm/byteorder.h>"
+#endif
+       __u32                   valid;
+       __u32                   prefered;
+       __u32                   reserved2;
+
+       struct in6_addr         prefix;
+};
+
+
+#ifdef __KERNEL__
+
+#include <linux/in6.h>
+#include <linux/netdevice.h>
+#include <net/if_inet6.h>
+
+extern struct inet6_ifaddr     *inet6_addr_lst[16];
+extern struct ipv6_mc_list     *inet6_mcast_lst[16];
+extern struct inet6_dev                *inet6_dev_lst;
+
+extern void                    addrconf_init(void);
+extern void                    addrconf_cleanup(void);
+
+extern int                     addrconf_notify(struct notifier_block *this, 
+                                               unsigned long event, 
+                                               void * data);
+
+extern int                     addrconf_add_ifaddr(void *arg);
+extern int                     addrconf_set_dstaddr(void *arg);
+
+extern struct inet6_ifaddr *   ipv6_chk_addr(struct in6_addr *addr);
+extern struct inet6_ifaddr *   ipv6_get_saddr(struct rt6_info *rt, 
+                                              struct in6_addr *daddr);
+extern struct inet6_ifaddr *   ipv6_get_lladdr(struct device *dev);
+
+/*
+ *     multicast prototypes (mcast.c)
+ */
+extern int                     ipv6_sock_mc_join(struct sock *sk, 
+                                                 struct device *dev, 
+                                                 struct in6_addr *addr);
+extern int                     ipv6_sock_mc_drop(struct sock *sk,
+                                                 struct device *dev, 
+                                                 struct in6_addr *addr);
+extern void                    ipv6_sock_mc_close(struct sock *sk);
+
+extern int                     ipv6_dev_mc_inc(struct device *dev,
+                                               struct in6_addr *addr);
+extern int                     ipv6_dev_mc_dec(struct device *dev,
+                                               struct in6_addr *addr);
+
+extern int                     ipv6_chk_mcast_addr(struct device *dev,
+                                                   struct in6_addr *addr);
+
+extern void                    addrconf_prefix_rcv(struct device *dev,
+                                                   u8 *opt, int len);
+
+extern struct inet6_dev *      ipv6_dev_by_index(int index);
+extern struct inet6_dev *      ipv6_get_idev(struct device *dev);
+
+extern void                    addrconf_forwarding_on(void);
+/*
+ *     Hash function taken from net_alias.c
+ */
+
+static __inline__ u8 ipv6_addr_hash(struct in6_addr *addr)
+{      
+       __u32 word;
+       unsigned tmp;
+
+       /* 
+        * We perform the hash function over the last 64 bits of the address
+        * This will include the IEEE address token on links that support it.
+        */
+
+       word = addr->s6_addr[2] ^ addr->s6_addr32[3];
+       tmp  = word ^ (word>>16);
+       tmp ^= (tmp >> 8);
+
+       return ((tmp ^ (tmp >> 4)) & 0x0f);
+}
+
+/*
+ *     compute link-local solicited-node multicast address
+ */
+
+static __inline__ void addrconf_addr_solict_mult(struct in6_addr *addr,
+                                                struct in6_addr *solicited)
+{
+       ipv6_addr_set(solicited,
+                     __constant_htonl(0xFF020000), 0,
+                     __constant_htonl(0x1), addr->s6_addr32[3]);
+}
+
+static __inline__ void ipv6_addr_all_nodes(struct in6_addr *addr)
+{
+       ipv6_addr_set(addr,
+                     __constant_htonl(0xFF020000), 0, 0,
+                     __constant_htonl(0x1));
+}
+
+static __inline__ void ipv6_addr_all_routers(struct in6_addr *addr)
+{
+       ipv6_addr_set(addr,
+                     __constant_htonl(0xFF020000), 0, 0,
+                     __constant_htonl(0x2));
+}
+
+
+#endif
+#endif
index aee4fd476d06b61972ca2b666d935e4a45092f40..7d877d18c1549e82c8790ba5d4e5940913dd5334 100644 (file)
  *             as published by the Free Software Foundation; either version
  *             2 of the License, or (at your option) any later version.
  */
+
+/*
+ *     Fixes:
+ *
+ *     Ralf Baechle                    :       generic ipv6 checksum
+ *     <ralf@waldorf-gmbh.de>
+ */
+
 #ifndef _CHECKSUM_H
 #define _CHECKSUM_H
 
 #include <net/ip.h>
 #include <asm/checksum.h>
 
+#ifndef _HAVE_ARCH_IPV6_CSUM
+
+static __inline__ unsigned short int csum_ipv6_magic(struct in6_addr *saddr,
+                                                    struct in6_addr *daddr,
+                                                    __u16 len,
+                                                    unsigned short proto,
+                                                    unsigned int csum) 
+{
+
+       int carry;
+       __u32 ulen;
+       __u32 uproto;
+
+       csum += saddr->s6_addr32[0];
+       carry = (csum < saddr->s6_addr32[0]);
+       csum += carry;
+
+       csum += saddr->s6_addr32[1];
+       carry = (csum < saddr->s6_addr32[1]);
+       csum += carry;
+
+       csum += saddr->s6_addr32[2];
+       carry = (csum < saddr->s6_addr32[2]);
+       csum += carry;
+
+       csum += saddr->s6_addr32[3];
+       carry = (csum < saddr->s6_addr32[3]);
+       csum += carry;
+
+       csum += daddr->s6_addr32[0];
+       carry = (csum < daddr->s6_addr32[0]);
+       csum += carry;
+
+       csum += daddr->s6_addr32[1];
+       carry = (csum < daddr->s6_addr32[1]);
+       csum += carry;
+
+       csum += daddr->s6_addr32[2];
+       carry = (csum < daddr->s6_addr32[2]);
+       csum += carry;
+
+       csum += daddr->s6_addr32[3];
+       carry = (csum < daddr->s6_addr32[3]);
+       csum += carry;
+
+       ulen = htonl((__u32) len);
+       csum += ulen;
+       carry = (csum < ulen);
+       csum += carry;
+
+       uproto = htonl(proto);
+       csum += proto;
+       carry = (csum < proto);
+       csum += carry;
+
+       return csum_fold(csum);
+}
+
+#endif
+
 #endif
diff --git a/include/net/if_inet6.h b/include/net/if_inet6.h
new file mode 100644 (file)
index 0000000..048243c
--- /dev/null
@@ -0,0 +1,109 @@
+/*
+ *     inet6 interface/address list definitions
+ *     Linux INET6 implementation 
+ *
+ *     Authors:
+ *     Pedro Roque             <roque@di.fc.ul.pt>     
+ *
+ *
+ *     This program is free software; you can redistribute it and/or
+ *      modify it under the terms of the GNU General Public License
+ *      as published by the Free Software Foundation; either version
+ *      2 of the License, or (at your option) any later version.
+ */
+
+#ifndef _NET_IF_INET6_H
+#define _NET_IF_INET6_H
+
+#define DAD_COMPLETE   0x00
+#define DAD_INCOMPLETE 0x01
+#define DAD_STATUS     0x01
+
+#define ADDR_STATUS    0x06
+#define ADDR_DEPRECATED 0x02
+#define ADDR_INVALID   0x04
+
+#define ADDR_PERMANENT 0x80
+
+#define IF_RA_RCVD     0x20
+#define IF_RS_SENT     0x10
+
+#ifdef __KERNEL__
+
+struct inet6_ifaddr 
+{
+       struct in6_addr         addr;
+       __u32                   prefix_len;
+       
+       __u32                   valid_lft;
+       __u32                   prefered_lft;
+       unsigned long           tstamp;
+
+       __u8                    probes;
+       __u8                    flags;
+
+       __u16                   scope;
+
+       struct timer_list       timer;
+
+       struct inet6_dev        *idev;
+
+       struct inet6_ifaddr     *lst_next;      /* next addr in addr_lst */
+       struct inet6_ifaddr     *if_next;       /* next addr in inet6_dev */
+};
+
+
+struct ipv6_mc_socklist {
+       struct in6_addr         addr;
+       struct device           *dev;
+       struct ipv6_mc_socklist *next;
+};
+
+struct ipv6_mc_list {
+       struct in6_addr         addr;
+       struct device           *dev;
+       struct ipv6_mc_list     *next;
+       struct ipv6_mc_list     *if_next;
+       struct timer_list       timer;
+        int                    tm_running;
+        atomic_t               users;  
+};
+
+#define        IFA_HOST        IPV6_ADDR_LOOPBACK
+#define        IFA_LINK        IPV6_ADDR_LINKLOCAL
+#define        IFA_SITE        IPV6_ADDR_SITELOCAL
+#define        IFA_GLOBAL      0x0000U
+
+extern int             in6_ifnum;
+
+struct inet6_dev 
+{
+       struct device           *dev;
+
+       struct inet6_ifaddr     *addr_list;
+       struct ipv6_mc_list     *mc_list;
+
+       __u32                   if_index;
+       __u32                   if_flags;
+       __u32                   router:1,
+                               unused:31;
+
+       struct inet6_dev        *next;
+};
+
+
+extern __inline__ void ipv6_mc_map(struct in6_addr *addr, char *buf)
+{
+       /*
+        *      +-------+-------+-------+-------+-------+-------+
+        *      |   33  |   33  | DST13 | DST14 | DST15 | DST16 |
+        *      +-------+-------+-------+-------+-------+-------+
+        */
+
+       buf[0]= 0x33;
+       buf[1]= 0x33;
+
+       memcpy(buf + 2, &addr->s6_addr32[3], sizeof(__u32));
+}
+#endif
+#endif
diff --git a/include/net/inet_common.h b/include/net/inet_common.h
new file mode 100644 (file)
index 0000000..c72c753
--- /dev/null
@@ -0,0 +1,47 @@
+#ifndef _INET_COMMON_H
+#define _INET_COMMON_H
+
+extern struct proto_ops                inet_proto_ops;
+extern struct sock *           tcp_sock_array[SOCK_ARRAY_SIZE];
+extern struct sock *           udp_sock_array[SOCK_ARRAY_SIZE];
+
+
+/*
+ *     INET4 prototypes used by INET6
+ */
+
+extern void                    inet_remove_sock(struct sock *sk1);
+extern void                    inet_put_sock(unsigned short num, 
+                                             struct sock *sk);
+extern int                     inet_release(struct socket *sock, 
+                                            struct socket *peer);
+extern int                     inet_connect(struct socket *sock, 
+                                            struct sockaddr * uaddr,
+                                            int addr_len, int flags);
+extern int                     inet_accept(struct socket *sock, 
+                                           struct socket *newsock, int flags);
+extern int                     inet_recvmsg(struct socket *sock, 
+                                            struct msghdr *ubuf, 
+                                            int size, int noblock, 
+                                            int flags, int *addr_len );
+extern int                     inet_sendmsg(struct socket *sock, 
+                                            struct msghdr *msg, 
+                                            int size, int noblock, 
+                                            int flags);
+extern int                     inet_shutdown(struct socket *sock, int how);
+extern int                     inet_select(struct socket *sock, int sel_type,
+                                           select_table *wait);
+extern int                     inet_setsockopt(struct socket *sock, int level,
+                                               int optname, char *optval, 
+                                               int optlen);
+extern int                     inet_getsockopt(struct socket *sock, int level,
+                                               int optname, char *optval, 
+                                               int *optlen);
+extern int                     inet_fcntl(struct socket *sock, 
+                                          unsigned int cmd, 
+                                          unsigned long arg);
+extern int                     inet_listen(struct socket *sock, int backlog);
+
+#endif
+
+
diff --git a/include/net/ipv6.h b/include/net/ipv6.h
new file mode 100644 (file)
index 0000000..b42c3f0
--- /dev/null
@@ -0,0 +1,296 @@
+/*
+ *     Linux INET6 implementation
+ *
+ *     Authors:
+ *     Pedro Roque             <roque@di.fc.ul.pt>
+ *
+ *     $Id: ipv6.h,v 1.19 1996/09/24 17:04:20 roque Exp $
+ *
+ *     This program is free software; you can redistribute it and/or
+ *      modify it under the terms of the GNU General Public License
+ *      as published by the Free Software Foundation; either version
+ *      2 of the License, or (at your option) any later version.
+ */
+
+#ifndef _NET_IPV6_H
+#define _NET_IPV6_H
+
+#include <linux/ipv6.h>
+#include <net/ndisc.h>
+
+/*
+ *     NextHeader field of IPv6 header
+ */
+
+#define NEXTHDR_HOP            0       /* Hop-by-hop option header. */
+#define NEXTHDR_TCP            6       /* TCP segment. */
+#define NEXTHDR_UDP            17      /* UDP message. */
+#define NEXTHDR_IPV6           41      /* IPv6 in IPv6 */
+#define NEXTHDR_ROUTING                43      /* Routing header. */
+#define NEXTHDR_FRAGMENT       44      /* Fragmentation/reassembly header. */
+#define NEXTHDR_ESP            50      /* Encapsulating security payload. */
+#define NEXTHDR_AUTH           51      /* Authentication header. */
+#define NEXTHDR_ICMP           58      /* ICMP for IPv6. */
+#define NEXTHDR_NONE           59      /* No next header */
+#define NEXTHDR_DEST           60      /* Destination options header. */
+
+#define NEXTHDR_MAX            255
+
+
+
+#define IPV6_DEFAULT_HOPLIMIT   64
+#define IPV6_DEFAULT_MCASTHOPS 1
+
+/*
+ *     Addr type
+ *     
+ *     type    -       unicast | multicast | anycast
+ *     scope   -       local   | site      | global
+ *     v4      -       compat
+ *     v4mapped
+ *     any
+ *     loopback
+ */
+
+#define IPV6_ADDR_ANY          0x0000U
+
+#define IPV6_ADDR_UNICAST              0x0001U 
+#define IPV6_ADDR_MULTICAST            0x0002U 
+#define IPV6_ADDR_ANYCAST      0x0004U
+
+#define IPV6_ADDR_LOOPBACK     0x0010U
+#define IPV6_ADDR_LINKLOCAL    0x0020U
+#define IPV6_ADDR_SITELOCAL    0x0040U
+
+#define IPV6_ADDR_COMPATv4     0x0080U
+
+#define IPV6_ADDR_SCOPE_MASK   0x00f0U
+
+#define IPV6_ADDR_MAPPED       0x1000U
+#define IPV6_ADDR_RESERVED     0x2000U /* reserved address space */
+
+/*
+ *     fragmentation header
+ */
+
+struct frag_hdr {
+       unsigned char   nexthdr;
+       unsigned char   reserved;       
+       unsigned short  frag_off;
+       __u32           identification;
+};
+
+#ifdef __KERNEL__
+
+#include <net/sock.h>
+
+extern struct ipv6_mib ipv6_statistics;
+
+extern int             ipv6_forwarding;        /* host/router switch */
+extern int             ipv6_hop_limit;         /* default hop limit */
+
+struct ipv6_frag {
+       __u16                   offset;
+       __u16                   len;
+       struct sk_buff          *skb;
+
+       struct frag_hdr         *fhdr;
+
+       struct ipv6_frag        *next;
+};
+
+/*
+ *     Equivalent of ipv4 struct ipq
+ */
+
+struct frag_queue {
+
+       struct frag_queue       *next;
+       struct frag_queue       *prev;
+
+       __u32                   id;             /* fragment id          */
+       struct timer_list       timer;          /* expire timer         */
+       struct ipv6_frag        *fragments;
+       struct device           *dev;
+       __u8                    last_in;        /* has last segment arrived? */
+       __u8                    nexthdr;
+       __u8                    *nhptr;
+};
+
+extern int                     ipv6_routing_header(struct sk_buff **skb, 
+                                                   struct device *dev,
+                                                   __u8 *nhptr, 
+                                                   struct ipv6_options *opt);
+
+extern int                     ipv6_reassembly(struct sk_buff **skb, 
+                                               struct device *dev, 
+                                               __u8 *nhptr,
+                                               struct ipv6_options *opt);
+
+#define IPV6_FRAG_TIMEOUT      (60*HZ)         /* 60 seconds */
+
+/*
+ *     Function prototype for build_xmit
+ */
+
+typedef void           (*inet_getfrag_t) (const void *data,
+                                          struct in6_addr *addr,
+                                          char *,
+                                          unsigned int, unsigned int);
+
+
+extern int             ipv6_addr_type(struct in6_addr *addr);
+
+extern __inline__ int ipv6_addr_cmp(struct in6_addr *a1, struct in6_addr *a2)
+{
+       return memcmp((void *) a1, (void *) a2, sizeof(struct in6_addr));
+}
+
+extern __inline__ void ipv6_addr_copy(struct in6_addr *a1, struct in6_addr *a2)
+{
+       memcpy((void *) a1, (void *) a2, sizeof(struct in6_addr));
+}
+
+#ifndef __HAVE_ARCH_ADDR_SET
+extern __inline__ void ipv6_addr_set(struct in6_addr *addr, 
+                                    __u32 w1, __u32 w2,
+                                    __u32 w3, __u32 w4)
+{
+       addr->s6_addr32[0] = w1;
+       addr->s6_addr32[1] = w2;
+       addr->s6_addr32[2] = w3;
+       addr->s6_addr32[3] = w4;
+}
+#endif
+
+extern __inline__ int ipv6_addr_any(struct in6_addr *a)
+{
+       return ((a->s6_addr32[0] | a->s6_addr32[1] | 
+                a->s6_addr32[2] | a->s6_addr32[3] ) == 0); 
+}
+
+/*
+ *     Prototypes exported by ipv6
+ */
+
+#if 0
+extern int                     ipv6_build_header(struct sk_buff *skb,
+                                                 struct device *dev,
+                                                 struct in6_addr *saddr_in, 
+                                                 struct in6_addr *daddr_in,
+                                                 int proto, int len, 
+                                                 struct ipv6_pinfo *np);
+#endif
+
+extern void                    ipv6_redo_mac_hdr(struct sk_buff *skb,
+                                                 struct neighbour *neigh,
+                                                 int len);
+
+extern int                     ipv6_bld_hdr_2(struct sock *sk,
+                                              struct sk_buff *skb,
+                                              struct device *dev,
+                                              struct neighbour *neigh,
+                                              struct in6_addr *saddr,
+                                              struct in6_addr *daddr,
+                                              int proto, int len);
+
+extern int                     ipv6_xmit(struct sock *sk,
+                                         struct sk_buff *skb,
+                                         struct in6_addr *saddr,
+                                         struct in6_addr *daddr,
+                                         struct ipv6_options *opt,
+                                         int proto);
+
+extern void                    ipv6_queue_xmit(struct sock *sk,
+                                               struct device *dev,
+                                               struct sk_buff *skb,
+                                               int free);
+
+extern int                     ipv6_build_xmit(struct sock *sk,
+                                               inet_getfrag_t getfrag,
+                                               const void * data,
+                                               struct in6_addr * daddr,
+                                               unsigned short int length,
+                                               struct in6_addr * saddr,
+                                               struct device *dev,
+                                               struct ipv6_options *opt,
+                                               int proto, int noblock);
+
+/*
+ *     rcv function (called from netdevice level)
+ */
+
+extern int                     ipv6_rcv(struct sk_buff *skb, 
+                                        struct device *dev, 
+                                        struct packet_type *pt);
+
+extern void                    ipv6_forward(struct sk_buff *skb,
+                                            struct device *dev,
+                                            int flags);
+
+#define IP6_FW_SRCRT   0x1
+#define        IP6_FW_STRICT   0x2
+
+/*
+ *     Extension header (options) processing
+ */
+extern int                     ipv6opt_bld_rthdr(struct sk_buff *skb,
+                                                 struct ipv6_options *opt,
+                                                 struct in6_addr *addr,
+                                                 int proto);
+
+extern int                     ipv6opt_srcrt_co(struct sockaddr_in6 *sin6, 
+                                                int len, 
+                                                struct ipv6_options *opt);
+
+extern int                     ipv6opt_srcrt_cl(struct sockaddr_in6 *sin6, 
+                                                int num_addrs, 
+                                                struct ipv6_options *opt);
+
+extern int                     ipv6opt_srt_tosin(struct ipv6_options *opt,
+                                                 struct sockaddr_in6 *sin6,
+                                                 int len);
+
+extern void                    ipv6opt_free(struct ipv6_options *opt);
+
+
+/*
+ *     socket lookup (af_inet6.c)
+ */
+
+extern struct sock *           inet6_get_sock(struct proto *prot, 
+                                              struct in6_addr *loc_addr, 
+                                              struct in6_addr *rmt_addr,
+                                              unsigned short loc_port,
+                                              unsigned short rmt_port);
+
+extern struct sock *           inet6_get_sock_raw(struct sock *sk, 
+                                                  unsigned short num,
+                                                  struct in6_addr *loc_addr, 
+                                                  struct in6_addr *rmt_addr);
+
+extern struct sock *           inet6_get_sock_mcast(struct sock *sk, 
+                                                    unsigned short num,
+                                                    unsigned short rmt_port,
+                                                    struct in6_addr *loc_addr, 
+                                                    struct in6_addr *rmt_addr);
+
+/*
+ *     socket options (ipv6_sockglue.c)
+ */
+
+extern int                     ipv6_setsockopt(struct sock *sk, int level, 
+                                               int optname, char *optval, 
+                                               int optlen);
+extern int                     ipv6_getsockopt(struct sock *sk, int level, 
+                                               int optname, char *optval, 
+                                               int *optlen);
+
+
+extern void                    ipv6_init(void);
+extern void                    ipv6_cleanup(void);
+#endif
+#endif
+
+
+
diff --git a/include/net/ipv6_route.h b/include/net/ipv6_route.h
new file mode 100644 (file)
index 0000000..32a9880
--- /dev/null
@@ -0,0 +1,196 @@
+/*
+ *     Linux INET6 implementation 
+ *
+ *     Authors:
+ *     Pedro Roque             <roque@di.fc.ul.pt>     
+ *
+ *     This program is free software; you can redistribute it and/or
+ *      modify it under the terms of the GNU General Public License
+ *      as published by the Free Software Foundation; either version
+ *      2 of the License, or (at your option) any later version.
+ */
+
+#ifndef _NET_IPV6_ROUTE_H
+#define _NET_IPV6_ROUTE_H
+
+#include <linux/ipv6_route.h>
+
+
+#ifdef __KERNEL__
+
+
+struct fib6_node {
+       struct fib6_node        *parent;
+       struct fib6_node        *left;
+       struct fib6_node        *right;
+
+       struct rt6_info         *leaf;
+
+       __u16                   fn_bit;         /* bit key */
+       __u16                   fn_flags;
+       __u32                   fn_sernum;
+};
+
+
+struct rt6_info;
+
+typedef void (*rt6_output_method_t) (struct sk_buff *skb, struct rt6_info *rt);
+
+struct rt6_info {
+       struct fib6_node        *fib_node;
+       struct rt6_info         *next;
+
+       struct in6_addr         rt_dst;
+       
+       atomic_t                rt_use;         /* dcache references    */
+       atomic_t                rt_ref;         /* fib references       */
+
+       struct neighbour        *rt_nexthop;
+       struct device           *rt_dev;
+       
+       rt6_output_method_t     rt_output_method;
+
+       __u16                   rt_metric;
+       __u16                   rt_prefixlen;
+       __u32                   rt_flags;
+       unsigned long           rt_expires;
+};
+
+extern struct rt6_info         *default_rt_list;
+extern struct rt6_info         *last_resort_rt;
+
+struct dest_entry {
+       struct rt6_info         rt;
+
+       __u32                   dc_irtt;
+       __u32                   dc_window;
+       __u16                   dc_pmtu;
+
+       unsigned long           dc_tstamp;      /* for garbage collection */
+
+#define dc_addr                        rt.rt_dst
+#define dc_usecnt              rt.rt_use
+#define dc_nexthop             rt.rt_nexthop
+#define dc_flags               rt.rt_flags
+};
+
+/*
+ *     Structure for async processing of operations on the routing
+ *     table
+ */
+
+struct rt6_req {
+       int                     operation;
+       struct rt6_info         *ptr;
+
+       struct rt6_req          *next;
+       struct rt6_req          *prev;
+
+#define RT_OPER_ADD            1
+#define RT_OPER_DEL            2
+};
+
+struct rt6_statistics {
+       __u32           fib_nodes;
+       __u32           fib_route_nodes;
+       __u32           fib_rt_alloc;
+       __u32           fib_rt_entries;
+       __u32           fib_dc_alloc;
+};
+
+#define RTN_ROOT       0x0001          /* root node                    */
+#define RTN_BACKTRACK  0x0002          /* backtrack point              */
+#define RTN_TAG                0x0010
+
+/*
+ *     Values for destination cache garbage collection
+ *     These are wild guesses for now...
+ */
+
+#define        DC_WATER_MARK           512
+#define DC_SHORT_TIMEOUT       (5*HZ)
+#define DC_LONG_TIMEOUT                (15*HZ)
+
+#define DC_TIME_RUN            (5*HZ)
+#define DC_TIME_RETRY          HZ
+
+/*
+ *     Prototypes
+ */
+
+/*
+ *     check/obtain destination cache from routing table
+ */
+
+extern struct dest_entry *     ipv6_dst_check(struct dest_entry *dc, 
+                                              struct in6_addr * daddr,
+                                              __u32 sernum, int flags);
+
+extern struct dest_entry *     ipv6_dst_route(struct in6_addr * daddr,
+                                              struct device *src_dev,
+                                              int flags);
+
+extern void                    ipv6_dst_unlock(struct dest_entry *dest);
+
+extern struct rt6_info *       fibv6_lookup(struct in6_addr *addr,
+                                            struct device *dev,
+                                            int flags);
+
+/*
+ *     user space set/del route
+ */
+
+extern int                     ipv6_route_ioctl(unsigned int cmd, void *arg);
+
+
+extern void                    ipv6_route_init(void);
+extern void                    ipv6_route_cleanup(void);
+
+extern int                     ipv6_route_add(struct in6_rtmsg *rt);
+
+extern int                     fib6_del_rt(struct rt6_info *rt);
+
+extern void                    rt6_sndmsg(__u32 type, struct in6_addr *dst,
+                                          struct in6_addr *gw, __u16 plen,
+                                          __u16 metric, char *devname,
+                                          __u16 flags);
+/*
+ *     ICMP interface
+ */
+
+extern struct rt6_info *       ipv6_rt_redirect(struct device *dev,
+                                                struct in6_addr *dest,
+                                                struct in6_addr *target,
+                                                int on_link);
+
+extern void                    rt6_handle_pmtu(struct in6_addr *addr,
+                                               int pmtu);
+/*
+ *
+ */
+
+extern struct fib6_node                routing_table;
+extern struct rt6_statistics   rt6_stats;
+
+static __inline__ void rt_release(struct rt6_info *rt)
+{
+       atomic_dec(&rt->rt_ref);
+       if ((rt->rt_use | rt->rt_ref) == 0)
+       {
+               if (rt->rt_nexthop)
+               {
+                       ndisc_dec_neigh(rt->rt_nexthop);
+               }
+
+               if (rt->rt_flags & RTI_DCACHE)
+               {
+                       rt6_stats.fib_dc_alloc--;
+               }
+               rt6_stats.fib_rt_alloc--;
+               kfree(rt);
+       }
+}
+
+#endif
+
+#endif
diff --git a/include/net/ndisc.h b/include/net/ndisc.h
new file mode 100644 (file)
index 0000000..3605997
--- /dev/null
@@ -0,0 +1,184 @@
+#ifndef _NDISC_H
+#define _NDISC_H
+
+
+/*
+ *     Neighbor Cache Entry States (7.3.2.)
+ */
+
+/*
+ *     The lsb is set for states that have a timer associated
+ */
+
+#define NUD_NONE       0x00
+#define NUD_INCOMPLETE 0x11
+#define NUD_REACHABLE  0x20
+#define NUD_STALE      0x30
+#define NUD_DELAY      0x41
+#define NUD_PROBE      0x51
+#define NUD_FAILED     0x60    /* neighbour discovery failed   */
+
+#define NUD_IN_TIMER   0x01
+
+#define NDISC_QUEUE_LEN        3
+
+#define NCF_NOARP              0x01    /* no ARP needed on this device */
+#define NCF_SUBNET             0x02    /* NC entry for subnet          */
+#define NCF_INVALID            0x04
+#define NCF_DELAY_EXPIRED      0x08    /* time to move to PROBE        */
+#define NCF_ROUTER             0x10    /* neighbour is a router        */
+#define NCF_HHVALID            0x20    /* Hardware header is valid     */
+
+/*
+ *     ICMP codes for neighbour discovery messages
+ */
+
+#define NDISC_ROUTER_SOLICITATION      133
+#define NDISC_ROUTER_ADVERTISEMENT     134
+#define NDISC_NEIGHBOUR_SOLICITATION   135
+#define NDISC_NEIGHBOUR_ADVERTISEMENT  136
+#define NDISC_REDIRECT                 137
+
+/*
+ *     ndisc options
+ */
+
+#define ND_OPT_SOURCE_LL_ADDR          1
+#define ND_OPT_TARGET_LL_ADDR          2
+#define ND_OPT_PREFIX_INFO             3
+#define ND_OPT_REDIRECT_HDR            4
+#define ND_OPT_MTU                     5
+
+#define MAX_RTR_SOLICITATION_DELAY     HZ
+
+#define RECHABLE_TIME                  (30*HZ)
+#define RETRANS_TIMER                  HZ
+
+#define MIN_RANDOM_FACTOR              (1/2)
+#define MAX_RANDOM_FACTOR              (3/2)
+
+#define REACH_RANDOM_INTERVAL          (60*60*HZ)      /* 1 hour */
+
+#ifdef __KERNEL__
+
+#include <linux/skbuff.h>
+#include <linux/netdevice.h>
+#include <linux/icmpv6.h>
+#include <asm/atomic.h>
+
+/*
+ *     neighbour cache entry
+ *     used by neighbour discovery module
+ *     as similar functions of "struct hh_cache" used in ipv4
+ */
+struct neighbour {
+       struct in6_addr         addr;           /* next hop addr */
+       __u8                    len;            /* prefix len    */
+       __u8                    type;           /* {unicast, multicast} */
+                       
+       struct device *         dev;
+
+       __u8                    flags;
+
+       
+       __u8                    hh_data[MAX_ADDR_LEN];  /* cached hdr   */
+       __u8                    *h_dest;                /* dest addr    */
+
+       struct sk_buff_head     arp_queue;      /* packets waiting for ND to
+                                                  finish */
+       atomic_t                refcnt;
+       __u8                    nud_state;
+       __u8                    probes;
+       __u32                   tstamp;         /* last reachable conf  */
+
+       unsigned long           expires;        /* timer expires at     */
+
+       struct neighbour        *next;          /* for hash chaining    */
+       struct neighbour        *prev;          /* for hash chaining    */
+};
+
+struct nd_msg {
+        struct icmpv6hdr icmph;
+        struct in6_addr target;
+        struct {
+                __u8   opt_type;
+                __u8   opt_len;
+                __u8   link_addr[MAX_ADDR_LEN];
+        } opt;
+};
+
+struct ra_msg {
+        struct icmpv6hdr       icmph;
+       __u32                   reachable_time;
+       __u32                   retrans_timer;
+};
+
+struct ndisc_statistics {
+       __u32   allocs;                 /* allocated entries            */
+       __u32   free_delayed;           /* zombie entries               */
+       __u32   snt_probes_ucast;       /* ns probes sent (ucast)       */
+       __u32   snt_probes_mcast;       /* ns probes sent (mcast)       */
+       __u32   rcv_probes_ucast;       /* ns probes rcv  (ucast)       */
+       __u32   rcv_probes_mcast;       /* ns probes rcv  (mcast)       */
+       __u32   rcv_upper_conf;         /* confirmations from upper layers */
+       __u32   res_failed;             /* address resolution failures  */
+};
+
+extern struct neighbour *      ndisc_get_neigh(struct device *dev, 
+                                               struct in6_addr *addr);
+
+extern void                    ndisc_validate(struct neighbour *neigh);
+
+extern void                    ndisc_init(struct proto_ops *ops);
+extern void                    ndisc_cleanup(void);
+
+extern int                     ndisc_eth_resolv(unsigned char *,
+                                                struct device *,
+                                                struct sk_buff *);
+
+extern int                     ndisc_rcv(struct sk_buff *skb,
+                                         struct device *dev,
+                                         struct in6_addr *saddr,
+                                         struct in6_addr *daddr,
+                                         struct ipv6_options *opt,
+                                         unsigned short len);
+
+extern void                    ndisc_event_send(struct neighbour *neigh,
+                                                struct sk_buff *skb);
+
+extern void                    ndisc_send_ns(struct device *dev,
+                                             struct neighbour *neigh,
+                                             struct in6_addr *solicit,
+                                             struct in6_addr *daddr,
+                                             struct in6_addr *saddr);
+
+extern void                    ndisc_send_rs(struct device *dev,
+                                             struct in6_addr *saddr,
+                                             struct in6_addr *daddr);
+
+extern int                     (*ndisc_eth_hook) (unsigned char *,
+                                                  struct device *,
+                                                  struct sk_buff *);
+
+extern void                    ndisc_forwarding_on(void);
+extern void                    ndisc_forwarding_off(void);
+
+extern void                    ndisc_send_redirect(struct sk_buff *skb,
+                                                   struct neighbour *neigh,
+                                                   struct in6_addr *target);
+
+struct rt6_info *              dflt_rt_lookup(void);
+
+extern unsigned long   nd_rand_seed;
+extern int             ipv6_random(void);
+
+
+static __inline__ void ndisc_dec_neigh(struct neighbour *neigh)
+{
+       atomic_dec(&neigh->refcnt);
+}
+
+#endif /* __KERNEL__ */
+
+
+#endif
index 0d7cf3fab0f86da54be2adf03673584a24b2dd29..3b92e80bcac91df73426d6498dc92d6267fae8b6 100644 (file)
@@ -2,9 +2,10 @@
 #define __NET_NETLINK_H
 
 #define NET_MAJOR 36           /* Major 18 is reserved for networking                                          */
-#define MAX_LINKS 11           /* 18,0 for route updates, 18,1 for SKIP, 18,2 debug tap 18,3 PPP reserved      */
+#define MAX_LINKS 12           /* 18,0 for route updates, 18,1 for SKIP, 18,2 debug tap 18,3 PPP reserved      */
                                /* 4-7 are psi0-psi3  8 is arpd 9 is ppp */
                                /* 10 is for IPSEC <John Ioannidis> */
+                               /* 11 IPv6 route updates                */
 #define MAX_QBYTES 32768       /* Maximum bytes in the queue                                                   */
 
 #include <linux/config.h>
@@ -23,6 +24,7 @@ extern int init_netlink(void);
 #define NETLINK_ARPD           8
 #define NETLINK_NET_PPP                9       /* Non tty PPP devices */
 #define NETLINK_IPSEC          10      /* IPSEC */
+#define NETLINK_ROUTE6         11      /* af_inet6 route comm channel */
 
 #ifdef CONFIG_RTNETLINK
 extern void ip_netlink_msg(unsigned long, __u32, __u32, __u32, short, short, char *);
index ae328b6982eb19b653863e727c4132cf6b7e8b0c..1556c2cc8a02913e154a0ce9db874828c377861d 100644 (file)
  *             Alan Cox        :       Added a name field and a frag handler
  *                                     field for later.
  *             Alan Cox        :       Cleaned up, and sorted types.
+ *             Pedro Roque     :       inet6 protocols
  */
  
 #ifndef _PROTOCOL_H
 #define _PROTOCOL_H
 
+#include <linux/config.h>
+#include <linux/in6.h>
+#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
+#include <linux/ipv6.h>
+#endif
+
 #define MAX_INET_PROTOS        32              /* Must be a power of 2         */
 
 
@@ -33,8 +40,7 @@ struct inet_protocol {
                                   unsigned short len, __u32 saddr,
                                   int redo, struct inet_protocol *protocol);
   void                 (*err_handler)(int type, int code, unsigned char *buff,
-                                      __u32 daddr,
-                                      __u32 saddr,
+                                      __u32 info, __u32 daddr, __u32 saddr,
                                       struct inet_protocol *protocol);
   struct inet_protocol *next;
   unsigned char                protocol;
@@ -43,13 +49,41 @@ struct inet_protocol {
   const char           *name;
 };
 
+#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
+struct inet6_protocol {
+  int                  (*handler)(struct sk_buff *skb, struct device *dev,
+                                  struct in6_addr *saddr,
+                                  struct in6_addr *daddr,
+                                  struct ipv6_options *opt, 
+                                  unsigned short len,
+                                  int redo, struct inet6_protocol *protocol);
+
+  void                 (*err_handler)(int type, int code, unsigned char *buff,
+                                      __u32 info, struct in6_addr *saddr,
+                                      struct in6_addr *daddr,
+                                      struct inet6_protocol *protocol);
+  struct inet6_protocol *next;
+  unsigned char                protocol;
+  unsigned char                copy:1;
+  void                 *data;
+  const char           *name;
+};
+#endif
 
 extern struct inet_protocol *inet_protocol_base;
 extern struct inet_protocol *inet_protos[MAX_INET_PROTOS];
 
+#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
+extern struct inet6_protocol *inet6_protocol_base;
+extern struct inet6_protocol *inet6_protos[MAX_INET_PROTOS];
+#endif
 
 extern void            inet_add_protocol(struct inet_protocol *prot);
 extern int             inet_del_protocol(struct inet_protocol *prot);
 
+#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
+extern void            inet6_add_protocol(struct inet6_protocol *prot);
+extern int             inet6_del_protocol(struct inet6_protocol *prot);
+#endif
 
 #endif /* _PROTOCOL_H */
diff --git a/include/net/rawv6.h b/include/net/rawv6.h
new file mode 100644 (file)
index 0000000..30ecc79
--- /dev/null
@@ -0,0 +1,21 @@
+#ifndef _NET_RAWV6_H
+#define _NET_RAWV6_H
+
+#ifdef __KERNEL__
+extern int                     rawv6_rcv(struct sk_buff *skb, 
+                                         struct device *dev,
+                                         struct in6_addr *saddr, 
+                                         struct in6_addr *daddr,
+                                         struct ipv6_options *opt, 
+                                         unsigned short len);
+
+
+extern void                    rawv6_err(struct sock *sk,
+                                         int type, int code, 
+                                         unsigned char *buff,
+                                         struct in6_addr *saddr,
+                                         struct in6_addr *daddr);
+
+#endif
+
+#endif
diff --git a/include/net/sit.h b/include/net/sit.h
new file mode 100644 (file)
index 0000000..98bb5b3
--- /dev/null
@@ -0,0 +1,39 @@
+/*
+ *     SIT tunneling device - definitions
+ *     Linux INET6 implementation
+ *
+ *     Authors:
+ *     Pedro Roque             <roque@di.fc.ul.pt>     
+ *
+ *
+ *     This program is free software; you can redistribute it and/or
+ *      modify it under the terms of the GNU General Public License
+ *      as published by the Free Software Foundation; either version
+ *      2 of the License, or (at your option) any later version.
+ */
+
+#ifndef _NET_SIT_H
+#define _NET_SIT_H
+
+struct sit_mtu_info {
+       __u32                   addr;           /* IPv4 destination     */
+       unsigned long           tstamp;         /* last use tstamp      */
+       __u32                   mtu;            /* Path MTU             */
+       struct sit_mtu_info     *next;
+};
+
+struct sit_vif {
+       char                    name[8];
+       struct device           *dev;
+       struct sit_vif          *next;
+};
+
+extern int                             sit_init(void);
+extern void                            sit_cleanup(void);
+
+extern struct device *                 sit_add_tunnel(__u32 dstaddr);
+
+#define SIT_GC_TIMEOUT         (3*60*HZ)
+#define SIT_GC_FREQUENCY       (2*60*HZ)
+
+#endif
index 552292be65ced4b5e40e3b870947bdb642f97eaa..a9dd844d16728d194aaa87012576cb757392ee95 100644 (file)
@@ -48,6 +48,26 @@ struct ip_mib
        unsigned long   IpFragCreates;
 };
  
+struct ipv6_mib
+{
+       unsigned long   Ip6InReceives;
+       unsigned long   Ip6InHdrErrors;
+       unsigned long   Ip6InAddrErrors;
+       unsigned long   Ip6ForwDatagrams;
+       unsigned long   Ip6InUnknownProtos;
+       unsigned long   Ip6InDiscards;
+       unsigned long   Ip6InDelivers;
+       unsigned long   Ip6OutRequests;
+       unsigned long   Ip6OutDiscards;
+       unsigned long   Ip6OutNoRoutes;
+       unsigned long   Ip6ReasmTimeout;
+       unsigned long   Ip6ReasmReqds;
+       unsigned long   Ip6ReasmOKs;
+       unsigned long   Ip6ReasmFails;
+       unsigned long   Ip6FragOKs;
+       unsigned long   Ip6FragFails;
+       unsigned long   Ip6FragCreates;
+};
  
 struct icmp_mib
 {
index 8a7c602f700271043a66453c6a36e6fdcaff8857..b0ea99a80aeadfb8d81b4a4a4d9663e69f954713 100644 (file)
 #ifndef _SOCK_H
 #define _SOCK_H
 
+#include <linux/config.h>
 #include <linux/timer.h>
 #include <linux/ip.h>          /* struct options */
 #include <linux/in.h>          /* struct sockaddr_in */
+
+#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
+#include <linux/in6.h>         /* struct sockaddr_in6 */
+#include <linux/ipv6.h>                /* dest_cache, inet6_options */
+#include <linux/icmpv6.h>
+#include <net/if_inet6.h>      /* struct ipv6_mc_socklist */
+#endif
+
 #include <linux/tcp.h>         /* struct tcphdr */
 #include <linux/config.h>
 
@@ -116,7 +125,53 @@ struct ipx_opt
 };
 #endif
 
-#ifdef CONFIG_NUTCP
+#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
+struct ipv6_pinfo
+{
+       struct in6_addr         saddr;
+       struct in6_addr         rcv_saddr;
+       struct in6_addr         daddr;
+
+       __u32                   flow_lbl;
+       __u8                    priority;
+       __u8                    hop_limit;
+
+       __u8                    mcast_hops;
+
+       /* sockopt flags */
+
+       __u8                    recvsrcrt:1,
+                               rxinfo:1,
+                               mc_loop:1,
+                                unused:4;
+
+       /* device for outgoing mcast packets */
+
+       struct device           *mc_if;
+
+       struct ipv6_mc_socklist *ipv6_mc_list;
+       /* 
+        * destination cache entry pointer
+        * contains a pointer to neighbour cache
+        * and other info related to network level 
+        * (ex. PMTU)
+        */
+       
+       struct dest_entry       *dest;
+       __u32                   dc_sernum;
+
+       struct ipv6_options     *opt;
+};
+
+struct raw6_opt {
+       __u32                   checksum;       /* perform checksum */
+       __u32                   offset;         /* checksum offset  */
+
+       struct icmp6_filter     filter;
+};
+
+#endif /* IPV6 */
+
 struct tcp_opt
 {
 /*
@@ -132,6 +187,14 @@ struct tcp_opt
        __u32   snd_up;         /* Outgoing urgent pointer              */
        __u32   snd_wl1;        /* Sequence for window update           */
        __u32   snd_wl2;        /* Ack sequence for update              */
+
+       __u32   rcv_wup;        /* rcv_nxt on last window update sent   */
+
+
+       __u32   srtt;           /* smoothed round trip time << 3        */
+       __u32   mdev;           /* medium deviation                     */
+       __u32   rto;            /* retransmit timeout                   */
+       __u32   backoff;        /* backoff                              */
 /*
  *     Slow start and congestion control (see also Nagle, and Karn & Partridge)
  */
@@ -145,14 +208,85 @@ struct tcp_opt
        struct timer_list       completion_timer;       /* Up/Down timer */
        struct timer_list       probe_timer;            /* Probes       */
        struct timer_list       retransmit_timer;       /* Resend (no ack) */
+
+       __u32   basertt;        /* Vegas baseRTT */
+
+       __u8    delayed_acks;
+       __u8    dup_acks;
+
+       __u32   lrcvtime;       /* timestamp of last received data packet  */
+       __u32   rcv_tstamp;     /* timestamp of last received packet  */
+       __u32   iat_mdev;       /* interarrival time medium deviation */
+       __u32   iat;            /* interarrival time */
+       __u32   ato;            /* delayed ack timeout */
+
+       __u32   high_seq;
+/*
+ *     new send pointers
+ */
+       struct sk_buff *        send_head;
+       struct sk_buff *        retrans_head;   /* retrans head can be 
+                                                * different to the head of
+                                                * write queue if we are doing
+                                                * fast retransmit
+                                                */
+/*
+ * pending events
+ */
+       __u8    pending;
+
+/*
+ *     Header prediction flags
+ *     0x5?10 << 16 + snd_wnd in net byte order
+ */
+       __u32   pred_flags;
+       __u32   snd_wnd;                /* The window we expect to receive */
+
+       __u32   probes_out;             /* unanswered 0 window probes      */
+
+       struct open_request     *syn_wait_queue;
+       struct tcp_func         *af_specific;
 };
-#endif
+
        
 /*
  * This structure really needs to be cleaned up.
  * Most of it is for TCP, and not used by any of
  * the other protocols.
  */
+
+/*
+ * The idea is to start moving to a newer struct gradually
+ * 
+ * IMHO the newer struct should have the following format:
+ * 
+ *     struct sock {
+ *             sockmem [mem, proto, callbacks]
+ *
+ *             union or struct {
+ *                     netrom;
+ *                     ax_25;
+ *             } ll_pinfo;
+ *     
+ *             union {
+ *                     ipv4;
+ *                     ipv6;
+ *                     ipx;
+ *             } net_pinfo;
+ *
+ *             union {
+ *                     tcp;
+ *                     udp;
+ *                     spx;
+ *             } tp_pinfo;
+ *
+ *     }
+ */
+
+/*
+ *  TCP will start to use the new protinfo while *still using the old* fields 
+ */
+
 struct sock 
 {
        struct options          *opt;
@@ -160,17 +294,15 @@ struct sock
        atomic_t                rmem_alloc;
        unsigned long           allocation;             /* Allocation mode */
        __u32                   write_seq;
-       __u32                   sent_seq;
-       __u32                   acked_seq;
        __u32                   copied_seq;
-       __u32                   rcv_ack_seq;
-       unsigned short          rcv_ack_cnt;            /* count of same ack */
-       __u32                   window_seq;
        __u32                   fin_seq;
        __u32                   syn_seq;
        __u32                   urg_seq;
        __u32                   urg_data;
        int                     users;                  /* user count */
+
+       unsigned char           delayed_acks,
+                               dup_acks;
   /*
    *   Not all are volatile, but some are, so we
    *   might as well say they all are.
@@ -183,7 +315,6 @@ struct sock
                                reuse,
                                keepopen,
                                linger,
-                               delay_acks,
                                destroy,
                                ack_timed,
                                no_check,
@@ -196,52 +327,68 @@ struct sock
        struct sock             *next;
        struct sock             *prev; /* Doubly linked chain.. */
        struct sock             *pair;
-       struct sk_buff          * volatile send_head;
-       struct sk_buff          * volatile send_next;
-       struct sk_buff          * volatile send_tail;
+
+       struct sk_buff          * send_head;
+       struct sk_buff          * send_tail;
+
        struct sk_buff_head     back_log;
        struct sk_buff          *partial;
        struct timer_list       partial_timer;
-       long                    retransmits;
+       atomic_t                retransmits;
+
        struct sk_buff_head     write_queue,
-                               receive_queue;
+                               receive_queue,
+                               out_of_order_queue;
+
+       unsigned short          family;
        struct proto            *prot;
        struct wait_queue       **sleep;
+
        __u32                   daddr;
        __u32                   saddr;          /* Sending source */
        __u32                   rcv_saddr;      /* Bound address */
+
        unsigned short          max_unacked;
-       unsigned short          window;
-       __u32                   lastwin_seq;    /* sequence number when we last updated the window we offer */
-       __u32                   high_seq;       /* sequence number when we did current fast retransmit */
-       volatile unsigned long  ato;            /* ack timeout */
-       volatile unsigned long  lrcvtime;       /* jiffies at last data rcv */
-       volatile unsigned long  idletime;       /* jiffies at last rcv */
+
+
        unsigned short          bytes_rcv;
 /*
  *     mss is min(mtu, max_window) 
  */
        unsigned short          mtu;       /* mss negotiated in the syn's */
-       volatile unsigned short mss;       /* current eff. mss - can change */
-       volatile unsigned short user_mss;  /* mss requested by user in ioctl */
-       volatile unsigned short max_window;
+       unsigned short          mss;       /* current eff. mss - can change */
+       unsigned short          user_mss;  /* mss requested by user in ioctl */
+       unsigned short          max_window;
        unsigned long           window_clamp;
        unsigned int            ssthresh;
        unsigned short          num;
-       volatile unsigned short cong_window;
-       volatile unsigned short cong_count;
-       volatile unsigned short packets_out;
-       volatile unsigned short shutdown;
-       volatile unsigned long  rtt;
-       volatile unsigned long  mdev;
-       volatile unsigned long  rto;
 
+       unsigned short          cong_window;
+       unsigned short          cong_count;
+       atomic_t                packets_out;
+       unsigned short          shutdown;
+
+       unsigned short          window;         /* used by netrom/ax.25 */
+
+#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
+       union {
+               struct ipv6_pinfo       af_inet6;
+       } net_pinfo;
+#endif
+
+       union {
+               struct tcp_opt          af_tcp;
+#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
+               struct raw6_opt         tp_raw;
+#endif
+       } tp_pinfo;
 /*
  *     currently backoff isn't used, but I'm maintaining it in case
  *     we want to go back to a backoff formula that needs it
  */
-       volatile unsigned short backoff;
+/* 
+       unsigned short          backoff;
+ */
        int                     err, err_soft;  /* Soft holds errors that don't
                                                   cause failure but are the cause
                                                   of a persistent failure not just
@@ -252,8 +399,8 @@ struct sock
        unsigned char           max_ack_backlog;
        unsigned char           priority;
        unsigned char           debug;
-       unsigned short          rcvbuf;
-       unsigned short          sndbuf;
+       int                     rcvbuf;
+       int                     sndbuf;
        unsigned short          type;
        unsigned char           localroute;     /* Route locally only */
 #ifdef CONFIG_AX25
@@ -328,63 +475,82 @@ struct sock
        void                    (*data_ready)(struct sock *sk,int bytes);
        void                    (*write_space)(struct sock *sk);
        void                    (*error_report)(struct sock *sk);
-  
+
+       int                     (*backlog_rcv) (struct sock *sk,
+                                               struct sk_buff *skb);  
 };
 
+#if 0
+/*
+ *     Inet protocol options
+ */
+struct inet_options {
+       __u8                            version;
+       union {
+               struct options          opt_v4;
+               struct ipv6_options     opt_v6;
+       } u;
+};
+#endif
+
 /*
  *     IP protocol blocks we attach to sockets.
+ *     socket layer -> transport layer interface
+ *     transport -> network interface is defined by struct inet_proto
  */
  
 struct proto 
 {
-       void                    (*close)(struct sock *sk, unsigned long timeout);
-       int                     (*build_header)(struct sk_buff *skb,
-                                       __u32 saddr,
-                                       __u32 daddr,
-                                       struct device **dev, int type,
-                                       struct options *opt, int len,
-                                       int tos, int ttl, struct rtable ** rp);
+       void                    (*close)(struct sock *sk, 
+                                       unsigned long timeout);
        int                     (*connect)(struct sock *sk,
-                                       struct sockaddr_in *usin, int addr_len);
+                                       struct sockaddr *uaddr, 
+                                       int addr_len);
+
        struct sock *           (*accept) (struct sock *sk, int flags);
-       void                    (*queue_xmit)(struct sock *sk,
-                                       struct device *dev, struct sk_buff *skb,
-                                       int free);
        void                    (*retransmit)(struct sock *sk, int all);
        void                    (*write_wakeup)(struct sock *sk);
        void                    (*read_wakeup)(struct sock *sk);
-       int                     (*rcv)(struct sk_buff *buff, struct device *dev,
-                                       struct options *opt, __u32 daddr,
-                                       unsigned short len, __u32 saddr,
-                                       int redo, struct inet_protocol *protocol);
+
        int                     (*select)(struct sock *sk, int which,
                                        select_table *wait);
+
        int                     (*ioctl)(struct sock *sk, int cmd,
                                        unsigned long arg);
        int                     (*init)(struct sock *sk);
+       int                     (*destroy)(struct sock *sk);
        void                    (*shutdown)(struct sock *sk, int how);
-       int                     (*setsockopt)(struct sock *sk, int level, int optname,
-                                       char *optval, int optlen);
-       int                     (*getsockopt)(struct sock *sk, int level, int optname,
-                                       char *optval, int *option);      
-       int                     (*sendmsg)(struct sock *sk, struct msghdr *msg, int len,
-                                       int noblock, int flags);
-       int                     (*recvmsg)(struct sock *sk, struct msghdr *msg, int len,
-                                       int noblock, int flags, int *addr_len);
-       int                     (*bind)(struct sock *sk, struct sockaddr *uaddr, int addr_len);
+       int                     (*setsockopt)(struct sock *sk, int level, 
+                                       int optname, char *optval, int optlen);
+       int                     (*getsockopt)(struct sock *sk, int level, 
+                                       int optname, char *optval, 
+                                       int *option);    
+       int                     (*sendmsg)(struct sock *sk, struct msghdr *msg,
+                                       int len, int noblock, int flags);
+       int                     (*recvmsg)(struct sock *sk, struct msghdr *msg,
+                                       int len, int noblock, int flags, 
+                                       int *addr_len);
+       int                     (*bind)(struct sock *sk, 
+                                       struct sockaddr *uaddr, int addr_len);
+
+       int                     (*backlog_rcv) (struct sock *sk, 
+                                               struct sk_buff *skb);
+
        unsigned short          max_header;
        unsigned long           retransmits;
        char                    name[32];
        int                     inuse, highestinuse;
-       struct sock *           sock_array[SOCK_ARRAY_SIZE];
+       struct sock **          sock_array;
 };
 
-#define TIME_WRITE     1
-#define TIME_CLOSE     2
-#define TIME_KEEPOPEN  3
-#define TIME_DESTROY   4
-#define TIME_DONE      5       /* Used to absorb those last few packets */
-#define TIME_PROBE0    6
+#define TIME_WRITE     1       /* Not yet used */
+#define TIME_RETRANS   2       /* Retransmit timer */
+#define TIME_DACK      3       /* Delayed ack timer */
+#define TIME_CLOSE     4
+#define TIME_KEEPOPEN  5
+#define TIME_DESTROY   6
+#define TIME_DONE      7       /* Used to absorb those last few packets */
+#define TIME_PROBE0    8
 
 /*
  *     About 10 seconds 
@@ -445,13 +611,32 @@ here:
                __release_sock(sk);
 }
 
+/*
+ *     This might not be the most appropriate place for these two,
+ *     but since they are used by a lot of the net related code
+ *     at least they get declared in an include that is common to all
+ */
+
+static __inline__ int min(unsigned int a, unsigned int b)
+{
+       if (a > b)
+               a = b; 
+       return a;
+}
+
+static __inline__ int max(unsigned int a, unsigned int b)
+{
+       if (a < b)
+               a = b;
+       return a;
+}
 
 extern struct sock *           sk_alloc(int priority);
 extern void                    sk_free(struct sock *sk);
 extern void                    destroy_sock(struct sock *sk);
 extern unsigned short          get_new_socknum(struct proto *,
                                                unsigned short);
-extern void                    put_sock(unsigned short, struct sock *); 
+extern void                    inet_put_sock(unsigned short, struct sock *); 
 extern struct sock             *get_sock(struct proto *, unsigned short,
                                          unsigned long, unsigned short,
                                          unsigned long,
@@ -537,8 +722,8 @@ extern __inline__ int sock_error(struct sock *sk)
  
 extern struct sock *timer_base;
 
-extern void delete_timer (struct sock *);
-extern void reset_timer (struct sock *, int, unsigned long);
+extern void net_delete_timer (struct sock *);
+extern void net_reset_timer (struct sock *, int, unsigned long);
 extern void net_timer (unsigned long);
 
 
index d9e8a4e8a48d0f78e132b840bba0993e640fa198..2c2c6593dba520982d59afc5b6070081cf9356f2 100644 (file)
 #ifndef _TCP_H
 #define _TCP_H
 
+#include <linux/config.h>
 #include <linux/tcp.h>
 #include <net/checksum.h>
 
+
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#define NETHDR_SIZE    sizeof(struct ipv6hdr)
+#else
+#define NETHDR_SIZE    sizeof(struct iphdr) + 40
+#endif
+
 /*
  * 40 is maximal IP options size
  * 4  is TCP option size (MSS)
  */
-#define MAX_SYN_SIZE   (sizeof(struct iphdr) + 40 + sizeof(struct tcphdr) + 4 + MAX_HEADER + 15)
-#define MAX_FIN_SIZE   (sizeof(struct iphdr) + 40 + sizeof(struct tcphdr) + MAX_HEADER + 15)
-#define MAX_ACK_SIZE   (sizeof(struct iphdr) + 40 + sizeof(struct tcphdr) + MAX_HEADER + 15)
-#define MAX_RESET_SIZE (sizeof(struct iphdr) + 40 + sizeof(struct tcphdr) + MAX_HEADER + 15)
+
+#define MAX_SYN_SIZE   (NETHDR_SIZE + sizeof(struct tcphdr) + 4 + MAX_HEADER + 15)
+#define MAX_FIN_SIZE   (NETHDR_SIZE + sizeof(struct tcphdr) + MAX_HEADER + 15)
+#define MAX_ACK_SIZE   (NETHDR_SIZE + sizeof(struct tcphdr) + MAX_HEADER + 15)
+#define MAX_RESET_SIZE (NETHDR_SIZE + sizeof(struct tcphdr) + MAX_HEADER + 15)
 
 #define MAX_WINDOW     32767           /* Never offer a window over 32767 without using
                                           window scaling (not yet supported). Some poor
                                           stacks do signed 16bit maths! */
 #define MIN_WINDOW     2048
 #define MAX_ACK_BACKLOG        2
-#define MAX_DUP_ACKS   2
+#define MAX_DELAY_ACK  2
 #define MIN_WRITE_SPACE        2048
 #define TCP_WINDOW_DIFF        2048
 
@@ -58,7 +67,8 @@
 #define TCP_TIMEOUT_LEN        (15*60*HZ) /* should be about 15 mins           */
 #define TCP_TIMEWAIT_LEN (60*HZ) /* how long to wait to successfully 
                                  * close the socket, about 60 seconds  */
-#define TCP_FIN_TIMEOUT (3*60*HZ) /* BSD style FIN_WAIT2 deadlock breaker */                             
+#define TCP_FIN_TIMEOUT (3*60*HZ) /* BSD style FIN_WAIT2 deadlock breaker */
+
 #define TCP_ACK_TIME   (3*HZ)  /* time to delay before sending an ACK  */
 #define TCP_DONE_TIME  (5*HZ/2)/* maximum time to wait before actually
                                 * destroying a socket                  */
 #define TCP_PROBEWAIT_LEN (1*HZ)/* time to wait between probes when
                                 * I've got something to write and
                                 * there is no window                   */
-
+#define TCP_KEEPALIVE_TIME (180*60*HZ) /* two hours */
+#define TCP_KEEPALIVE_PROBES   9       /* Max of 9 keepalive probes    */
+#define TCP_KEEPALIVE_PERIOD (75*HZ)   /* period of keepalive check    */
 #define TCP_NO_CHECK   0       /* turn to one if you want the default
                                 * to be no checksum                    */
 
+#define TCP_SYNACK_PERIOD      (HZ/2)
 
 /*
  *     TCP option
 #define TCPOPT_WINDOW          3       /* Window scaling */
 #define TCPOPT_TIMESTAMP       8       /* Better RTT estimations/PAWS */
 
+/*
+ *     TCP option lengths
+ */
+
+#define TCPOLEN_MSS            4
+#define TCPOLEN_WINDOW         3
+#define TCPOLEN_TIMESTAMP      10
+
+
+/*
+ *     TCP Vegas constants
+ */
+
+#define TCP_VEGAS_ALPHA                2       /*  v_cong_detect_top_nseg */
+#define TCP_VEGAS_BETA         4       /*  v_cong_detect_bot_nseg */
+#define TCP_VEGAS_GAMMA                1       /*  v_exp_inc_nseg         */
+
+struct open_request;
+
+struct or_calltable {
+       void (*rtx_syn_ack)     (struct sock *sk, struct open_request *req);
+       void (*destructor)      (struct open_request *req);
+};
+
+struct open_request {
+       struct open_request     *dl_next;
+       struct open_request     *dl_prev;
+       __u32                   rcv_isn;
+       __u32                   snt_isn;
+       __u16                   mss;
+       __u16                   rmt_port;
+       unsigned long           expires;
+       int                     retrans;
+       struct or_calltable     *class;
+       struct sock             *sk;
+};
+
+struct tcp_v4_open_req {
+       struct open_request     req;
+       __u32                   loc_addr;
+       __u32                   rmt_addr;
+       struct options          *opt;
+};
+
+#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
+struct tcp_v6_open_req {
+       struct open_request     req;
+       struct in6_addr         loc_addr;
+       struct in6_addr         rmt_addr;
+       struct ipv6_options     *opt;
+       struct device           *dev;
+};
+#endif
+
+/*
+ *     Pointers to address related TCP functions
+ *     (i.e. things that depend on the address family)
+ */
+
+struct tcp_func {
+       int                     (*build_net_header)     (struct sock *sk, 
+                                                        struct sk_buff *skb);
+
+       void                    (*queue_xmit)           (struct sock *sk, 
+                                                        struct device *dev,
+                                                        struct sk_buff *skb, 
+                                                        int free);
+
+       void                    (*send_check)           (struct sock *sk,
+                                                        struct tcphdr *th,
+                                                        int len,
+                                                        struct sk_buff *skb);
+
+       int                     (*rebuild_header)       (struct sock *sk,
+                                                        struct sk_buff *skb);
+
+       int                     (*conn_request)         (struct sock *sk,
+                                                        struct sk_buff *skb,
+                                                        void *opt,
+                                                        __u32 isn);
+
+       struct sock *           (*syn_recv_sock)        (struct sock *sk,
+                                                        struct sk_buff *skb,
+                                                        struct open_request *req);
+       
+       __u32                   (*init_sequence)        (struct sock *sk,
+                                                        struct sk_buff *skb);
+
+       struct sock *           (*get_sock)             (struct sk_buff *skb,
+                                                        struct tcphdr *th);
+
+       int                     (*setsockopt)           (struct sock *sk, 
+                                                        int level, 
+                                                        int optname, 
+                                                        char *optval, 
+                                                        int optlen);
+
+       int                     (*getsockopt)           (struct sock *sk, 
+                                                        int level, 
+                                                        int optname, 
+                                                        char *optval, 
+                                                        int *optlen);
+
+
+       void                    (*addr2sockaddr)        (struct sock *sk,
+                                                        struct sockaddr *);
+
+       int sockaddr_len;
+};
 
 /*
  * The next routines deal with comparing 32 bit unsigned ints
@@ -111,40 +233,95 @@ extern __inline int between(__u32 seq1, __u32 seq2, __u32 seq3)
        return (after(seq1+1, seq2) && before(seq1, seq3+1));
 }
 
-static __inline__ int min(unsigned int a, unsigned int b)
-{
-       if (a > b)
-               a = b;
-       return a;
-}
-
-static __inline__ int max(unsigned int a, unsigned int b)
-{
-       if (a < b)
-               a = b;
-       return a;
-}
 
 extern struct proto tcp_prot;
 extern struct tcp_mib tcp_statistics;
 
-extern void    tcp_err(int type, int code, unsigned char *header, __u32 daddr,
-                       __u32, struct inet_protocol *protocol);
-extern void    tcp_shutdown (struct sock *sk, int how);
-extern int     tcp_rcv(struct sk_buff *skb, struct device *dev,
-                       struct options *opt, __u32 daddr,
-                       unsigned short len, __u32 saddr, int redo,
-                       struct inet_protocol *protocol);
+extern void                    tcp_v4_err(int type, int code,
+                                          unsigned char *header, __u32 info,
+                                          __u32 daddr, __u32 saddr,
+                                          struct inet_protocol *protocol);
+
+extern void                    tcp_shutdown (struct sock *sk, int how);
+
+extern int                     tcp_v4_rcv(struct sk_buff *skb, 
+                                          struct device *dev,
+                                          struct options *opt, __u32 daddr,
+                                          unsigned short len, __u32 saddr, 
+                                          int redo,
+                                          struct inet_protocol *protocol);
+
+extern int                     tcp_do_sendmsg(struct sock *sk, 
+                                              int iovlen, struct iovec *iov,
+                                              int len, int nonblock, 
+                                              int flags);
+
+extern int                     tcp_ioctl(struct sock *sk, 
+                                         int cmd, 
+                                         unsigned long arg);
+
+extern int                     tcp_rcv_state_process(struct sock *sk, 
+                                                     struct sk_buff *skb,
+                                                     struct tcphdr *th,
+                                                     void *opt, __u16 len);
+
+extern void                    tcp_rcv_established(struct sock *sk, 
+                                                   struct sk_buff *skb,
+                                                   struct tcphdr *th, 
+                                                   __u16 len);
+
+extern void                    tcp_close(struct sock *sk, 
+                                         unsigned long timeout);
+extern struct sock *           tcp_accept(struct sock *sk, int flags);
+extern int                     tcp_select(struct sock *sk, int sel_type, 
+                                          select_table *wait);
+extern int                     tcp_getsockopt(struct sock *sk, int level, 
+                                              int optname, char *optval, 
+                                              int *optlen);
+extern int                     tcp_setsockopt(struct sock *sk, int level, 
+                                              int optname, char *optval, 
+                                              int optlen);
+extern void                    tcp_set_keepalive(struct sock *sk, int val);
+extern int                     tcp_recvmsg(struct sock *sk, 
+                                           struct msghdr *msg,
+                                           int len, int nonblock, 
+                                           int flags, int *addr_len);
+
+extern int                     tcp_parse_options(struct tcphdr *th);
+
+/*
+ *     TCP v4 functions exported for the inet6 API
+ */
+
+extern int                     tcp_v4_rebuild_header(struct sock *sk, 
+                                                     struct sk_buff *skb);
+
+extern int                     tcp_v4_build_header(struct sock *sk, 
+                                                   struct sk_buff *skb);
+
+extern void                    tcp_v4_send_check(struct sock *sk, 
+                                                 struct tcphdr *th, int len, 
+                                                 struct sk_buff *skb);
+
+extern int                     tcp_v4_conn_request(struct sock *sk,
+                                                   struct sk_buff *skb,
+                                                   void *ptr, __u32 isn);
+
+extern struct sock *           tcp_v4_syn_recv_sock(struct sock *sk,
+                                                    struct sk_buff *skb,
+                                                    struct open_request *req);
+
+extern int                     tcp_v4_backlog_rcv(struct sock *sk,
+                                                  struct sk_buff *skb);
+extern int                     tcp_v4_connect(struct sock *sk,
+                                              struct sockaddr *uaddr,
+                                              int addr_len);
 
-extern int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg);
 
 extern void tcp_read_wakeup(struct sock *);
 extern void tcp_write_xmit(struct sock *);
 extern void tcp_time_wait(struct sock *);
-extern void tcp_retransmit(struct sock *, int);
 extern void tcp_do_retransmit(struct sock *, int);
-extern void tcp_send_check(struct tcphdr *th, unsigned long saddr, 
-               unsigned long daddr, int len, struct sk_buff *skb);
 
 /* tcp_output.c */
 
@@ -152,16 +329,10 @@ extern void tcp_send_probe0(struct sock *);
 extern void tcp_send_partial(struct sock *);
 extern void tcp_write_wakeup(struct sock *);
 extern void tcp_send_fin(struct sock *sk);
-extern void tcp_send_synack(struct sock *, struct sock *, struct sk_buff *);
-extern void tcp_send_skb(struct sock *, struct sk_buff *);
+extern int  tcp_send_synack(struct sock *);
+extern int  tcp_send_skb(struct sock *, struct sk_buff *);
 extern void tcp_send_ack(struct sock *sk);
-extern void tcp_send_delayed_ack(struct sock *sk, int max_timeout, unsigned long timeout);
-extern void tcp_send_reset(unsigned long saddr, unsigned long daddr, struct tcphdr *th,
-         struct proto *prot, struct options *opt, struct device *dev, int tos, int ttl);
-
-extern void tcp_enqueue_partial(struct sock *, struct sk_buff *);
-extern struct sk_buff * tcp_dequeue_partial(struct sock *);
-extern void tcp_shrink_skb(struct sock *,struct sk_buff *,u32);
+extern void tcp_send_delayed_ack(struct sock *sk, int max_timeout);
 
 /* tcp_input.c */
 extern void tcp_cache_zap(void);
@@ -170,44 +341,152 @@ extern void tcp_cache_zap(void);
 extern int tcp_chkaddr(struct sk_buff *);
 
 /* tcp_timer.c */
-#define     tcp_reset_msl_timer(x,y,z) reset_timer(x,y,z)
+#define     tcp_reset_msl_timer(x,y,z) net_reset_timer(x,y,z)
 extern void tcp_reset_xmit_timer(struct sock *, int, unsigned long);
-extern void tcp_delack_timer(unsigned long);
+extern void tcp_clear_xmit_timer(struct sock *, int);
+extern int  tcp_timer_is_set(struct sock *, int);
+extern void tcp_init_xmit_timers(struct sock *);
+extern void tcp_clear_xmit_timers(struct sock *);
+
 extern void tcp_retransmit_timer(unsigned long);
+extern void tcp_delack_timer(unsigned long);
+extern void tcp_probe_timer(unsigned long);
 
-static __inline__ int tcp_old_window(struct sock * sk)
-{
-       return sk->window - (sk->acked_seq - sk->lastwin_seq);
-}
 
-extern int tcp_new_window(struct sock *);
+/*
+ *     TCP slow timer
+ */
+extern struct timer_list       tcp_slow_timer;
+
+struct tcp_sl_timer {
+       atomic_t        count;
+       unsigned long   period;
+       unsigned long   last;
+       void (*handler) (unsigned long);
+};
+
+#define TCP_SLT_SYNACK         0
+#define TCP_SLT_KEEPALIVE      1
+#define TCP_SLT_MAX            2
 
+extern struct tcp_sl_timer tcp_slt_array[TCP_SLT_MAX];
 /*
- * Return true if we should raise the window when we
- * have cleaned up the receive queue. We don't want to
- * do this normally, only if it makes sense to avoid
- * zero window probes..
- *
- * We do this only if we can raise the window noticeably.
+ *      This function returns the amount that we can raise the
+ *      usable window based on the following constraints
+ *  
+ *     1. The window can never be shrunk once it is offered (RFC 793)
+ *     2. We limit memory per socket
  */
-static __inline__ int tcp_raise_window(struct sock * sk)
+
+static __inline__ unsigned short tcp_raise_window(struct sock *sk)
 {
-       int new = tcp_new_window(sk);
-       return new && (new >= 2*tcp_old_window(sk));
+       struct tcp_opt *tp = &sk->tp_pinfo.af_tcp;
+       long free_space = sock_rspace(sk);
+       long window;
+
+       if (free_space > 1024)
+               free_space &= ~0x3FF; 
+
+       if(sk->window_clamp)
+               free_space = min(sk->window_clamp, free_space);
+       /* 
+         * compute the actual window i.e. 
+         * old_window - received_bytes_on_that_win 
+        */
+
+
+       window = tp->rcv_wnd - (tp->rcv_nxt - tp->rcv_wup);
+
+
+       /*
+        *      We need to send an ack right away if
+        *      our rcv window is blocking the sender and 
+        *      we have more free space to offer.
+        */
+
+       if (window < (sk->mss << 1) && free_space > window)
+               return 1;
+
+       return 0;
 }
 
 static __inline__ unsigned short tcp_select_window(struct sock *sk)
 {
-       int window = tcp_new_window(sk);
-       int oldwin = tcp_old_window(sk);
-
-       /* Don't allow a shrinking window */
-       if (window > oldwin) {
-               sk->window = window;
-               sk->lastwin_seq = sk->acked_seq;
-               oldwin = window;
+       struct tcp_opt *tp = &sk->tp_pinfo.af_tcp;
+       long free_space = sock_rspace(sk);
+       long window;
+       
+       if (sk->window_clamp)
+               free_space = min(sk->window_clamp, free_space);
+       
+
+       /*
+        * compute the actual window i.e.
+        * old_window - received_bytes_on_that_win
+        */
+
+       window = tp->rcv_wnd - (tp->rcv_nxt - tp->rcv_wup);
+
+       if ( window < 0 )
+       {
+               window = 0;
+               printk(KERN_DEBUG "TSW: win < 0 w=%d 1=%u 2=%u\n",
+                      tp->rcv_wnd, tp->rcv_nxt, tp->rcv_wup);
        }
-       return oldwin;
+
+       /*
+        * RFC 1122:
+        * "the suggested [SWS] avoidance algorithm for the receiver is to keep
+        *  RECV.NEXT + RCV.WIN fixed until:
+        *  RCV.BUFF - RCV.USER - RCV.WINDOW >= min(1/2 RCV.BUFF, MSS)"
+        *
+        * i.e. don't raise the right edge of the window until you can't raise
+        * it MSS bytes
+        */
+
+       /*
+        * It would be a good idea if it didn't break header prediction.
+        * and BSD made the header prediction standard...
+        * It expects the same value in the header i.e. th->window to be
+        * constant [in fact it's a good idea but they could document it
+        * couldn't they ?] [PR].
+        */
+       
+       /*
+        *  If the actual window is blocking the sender then try
+        *  to raise it.
+        */
+       
+       if (window < (sk->mss << 1))
+       {
+               long usable;
+
+               usable = free_space - window;
+
+               if (usable < 0)
+               {
+                       /* shouldn't happen */
+                       usable = 0;
+               }
+
+               tp->rcv_wnd += (min(usable, sk->mss) + 0x3FF) & ~0x3FF;
+       }
+
+#if 0
+       if (tp->rcv_wnd > free_space)
+       {
+               tp->rcv_wnd = free_space & ~0x3FF;
+       }
+#endif
+       if (tp->rcv_wnd < window)
+       {
+               tp->rcv_wnd = (window + 0x3FF) & ~0x3FF;
+       }
+
+       tp->rcv_wup = tp->rcv_nxt;
+       return tp->rcv_wnd;
 }
 
 /*
@@ -227,12 +506,14 @@ extern __inline const int tcp_connected(const int state)
 /*
  * Calculate(/check) TCP checksum
  */
-static __inline__ u16 tcp_check(struct tcphdr *th, int len,
-       unsigned long saddr, unsigned long daddr, unsigned long base)
+static __inline__ u16 tcp_v4_check(struct tcphdr *th, int len,
+                                  unsigned long saddr, unsigned long daddr, 
+                                  unsigned long base)
 {
        return csum_tcpudp_magic(saddr,daddr,len,IPPROTO_TCP,base);
 }
 
+
 #undef STATE_TRACE
 
 #ifdef STATE_TRACE
@@ -245,6 +526,7 @@ static char *statename[]={
 
 static __inline__ void tcp_set_state(struct sock *sk, int state)
 {
+       struct tcp_opt *tp = &sk->tp_pinfo.af_tcp;
        int oldstate = sk->state;
 
        sk->state = state;
@@ -264,7 +546,7 @@ static __inline__ void tcp_set_state(struct sock *sk, int state)
        case TCP_CLOSE:
                tcp_cache_zap();
                /* Should be about 2 rtt's */
-               reset_timer(sk, TIME_DONE, min(sk->rtt * 2, TCP_DONE_TIME));
+               net_reset_timer(sk, TIME_DONE, min(tp->srtt * 2, TCP_DONE_TIME));
                /* fall through */
        default:
                if (oldstate==TCP_ESTABLISHED)
@@ -272,4 +554,79 @@ static __inline__ void tcp_set_state(struct sock *sk, int state)
        }
 }
 
+extern __inline__ void tcp_synq_unlink(struct tcp_opt *tp, struct open_request *req)
+{
+       if (req->dl_next == req)
+       {
+               tp->syn_wait_queue = NULL;
+       }
+       else
+       {
+               req->dl_prev->dl_next = req->dl_next;
+               req->dl_next->dl_prev = req->dl_prev;
+               
+               if (tp->syn_wait_queue == req)
+               {
+                       tp->syn_wait_queue = req->dl_next;
+               }
+       }
+
+       req->dl_prev = req->dl_next = NULL;
+}
+
+extern __inline__ void tcp_synq_queue(struct tcp_opt *tp, struct open_request *req)
+{
+       if (!tp->syn_wait_queue)
+       {
+               req->dl_next = req;
+               req->dl_prev = req;
+               tp->syn_wait_queue = req;
+       }
+       else
+       {
+               struct open_request *list = tp->syn_wait_queue;
+               
+               req->dl_next = list;
+               req->dl_prev = list->dl_prev;
+               list->dl_prev->dl_next = req;
+               list->dl_prev = req;
+       }
+
+}
+
+extern __inline__ void tcp_inc_slow_timer(int timer)
+{
+       struct tcp_sl_timer *slt = &tcp_slt_array[timer];
+       
+       if (slt->count == 0)
+       {
+               unsigned long now = jiffies;
+               unsigned long when;
+               unsigned long next;
+
+               slt->last = now;
+               
+               when = now + slt->period;
+               next = del_timer(&tcp_slow_timer);
+
+               if (next && ((long)(next - when) < 0))
+               {
+                       when = next;
+               }
+               
+               tcp_slow_timer.expires = when;
+               add_timer(&tcp_slow_timer);
+       }
+
+       atomic_inc(&slt->count);
+}
+
+extern __inline__ void tcp_dec_slow_timer(int timer)
+{
+       struct tcp_sl_timer *slt = &tcp_slt_array[timer];
+
+       atomic_dec(&slt->count);
+}
+
 #endif /* _TCP_H */
+
diff --git a/include/net/transp_v6.h b/include/net/transp_v6.h
new file mode 100644 (file)
index 0000000..412943e
--- /dev/null
@@ -0,0 +1,42 @@
+#ifndef _TRANSP_V6_H
+#define _TRANSP_V6_H
+
+#include <net/checksum.h>
+
+/*
+ *     IPv6 transport protocols
+ */
+
+#ifdef __KERNEL__
+
+extern struct proto rawv6_prot;
+extern struct proto udpv6_prot;
+extern struct proto tcpv6_prot;
+
+extern void                            rawv6_init(void);
+extern void                            udpv6_init(void);
+extern void                            tcpv6_init(void);
+
+extern int                             udpv6_connect(struct sock *sk,
+                                                     struct sockaddr *uaddr,
+                                                     int addr_len);
+
+extern int                     datagram_recv_ctl(struct sock *sk,
+                                                 struct msghdr *msg,
+                                                 struct sk_buff *skb);
+
+extern int                     datagram_send_ctl(struct msghdr *msg,
+                                                 struct device **src_dev,
+                                                 struct in6_addr **src_addr,
+                                                 struct ipv6_options *opt);
+
+#define                LOOPBACK4_IPV6          __constant_htonl(0x7f000006)
+
+/*
+ *     address family specific functions
+ */
+extern struct tcp_func ipv4_specific;
+
+#endif
+
+#endif
index fe4723a78c68efe54739517597395eb6ec85b406..df25b24912c221169fe384cb719c93cb988ce0e9 100644 (file)
@@ -31,8 +31,9 @@
 extern struct proto udp_prot;
 
 
-extern void    udp_err(int type, int code, unsigned char *header, __u32 daddr,
-                       __u32 saddr, struct inet_protocol *protocol);
+extern void    udp_err(int type, int code, unsigned char *header,
+                       __u32 info, __u32 daddr, __u32 saddr,
+                       struct inet_protocol *protocol);
 extern void    udp_send_check(struct udphdr *uh, __u32 saddr, 
                        __u32 daddr, int len, struct sock *sk);
 extern int     udp_recvfrom(struct sock *sk, unsigned char *to,
@@ -41,7 +42,11 @@ extern int   udp_recvfrom(struct sock *sk, unsigned char *to,
 extern int     udp_read(struct sock *sk, unsigned char *buff,
                         int len, int noblock, unsigned flags);
 extern int     udp_connect(struct sock *sk,
-                           struct sockaddr_in *usin, int addr_len);
+                           struct sockaddr *usin, int addr_len);
+
+extern int     udp_sendmsg(struct sock *sk, struct msghdr *msg,
+                           int len, int noblock, int flags);
+
 extern int     udp_rcv(struct sk_buff *skb, struct device *dev,
                        struct options *opt, __u32 daddr,
                        unsigned short len, __u32 saddr, int redo,
index 733e28d0be3280c60c0e92532723c4570e5bbd08..aed6b362553e9b9c0e04f399461a32e885ab2f46 100644 (file)
@@ -838,6 +838,7 @@ asmlinkage void start_kernel(void)
        check_bugs();
 
        printk(linux_banner);
+       printk("POSIX conformance testing by UNIFIX\n");
 #ifdef __SMP__
        smp_init();
 #endif
index 07726e9d7fc89d2917ff0973ef4328332a47508a..617e7d3ed18012a69984137b177557d3d3226480 100644 (file)
@@ -252,7 +252,7 @@ int do_fork(unsigned long clone_flags, unsigned long usp, struct pt_regs *regs)
        p->it_real_incr = p->it_virt_incr = p->it_prof_incr = 0;
        init_timer(&p->real_timer);
        p->real_timer.data = (unsigned long) p;
-       p->leader = 0;          /* process leadership doesn't inherit */
+       p->leader = 0;          /* session leadership doesn't inherit */
        p->tty_old_pgrp = 0;
        p->utime = p->stime = 0;
        p->cutime = p->cstime = 0;
index c848430ccf67d323e36e9d516fc2920e4b2168e0..77c28b46bef78367948ed5ba4d77ac1bfad3fb48 100644 (file)
@@ -49,6 +49,9 @@
 #include <linux/ctype.h>
 
 extern unsigned char aux_device_present, kbd_read_mask;
+#ifdef __i386__
+       extern struct drive_info_struct drive_info;
+#endif
 
 #ifdef CONFIG_PCI
 #include <linux/bios32.h>
@@ -186,6 +189,10 @@ struct symbol_table symbol_table = {
        X(blkdev_release),
        X(gendisk_head),
        X(resetup_one_dev),
+       X(unplug_device),
+#ifdef __i386__
+       X(drive_info),
+#endif
 
 #ifdef CONFIG_SERIAL   
        /* Module creation of serial units */
@@ -219,6 +226,11 @@ struct symbol_table symbol_table = {
        /* sysctl table registration */
        X(register_sysctl_table),
        X(unregister_sysctl_table),
+       X(sysctl_string),
+       X(sysctl_intvec),
+       X(proc_dostring),
+       X(proc_dointvec),
+       X(proc_dointvec_minmax),
 
        /* interrupt handling */
        X(request_irq),
@@ -285,6 +297,7 @@ struct symbol_table symbol_table = {
        X(sys_call_table),
        X(hard_reset_now),
        X(_ctype),
+       X(secure_tcp_sequence_number),
 
        /* Signal interfaces */
        X(send_sig),
@@ -330,11 +343,6 @@ struct symbol_table symbol_table = {
        X(aux_device_present),
        X(kbd_read_mask),
 
-#ifdef CONFIG_BLK_DEV_IDE_PCMCIA
-       X(ide_register),
-       X(ide_unregister),
-#endif
-
 #ifdef CONFIG_BLK_DEV_MD
        X(disk_name),   /* for md.c */
 #endif
index f83666193eabcceaca519edd1e7be25ef66dfdcb..14717bec3eb3cc628dbc1843eeec7139c4d7f05e 100644 (file)
@@ -49,7 +49,7 @@
 #ifdef CONFIG_MODULES          /* a *big* #ifdef block... */
 
 static struct module kernel_module;
-static struct module *module_list = &kernel_module;
+struct module *module_list = &kernel_module;
 
 static int freeing_modules; /* true if some modules are marked for deletion */
 
@@ -117,6 +117,8 @@ sys_create_module(char *module_name, unsigned long size)
        mp->addr = addr;
        mp->state = MOD_UNINITIALIZED;
        mp->cleanup = NULL;
+       mp->exceptinfo.start = NULL;
+       mp->exceptinfo.stop = NULL;
 
        * (long *) addr = 0;    /* set use count to zero */
        module_list = mp;       /* link it in */
@@ -173,7 +175,12 @@ sys_init_module(char *module_name, char *code, unsigned codesize,
                mp->size * PAGE_SIZE - (codesize + sizeof (long)));
        pr_debug("module init entry = 0x%08lx, cleanup entry = 0x%08lx\n",
                (unsigned long) rt.init, (unsigned long) rt.cleanup);
+       if (rt.signature != MODULE_2_1_7_SIG){
+               printk ("Older insmod used with kernel 2.1.7 +\n");
+               return -EINVAL;
+       }
        mp->cleanup = rt.cleanup;
+       mp->exceptinfo = rt.exceptinfo;
 
        /* update kernel symbol table */
        if (symtab) { /* symtab == NULL means no new entries to handle */
index 3560a6d46a550a3111e960e6bb659a25b5dcf0d9..73bb8e476c2fb705d80b2110e0e088a5a07566a5 100644 (file)
@@ -1192,20 +1192,19 @@ asmlinkage int sys_nice(int increment)
 
 #endif
 
-static struct task_struct *find_process_by_pid(pid_t pid) {
-       struct task_struct *p, *q;
-
-       if (pid == 0)
-               p = current;
-       else {
-               p = 0;
-               for_each_task(q) {
-                       if (q && q->pid == pid) {
-                               p = q;
-                               break;
-                       }
+static struct task_struct *find_process_by_pid(pid_t pid)
+{
+       struct task_struct *p;
+
+       p = current;
+       if (pid) {
+               for_each_task(p) {
+                       if (p->pid == pid)
+                               goto found;
                }
+               p = NULL;
        }
+found:
        return p;
 }
 
index 9b71940ac5661662a8f4db90981180b475376097..2db109b4b2f468828e234871a36e4d37fbffe6bc 100644 (file)
@@ -6,6 +6,7 @@
  * Added bdflush entry and intvec min/max checking, 2/23/96, Tom Dyas.
  * Added hooks for /proc/sys/net (minor, minor patch), 96/4/1, Mike Shaver.
  * Added kernel/java-{interpreter,appletviewer}, 96/5/10, Mike Shaver.
+ * Dynamic registration fixes, Stephen Tweedie.
  */
 
 #include <linux/config.h>
@@ -203,7 +204,7 @@ int do_sysctl (int *name, int nlen,
        do {
                context = 0;
                error = parse_table(name, nlen, oldval, oldlenp, 
-                                   newval, newlen, root_table, &context);
+                                   newval, newlen, tmp->ctl_table, &context);
                if (context)
                        kfree(context);
                if (error != -ENOTDIR)
@@ -401,9 +402,11 @@ void unregister_sysctl_table(struct ctl_table_header * table)
 /* Scan the sysctl entries in table and add them all into /proc */
 static void register_proc_table(ctl_table * table, struct proc_dir_entry *root)
 {
-       struct proc_dir_entry *de;
+       struct proc_dir_entry *de, *tmp;
+       int exists;
        
        for (; table->ctl_name; table++) {
+               exists = 0;
                /* Can't do anything without a proc name. */
                if (!table->procname)
                        continue;
@@ -432,12 +435,24 @@ static void register_proc_table(ctl_table * table, struct proc_dir_entry *root)
                }
                /* Otherwise it's a subdir */
                else  {
-                       de->ops = &proc_dir_inode_operations;
-                       de->nlink++;
-                       de->mode |= S_IFDIR;
+                       /* First check to see if it already exists */
+                       for (tmp = root->subdir; tmp; tmp = tmp->next) {
+                               if (tmp->namelen == de->namelen &&
+                                   !memcmp(tmp->name,de->name,de->namelen)) {
+                                       exists = 1;
+                                       kfree (de);
+                                       de = tmp;
+                               }
+                       }
+                       if (!exists) {
+                               de->ops = &proc_dir_inode_operations;
+                               de->nlink++;
+                               de->mode |= S_IFDIR;
+                       }
                }
                table->de = de;
-               proc_register_dynamic(root, de);
+               if (!exists)
+                       proc_register_dynamic(root, de);
                if (de->mode & S_IFDIR )
                        register_proc_table(table->child, de);
        }
@@ -456,8 +471,12 @@ static void unregister_proc_table(ctl_table * table, struct proc_dir_entry *root
                        }
                        unregister_proc_table(table->child, de);
                }
-               proc_unregister(root, de->low_ino);
-               kfree(de);                      
+               /* Don't unregister proc directories which still have
+                  entries... */
+               if (!((de->mode & S_IFDIR) && de->subdir)) {
+                       proc_unregister(root, de->low_ino);
+                       kfree(de);
+               }
        }
 }
 
index bc6e936e20107bf772173c5ceac371e7ed26a5b9..97ab97a4cc83f70abf4a92f5bccf598674b55621 100644 (file)
@@ -8,7 +8,12 @@ bool 'Network aliasing'  CONFIG_NET_ALIAS
 bool 'TCP/IP networking' CONFIG_INET
 if [ "$CONFIG_INET" = "y" ]; then
   source net/ipv4/Config.in
+
+  if [ "$CONFIG_EXPERIMENTAL" = "y" ]; then
+    tristate 'The IPv6 protocol' CONFIG_IPV6
+  fi
 fi
+
 comment ' '
 tristate 'The IPX protocol' CONFIG_IPX
 if [ "$CONFIG_IPX" != "n" ]; then
index 66678158fa571e88585e6458060d7fbecabcba3d..24fdc93ec8c57e0245c0abe0ca6d0c82391e2940 100644 (file)
@@ -8,18 +8,27 @@
 # Note 2! The CFLAGS definition is now in the main makefile...
 
 MOD_SUB_DIRS := ipv4
-ALL_SUB_DIRS := 802 ax25 bridge core ethernet ipv4 ipx unix appletalk netrom #decnet
+ALL_SUB_DIRS := 802 ax25 bridge core ethernet ipv4 ipv6 ipx unix appletalk \
+               netrom #decnet
 SUB_DIRS     := core ethernet unix
 MOD_LIST_NAME := NET_MISC_MODULES
 
 ifeq ($(CONFIG_NET),y)
 SUB_DIRS += 802
-endif
+endif 
 
 ifeq ($(CONFIG_INET),y)
 SUB_DIRS += ipv4
 endif
 
+ifeq ($(CONFIG_IPV6),y)
+SUB_DIRS += ipv6
+else
+  ifeq ($(CONFIG_IPV6),m)
+  MOD_SUB_DIRS += ipv6
+  endif
+endif
+
 ifeq ($(CONFIG_BRIDGE),y)
 SUB_DIRS += bridge
 endif
@@ -58,10 +67,29 @@ endif
 
 M_OBJS      :=
 
-ifeq ($(CONFIG_NETLINK),y)
+CONFIG_NETLINK_BUILTIN :=
+CONFIG_NETLINK_MODULE :=
+
+ifeq ($(CONFIG_NETLINK), y)
+  CONFIG_NETLINK_BUILTIN = y
+endif
+
+ifeq ($(CONFIG_IPV6), y)
+  CONFIG_NETLINK_BUILTIN = y
+endif
+
+ifeq ($(CONFIG_NETLINK), m)
+  CONFIG_NETLINK_MODULE = y
+endif
+
+ifeq ($(CONFIG_IPV6), m)
+  CONFIG_NETLINK_MODULE = y
+endif
+
+ifdef CONFIG_NETLINK_BUILTIN
 L_OBJS += netlink.o
 else
-  ifeq ($(CONFIG_NETLINK),m)
+  ifdef CONFIG_NETLINK_MODULE
     M_OBJS += netlink.o
   endif
 endif
index 587adf636cf42c9ba96655f6158bf49c79a366e4..95edf89def7d22e14335e5ebc8cbd86d6f72cd56 100644 (file)
@@ -267,7 +267,7 @@ void root_selection(void)
                                  (((port_info[port_no].designated_cost
                                     + port_info[port_no].path_cost
                                     )
-                                   ==
+                                   <
                                    (port_info[root_port].designated_cost
                                     + port_info[root_port].path_cost
                                     )            /* (4.6.8.3.1(2)) */
index b79d4a1f813a9e907b0dce5130febf203b1d4f9e..d50b2eed5fed65502e2f3cf510815d576bba280a 100644 (file)
@@ -45,6 +45,7 @@
  *             Alan Cox        :       Cleaned up the backlog initialise.
  *             Craig Metz      :       SIOCGIFCONF fix if space for under
  *                                     1 device.
+ *             Molnar Ingo     :       skb->stamp hack for the Pentium
  *         Thomas Bogendoerfer :       Return ENODEV for dev_open, if there
  *                                     is no device open function.
  *
@@ -118,14 +119,6 @@ static struct sk_buff_head backlog;
  
 static int backlog_size = 0;
 
-/*
- *     Return the lesser of the two values. 
- */
-static __inline__ unsigned long min(unsigned long a, unsigned long b)
-{
-       return (a < b)? a : b;
-}
 
 
 /******************************************************************************************
@@ -418,6 +411,10 @@ static void do_dev_queue_xmit(struct sk_buff *skb, struct device *dev, int pri)
                /* copy outgoing packets to any sniffer packet handlers */
                if (dev_nit) {
                        struct packet_type *ptype;
+#ifdef CONFIG_M586
+                        struct timeval dummy_tv;
+                       do_gettimeofday( &dummy_tv );
+#endif
                        skb->stamp=xtime;
                        for (ptype = ptype_all; ptype!=NULL; ptype = ptype->next) 
                        {
@@ -489,7 +486,13 @@ void netif_rx(struct sk_buff *skb)
        skb->sk = NULL;
        skb->free = 1;
        if(skb->stamp.tv_sec==0)
+       {
+#ifdef CONFIG_M586
+                        struct timeval dummy_tv;
+                       do_gettimeofday( &dummy_tv );
+#endif
                skb->stamp = xtime;
+       }
 
        /*
         *      Check that we aren't overdoing things.
@@ -1074,7 +1077,7 @@ static int dev_ifsioc(void *arg, unsigned int getset)
                        goto rarok;
        
                case SIOCSIFADDR:       /* Set interface address (and family) */
-               
+
                        /*
                         *      BSDism. SIOCSIFADDR family=AF_UNSPEC sets the
                         *      physical address. We can cope with this now.
@@ -1107,7 +1110,7 @@ static int dev_ifsioc(void *arg, unsigned int getset)
 
 #ifdef CONFIG_NET_ALIAS
                                if (net_alias_is(dev))
-                               net_alias_dev_rehash(dev ,&ifr.ifr_addr);
+                                       net_alias_dev_rehash(dev ,&ifr.ifr_addr);
 #endif
                                dev->pa_addr = new_pa_addr;
                                dev->family = new_family;
@@ -1204,7 +1207,7 @@ static int dev_ifsioc(void *arg, unsigned int getset)
                                return -EINVAL;
 
                        if (dev->change_mtu)
-                               ret = (*dev->change_mtu)(dev, ifr.ifr_mtu);
+                               ret = dev->change_mtu(dev, ifr.ifr_mtu);
                        else
                        {
                                dev->mtu = ifr.ifr_mtu;
index 9059ca6b329591c056e2aea4a4bfa4e0137b52f9..77bd7172e47057beb580c3bafadfcdd5e2ec8341 100644 (file)
@@ -9,6 +9,8 @@
  *
  *     Fixes:
  *             Andrew Lunn     :       Errors in iovec copying.
+ *             Pedro Roque     :       Added memcpy_fromiovecend and
+ *                                     csum_..._fromiovecend.
  */
 
 
 #include <linux/kernel.h>
 #include <linux/mm.h>
 #include <linux/net.h>
+#include <linux/in6.h>
 #include <asm/uaccess.h>
-
+#include <asm/byteorder.h>
+#include <asm/checksum.h>
 
 extern inline int min(int x, int y)
 {
@@ -104,3 +108,130 @@ void memcpy_fromiovec(unsigned char *kdata, struct iovec *iov, int len)
                iov++;
        }
 }
+
+
+/*
+ *     For use with ip_build_xmit
+ */
+
+void memcpy_fromiovecend(unsigned char *kdata, struct iovec *iov, int offset, 
+                        int len)
+{
+       while(offset>0)
+       {
+               if (offset > iov->iov_len)
+               {
+                       offset -= iov->iov_len;
+
+               }
+               else
+               {
+                       u8 *base;
+                       int copy;
+
+                       base = iov->iov_base + offset;
+                       copy = min(len, iov->iov_len - offset);
+                       offset = 0;
+
+                       copy_from_user(kdata, base, copy);
+                       len-=copy;
+                       kdata+=copy;
+               }
+               iov++;  
+       }
+
+       while (len>0)
+       {
+               int copy=min(len, iov->iov_len);
+               copy_from_user(kdata, iov->iov_base, copy);
+               len-=copy;
+               kdata+=copy;
+               iov++;
+       }
+}
+
+/*
+ *     And now for the all-in-one: copy and checksum from a user iovec
+ *     directly to a datagram
+ *     Calls to csum_partial but the last must be in 32 bit chunks
+ *
+ *     ip_build_xmit must ensure that when fragmenting only the last
+ *     call to this function will be unaligned also.
+ */
+
+unsigned int csum_partial_copy_fromiovecend(unsigned char *kdata, 
+                                           struct iovec *iov, int offset, 
+                                           int len, int csum)
+{
+       __u32   partial;
+       __u32   partial_cnt = 0;
+
+       while(offset>0)
+       {
+               if (offset > iov->iov_len)
+               {
+                       offset -= iov->iov_len;
+
+               }
+               else
+               {
+                       u8 *base;
+                       int copy;
+
+                       base = iov->iov_base + offset;
+                       copy = min(len, iov->iov_len - offset);
+                       offset = 0;
+
+                       partial_cnt = copy % 4;
+                       if (partial_cnt)
+                       {
+                               copy -= partial_cnt;
+                               copy_from_user(&partial, base + copy,
+                                              partial_cnt);
+                       }
+
+                       csum = csum_partial_copy_fromuser(base, kdata, 
+                                                         copy, csum);
+
+                       len   -= copy + partial_cnt;
+                       kdata += copy + partial_cnt;
+               }
+               iov++;                  
+       }
+
+       while (len>0)
+       {
+               u8 *base = iov->iov_base;
+               int copy=min(len, iov->iov_len);
+               
+               if (partial_cnt)
+               {
+                       int par_len = 4 - partial_cnt;
+
+                       copy_from_user(&partial, base + partial_cnt, par_len);
+                       csum = csum_partial((u8*) &partial, 4, csum);
+                       base += par_len;
+                       copy -= par_len;
+                       partial_cnt = 0;
+               }
+
+               if (len - copy > 0)
+               {
+                       partial_cnt = copy % 4;
+                       if (partial_cnt)
+                       {
+                               copy -= partial_cnt;
+                               copy_from_user(&partial, base + copy,
+                                              partial_cnt);
+                       }
+               }
+
+               csum = csum_partial_copy_fromuser(base, kdata, 
+                                                 copy, csum);
+               len   -= copy + partial_cnt;
+               kdata += copy + partial_cnt;
+               iov++;
+       }
+
+       return csum;
+}
index f9c613f186fef373a50192d3d141bb4a8de0c90b..777301672e9ca12ff38c4b88e913cceb75deccb4 100644 (file)
 #include <linux/interrupt.h>
 #include <linux/in.h>
 #include <linux/inet.h>
-#include <linux/netdevice.h>
 #include <linux/malloc.h>
+#include <linux/netdevice.h>
 #include <linux/string.h>
 #include <linux/skbuff.h>
 
 #include <net/ip.h>
+#include <net/ipv6.h>
 #include <net/protocol.h>
 #include <net/route.h>
 #include <net/tcp.h>
@@ -702,6 +703,7 @@ struct sk_buff *alloc_skb(unsigned int size,int priority)
        skb->end=bptr+len;
        skb->len=0;
        skb->destructor=NULL;
+       skb->inclone = 0;
        return skb;
 }
 
@@ -729,7 +731,8 @@ void kfree_skbmem(struct sk_buff *skb)
                        addr = skb;
                        __kfree_skbmem(skb->data_skb);
                }
-               kfree(addr);
+               if (!skb->inclone)
+                       kfree(addr);
                atomic_dec(&net_skbcount);
        }
 }
@@ -742,11 +745,21 @@ void kfree_skbmem(struct sk_buff *skb)
 struct sk_buff *skb_clone(struct sk_buff *skb, int priority)
 {
        struct sk_buff *n;
-
+       int inbuff = 0;
+       
        IS_SKB(skb);
-       n = kmalloc(sizeof(*n), priority);
-       if (!n)
-               return NULL;
+       if (skb_tailroom(skb) >= sizeof(struct sk_buff))
+       {
+               n = ((struct sk_buff *) skb->end) - 1;
+               skb->end -= sizeof(struct sk_buff);
+               inbuff = 1;
+       }
+       else
+       {
+               n = kmalloc(sizeof(*n), priority);
+               if (!n)
+                       return NULL;
+       }
        memcpy(n, skb, sizeof(*n));
        n->count = 1;
        if (skb->data_skb)
@@ -762,6 +775,7 @@ struct sk_buff *skb_clone(struct sk_buff *skb, int priority)
        n->tries = 0;
        n->lock = 0;
        n->users = 0;
+       n->inclone = inbuff;
        return n;
 }
 
@@ -804,6 +818,10 @@ struct sk_buff *skb_copy(struct sk_buff *skb, int priority)
        n->h.raw=skb->h.raw+offset;
        n->mac.raw=skb->mac.raw+offset;
        n->ip_hdr=(struct iphdr *)(((char *)skb->ip_hdr)+offset);
+#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
+       n->ipv6_hdr=(struct ipv6hdr *)(((char *)skb->ipv6_hdr)+offset);
+       n->nexthop = skb->nexthop;
+#endif
        n->saddr=skb->saddr;
        n->daddr=skb->daddr;
        n->raddr=skb->raddr;
index db9fbbbf335d9c1f96e7abf9e4b60e0ed7bb17c3..b5fcaa09cf23a5eb66d09721c66355641da4e76c 100644 (file)
@@ -192,6 +192,10 @@ int sock_setsockopt(struct sock *sk, int level, int optname,
                        return(0);
 
                case SO_KEEPALIVE:
+                       if (sk->protocol == IPPROTO_TCP)
+                       {
+                               tcp_set_keepalive(sk, valbool);
+                       }
                        sk->keepopen = valbool;
                        return(0);
 
@@ -536,7 +540,7 @@ struct sk_buff *sock_alloc_send_skb(struct sock *sk, unsigned long size, unsigne
 void __release_sock(struct sock *sk)
 {
 #ifdef CONFIG_INET
-       if (!sk->prot || !sk->prot->rcv)
+       if (!sk->prot || !sk->backlog_rcv)
                return;
                
        /* See if we have any packets built up. */
@@ -544,10 +548,7 @@ void __release_sock(struct sock *sk)
        while (!skb_queue_empty(&sk->back_log)) {
                struct sk_buff * skb = sk->back_log.next;
                __skb_unlink(skb, &sk->back_log);
-               sk->prot->rcv(skb, skb->dev, (struct options*)skb->proto_priv,
-                             skb->saddr, skb->len, skb->daddr, 1,
-                             /* Only used for/by raw sockets. */
-                             (struct inet_protocol *)sk->pair); 
+               sk->backlog_rcv(sk, skb);
        }
        end_bh_atomic();
 #endif  
index e4a80d3886e5fa9e8e26796132a06b9ee2cdd596..4872fd2b5852594d9e76a2082c93f9e1d1faead6 100644 (file)
 #include <linux/config.h>
 #include <net/arp.h>
 #include <net/sock.h>
+#include <net/ipv6.h>
+
+#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
+#include <linux/in6.h>
+#include <net/ndisc.h>
+#endif
+
 #include <asm/checksum.h>
 
+
+#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
+int (*ndisc_eth_hook) (unsigned char *, struct device *, 
+                      struct sk_buff *) = NULL;
+#endif
+
 void eth_setup(char *str, int *ints)
 {
        struct device *d = dev_base;
@@ -143,24 +156,43 @@ int eth_rebuild_header(void *buff, struct device *dev, unsigned long dst,
        struct ethhdr *eth = (struct ethhdr *)buff;
 
        /*
-        *      Only ARP/IP is currently supported
+        *      Only ARP/IP and NDISC/IPv6 are currently supported
         */
-        
-       if(eth->h_proto != htons(ETH_P_IP)) 
+       
+       switch (eth->h_proto)
        {
-               printk(KERN_DEBUG "%s: unable to resolve type %X addresses.\n",dev->name,(int)eth->h_proto);
+#ifdef CONFIG_INET
+       case __constant_htons(ETH_P_IP):
+
+               /*
+                *      Try to get ARP to resolve the header.
+                */
+
+               return (arp_find(eth->h_dest, dst, dev, dev->pa_addr, skb) ? 
+                       1 : 0);
+               break;
+#endif
+
+#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
+       case __constant_htons(ETH_P_IPV6):
+#ifdef CONFIG_IPV6
+               return (ndisc_eth_resolv(eth->h_dest, dev, skb));
+#else
+               if (ndisc_eth_hook)
+                       return (ndisc_eth_hook(eth->h_dest, dev, skb));
+#endif
+#endif 
+       default:
+               printk(KERN_DEBUG 
+                      "%s: unable to resolve type %X addresses.\n", 
+                      dev->name, (int)eth->h_proto);
+               
                memcpy(eth->h_source, dev->dev_addr, dev->addr_len);
                return 0;
+               break;
        }
 
-       /*
-        *      Try to get ARP to resolve the header.
-        */
-#ifdef CONFIG_INET      
-       return arp_find(eth->h_dest, dst, dev, dev->pa_addr, skb)? 1 : 0;
-#else
        return 0;       
-#endif 
 }
 
 
index d4f7655ea02b64cf87891a6a98506c8f0f18d098..2ca338c049c37cc598725dcbccf861641970ffc2 100644 (file)
@@ -11,7 +11,7 @@ O_TARGET := ipv4.o
 IPV4_OBJS := utils.o route.o proc.o timer.o protocol.o packet.o \
             ip_input.o ip_fragment.o ip_forward.o ip_options.o \
             ip_output.o ip_sockglue.o \
-            tcp.o tcp_input.o tcp_output.o tcp_timer.o \
+            tcp.o tcp_input.o tcp_output.o tcp_timer.o tcp_ipv4.o\
             raw.o udp.o arp.o icmp.o devinet.o af_inet.o igmp.o ip_fw.o \
             sysctl_net_ipv4.o
 
index 8dd763eac5f2eb3b3a023407cc3a934248b9969b..89f196053a201ffd029fd5e5e19ab5678472b364 100644 (file)
@@ -114,6 +114,11 @@ extern int afinet_get_info(char *, char **, off_t, int, int);
 extern int tcp_get_info(char *, char **, off_t, int, int);
 extern int udp_get_info(char *, char **, off_t, int, int);
 
+
+struct sock * tcp_sock_array[SOCK_ARRAY_SIZE];
+struct sock * udp_sock_array[SOCK_ARRAY_SIZE];
+struct sock * raw_sock_array[SOCK_ARRAY_SIZE];
+
 #ifdef CONFIG_DLCI
 extern int dlci_ioctl(unsigned int, void*);
 #endif
@@ -205,7 +210,7 @@ unsigned short get_new_socknum(struct proto *prot, unsigned short base)
  *     Add a socket into the socket tables by number.
  */
 
-void put_sock(unsigned short num, struct sock *sk)
+void inet_put_sock(unsigned short num, struct sock *sk)
 {
        struct sock **skp, *tmp;
        int mask;
@@ -266,7 +271,7 @@ void put_sock(unsigned short num, struct sock *sk)
  *     Remove a socket from the socket tables.
  */
 
-static void remove_sock(struct sock *sk1)
+void inet_remove_sock(struct sock *sk1)
 {
        struct sock **p;
        unsigned long flags;
@@ -309,35 +314,16 @@ void destroy_sock(struct sock *sk)
 
        lock_sock(sk);                  /* just to be safe. */
 
-       remove_sock(sk);
   
        /*
         *      Now we can no longer get new packets or once the
         *      timers are killed, send them.
         */
         
-       delete_timer(sk);
-       del_timer(&sk->delack_timer);
-       del_timer(&sk->retransmit_timer);
-       
-       /*
-        *      Drain any partial frames
-        */
-        
-       while ((skb = tcp_dequeue_partial(sk)) != NULL) 
-       {
-               IS_SKB(skb);
-               kfree_skb(skb, FREE_WRITE);
-       }
+       net_delete_timer(sk);
 
-       /*
-        *      Cleanup up the write buffer. 
-        */
-        
-       while((skb = skb_dequeue(&sk->write_queue)) != NULL) {
-               IS_SKB(skb);
-               kfree_skb(skb, FREE_WRITE);
-       }
+       if (sk->prot->destroy)
+               sk->prot->destroy(sk);
        
        /*
         *      Clean up the read buffer.
@@ -358,34 +344,6 @@ void destroy_sock(struct sock *sk)
                kfree_skb(skb, FREE_READ);
        }
 
-       /*
-        *      Now we need to clean up the send head. 
-        */
-        
-       cli();
-       for(skb = sk->send_head; skb != NULL; )
-       {
-               struct sk_buff *skb2;
-
-               /*
-                * We need to remove skb from the transmit queue,
-                * or maybe the arp queue.
-                */
-               if (skb->next  && skb->prev) 
-               {
-                       IS_SKB(skb);
-                       skb_unlink(skb);
-               }
-               skb->dev = NULL;
-               skb2 = skb->link3;
-               kfree_skb(skb, FREE_WRITE);
-               skb = skb2;
-       }
-       sk->send_head = NULL;
-       sk->send_tail = NULL;
-       sk->send_next = NULL;
-       sti();
-
        /*
         *      Now the backlog. 
         */
@@ -415,6 +373,8 @@ void destroy_sock(struct sock *sk)
 
        if (sk->rmem_alloc == 0 && sk->wmem_alloc == 0) 
        {
+               inet_remove_sock(sk);
+
                if(sk->opt)
                        kfree(sk->opt);
                ip_rt_put(sk->ip_route_cache);
@@ -429,12 +389,19 @@ void destroy_sock(struct sock *sk)
        {
                /* this should never happen. */
                /* actually it can if an ack has just been sent. */
-               NETDEBUG(printk("Socket destroy delayed (r=%d w=%d)\n",
-                       sk->rmem_alloc, sk->wmem_alloc));
+               /* 
+                * It's more normal than that...
+                * It can happen because a skb is still in the device queues
+                * [PR]
+                */
+                 
+               printk("Socket destroy delayed (r=%d w=%d)\n",
+                       sk->rmem_alloc, sk->wmem_alloc);
+
                sk->destroy = 1;
                sk->ack_backlog = 0;
                release_sock(sk);
-               reset_timer(sk, TIME_DESTROY, SOCK_DESTROY_TIME);
+               net_reset_timer(sk, TIME_DESTROY, SOCK_DESTROY_TIME);
        }
 }
 
@@ -444,7 +411,7 @@ void destroy_sock(struct sock *sk)
  *     the work.
  */
  
-static int inet_fcntl(struct socket *sock, unsigned int cmd, unsigned long arg)
+int inet_fcntl(struct socket *sock, unsigned int cmd, unsigned long arg)
 {
        struct sock *sk;
 
@@ -473,7 +440,7 @@ static int inet_fcntl(struct socket *sock, unsigned int cmd, unsigned long arg)
  *     Set socket options on an inet socket.
  */
  
-static int inet_setsockopt(struct socket *sock, int level, int optname,
+int inet_setsockopt(struct socket *sock, int level, int optname,
                    char *optval, int optlen)
 {
        struct sock *sk = (struct sock *) sock->data;  
@@ -489,7 +456,7 @@ static int inet_setsockopt(struct socket *sock, int level, int optname,
  *     Get a socket option on an AF_INET socket.
  */
 
-static int inet_getsockopt(struct socket *sock, int level, int optname,
+int inet_getsockopt(struct socket *sock, int level, int optname,
                    char *optval, int *optlen)
 {
        struct sock *sk = (struct sock *) sock->data;   
@@ -515,7 +482,7 @@ static int inet_autobind(struct sock *sk)
                        return(-EAGAIN);
                udp_cache_zap();
                tcp_cache_zap();
-               put_sock(sk->num, sk);
+               inet_put_sock(sk->num, sk);
                sk->dummy_th.source = ntohs(sk->num);
        }
        return 0;
@@ -525,7 +492,7 @@ static int inet_autobind(struct sock *sk)
  *     Move a socket into listening state.
  */
  
-static int inet_listen(struct socket *sock, int backlog)
+int inet_listen(struct socket *sock, int backlog)
 {
        struct sock *sk = (struct sock *) sock->data;
 
@@ -666,38 +633,38 @@ static int inet_create(struct socket *sock, int protocol)
 #ifdef CONFIG_TCP_NAGLE_OFF
        sk->nonagle = 1;
 #endif  
+       sk->family = AF_INET;
        sk->type = sock->type;
        sk->protocol = protocol;
        sk->allocation = GFP_KERNEL;
        sk->sndbuf = SK_WMEM_MAX;
        sk->rcvbuf = SK_RMEM_MAX;
-       sk->rto = TCP_TIMEOUT_INIT;             /*TCP_WRITE_TIME*/
-       sk->cong_window = 1; /* start with only sending one packet at a time. */
-       sk->ssthresh = 0x7fffffff;
        sk->priority = 1;
+
+       sk->prot = prot;
+       sk->backlog_rcv = prot->backlog_rcv;
+
+       sk->sleep = sock->wait;
+       sock->data =(void *) sk;
+
        sk->state = TCP_CLOSE;
 
-       /* this is how many unacked bytes we will accept for this socket.  */
-       sk->max_unacked = 2048; /* needs to be at most 2 full packets. */
-       sk->delay_acks = 1;
-       sk->max_ack_backlog = SOMAXCONN;
        skb_queue_head_init(&sk->write_queue);
        skb_queue_head_init(&sk->receive_queue);
-       sk->mtu = 576;
-       sk->prot = prot;
-       sk->sleep = sock->wait;
-       init_timer(&sk->timer);
-       init_timer(&sk->delack_timer);
-       init_timer(&sk->retransmit_timer);
+       skb_queue_head_init(&sk->back_log);
+
+
        sk->timer.data = (unsigned long)sk;
        sk->timer.function = &net_timer;
-       skb_queue_head_init(&sk->back_log);
+
        sock->data =(void *) sk;
        sk->ip_ttl=ip_statistics.IpDefaultTTL;
+
        if(sk->type==SOCK_RAW && protocol==IPPROTO_RAW)
                sk->ip_hdrincl=1;
        else
                sk->ip_hdrincl=0;
+
 #ifdef CONFIG_IP_MULTICAST
        sk->ip_mc_loop=1;
        sk->ip_mc_ttl=1;
@@ -709,9 +676,6 @@ static int inet_create(struct socket *sock, int protocol)
         *      if TCP uses it (maybe move to tcp_init later)
         */
        
-       sk->dummy_th.ack=1;     
-       sk->dummy_th.doff=sizeof(struct tcphdr)>>2;
-       
        sk->state_change = def_callback1;
        sk->data_ready = def_callback2;
        sk->write_space = def_callback3;
@@ -725,7 +689,7 @@ static int inet_create(struct socket *sock, int protocol)
         * creation time automatically
         * shares.
         */
-               put_sock(sk->num, sk);
+               inet_put_sock(sk->num, sk);
                sk->dummy_th.source = ntohs(sk->num);
        }
 
@@ -757,7 +721,7 @@ static int inet_dup(struct socket *newsock, struct socket *oldsock)
  *     should refer to it.
  */
  
-static int inet_release(struct socket *sock, struct socket *peer)
+int inet_release(struct socket *sock, struct socket *peer)
 {
        unsigned long timeout;
        struct sock *sk = (struct sock *) sock->data;
@@ -934,12 +898,12 @@ static int inet_bind(struct socket *sock, struct sockaddr *uaddr,
                }
                sti();
 
-               remove_sock(sk);
+               inet_remove_sock(sk);
                if(sock->type==SOCK_DGRAM)
                        udp_cache_zap();
                if(sock->type==SOCK_STREAM)
                        tcp_cache_zap();
-               put_sock(snum, sk);
+               inet_put_sock(snum, sk);
                sk->dummy_th.source = ntohs(sk->num);
                sk->daddr = 0;
                sk->dummy_th.dest = 0;
@@ -954,8 +918,8 @@ static int inet_bind(struct socket *sock, struct sockaddr *uaddr,
  *     TCP 'magic' in here.
  */
  
-static int inet_connect(struct socket *sock, struct sockaddr * uaddr,
-                 int addr_len, int flags)
+int inet_connect(struct socket *sock, struct sockaddr * uaddr,
+                int addr_len, int flags)
 {
        struct sock *sk=(struct sock *)sock->data;
        int err;
@@ -981,7 +945,7 @@ static int inet_connect(struct socket *sock, struct sockaddr * uaddr,
                        return(-EAGAIN);
                if (sk->prot->connect == NULL) 
                        return(-EOPNOTSUPP);
-               err = sk->prot->connect(sk, (struct sockaddr_in *)uaddr, addr_len);
+               err = sk->prot->connect(sk, uaddr, addr_len);
                if (err < 0) 
                        return(err);
                sock->state = SS_CONNECTING;
@@ -1036,7 +1000,7 @@ static int inet_socketpair(struct socket *sock1, struct socket *sock2)
  *     Accept a pending connection. The TCP layer now gives BSD semantics.
  */
 
-static int inet_accept(struct socket *sock, struct socket *newsock, int flags)
+int inet_accept(struct socket *sock, struct socket *newsock, int flags)
 {
        struct sock *sk1, *sk2;
        int err;
@@ -1153,8 +1117,8 @@ static int inet_getname(struct socket *sock, struct sockaddr *uaddr,
 
 
 
-static int inet_recvmsg(struct socket *sock, struct msghdr *ubuf, int size, int noblock
-                  int flags, int *addr_len )
+int inet_recvmsg(struct socket *sock, struct msghdr *ubuf, int size
+                int noblock, int flags, int *addr_len)
 {
        struct sock *sk = (struct sock *) sock->data;
        
@@ -1169,8 +1133,8 @@ static int inet_recvmsg(struct socket *sock, struct msghdr *ubuf, int size, int
 }
 
 
-static int inet_sendmsg(struct socket *sock, struct msghdr *msg, int size, int noblock
-          int flags)
+int inet_sendmsg(struct socket *sock, struct msghdr *msg, int size
+                int noblock, int flags)
 {
        struct sock *sk = (struct sock *) sock->data;
        if (sk->shutdown & SEND_SHUTDOWN) 
@@ -1190,7 +1154,7 @@ static int inet_sendmsg(struct socket *sock, struct msghdr *msg, int size, int n
 }
 
 
-static int inet_shutdown(struct socket *sock, int how)
+int inet_shutdown(struct socket *sock, int how)
 {
        struct sock *sk=(struct sock*)sock->data;
 
@@ -1214,7 +1178,7 @@ static int inet_shutdown(struct socket *sock, int how)
 }
 
 
-static int inet_select(struct socket *sock, int sel_type, select_table *wait )
+int inet_select(struct socket *sock, int sel_type, select_table *wait )
 {
        struct sock *sk=(struct sock *) sock->data;
        if (sk->prot->select == NULL) 
@@ -1322,7 +1286,6 @@ static int inet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
 #else
                        return -ENOPKG;
 #endif                                         
-                       
                case SIOCADDDLCI:
                case SIOCDELDLCI:
 #ifdef CONFIG_DLCI
@@ -1340,7 +1303,7 @@ static int inet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
                                return((*dlci_ioctl_hook)(cmd, (void *) arg));
 #endif
                        return -ENOPKG;
-
+                       
                default:
                        if ((cmd >= SIOCDEVPRIVATE) &&
                           (cmd <= (SIOCDEVPRIVATE + 15)))
@@ -1394,9 +1357,9 @@ static __inline__ struct sock *get_sock_loop_next(unsigned short hnum,
  */
 
 struct sock *get_sock(struct proto *prot, unsigned short num,
-                               unsigned long raddr,
-                               unsigned short rnum, unsigned long laddr,
-                               unsigned long paddr, unsigned short pnum)
+                     unsigned long raddr, unsigned short rnum, 
+                     unsigned long laddr, unsigned long paddr,
+                     unsigned short pnum)
 {
        struct sock *s = 0;
        struct sock *result = NULL;
@@ -1570,7 +1533,7 @@ struct sock *get_sock_mcast(struct sock *sk,
 
 #endif
 
-static struct proto_ops inet_proto_ops = {
+struct proto_ops inet_proto_ops = {
        AF_INET,
 
        inet_create,
@@ -1673,16 +1636,20 @@ void inet_proto_init(struct net_proto *pro)
         
        for(i = 0; i < SOCK_ARRAY_SIZE; i++) 
        {
-               tcp_prot.sock_array[i] = NULL;
-               udp_prot.sock_array[i] = NULL;
-               raw_prot.sock_array[i] = NULL;
+               tcp_sock_array[i] = NULL;
+               udp_sock_array[i] = NULL;
+               raw_sock_array[i] = NULL;
        }
+
        tcp_prot.inuse = 0;
        tcp_prot.highestinuse = 0;
+       tcp_prot.sock_array = tcp_sock_array;
        udp_prot.inuse = 0;
        udp_prot.highestinuse = 0;
+       udp_prot.sock_array = udp_sock_array;
        raw_prot.inuse = 0;
        raw_prot.highestinuse = 0;
+       raw_prot.sock_array = raw_sock_array;
 
        printk("IP Protocols: ");
        for(p = inet_protocol_base; p != NULL;) 
@@ -1693,14 +1660,17 @@ void inet_proto_init(struct net_proto *pro)
                p = tmp;
        }
 
+
        /*
         *      Set the ARP module up
         */
        arp_init();
+
        /*
         *      Set the IP module up
         */
        ip_init();
+
        /*
         *      Set the ICMP layer up
         */
index e2b46fb927975117dc8c5858ccd7ddce1f361db2..374ef8d111373c793b177ca779cb41684e49e613 100644 (file)
@@ -639,7 +639,8 @@ static void icmp_unreach(struct icmphdr *icmph, struct sk_buff *skb, struct devi
        int hash;
        struct inet_protocol *ipprot;
        unsigned char *dp;      
-       
+       __u32 info = 0;
+
        iph = (struct iphdr *) (icmph + 1);
        
        dp= ((unsigned char *)iph)+(iph->ihl<<2);
@@ -723,11 +724,7 @@ static void icmp_unreach(struct icmphdr *icmph, struct sk_buff *skb, struct devi
                                         */
                                                new_mtu = 68;
                                }
-                               /*
-                                * Ugly trick to pass MTU to protocol layer.
-                                * Really we should add argument "info" to error handler.
-                                */
-                               iph->id = htons(new_mtu);
+                               info = new_mtu;
                                break;
                        }
 #endif
@@ -777,7 +774,7 @@ static void icmp_unreach(struct icmphdr *icmph, struct sk_buff *skb, struct devi
 
                if (iph->protocol == ipprot->protocol && ipprot->err_handler) 
                {
-                       ipprot->err_handler(icmph->type, icmph->code, dp,
+                       ipprot->err_handler(icmph->type, icmph->code, dp, info,
                                            iph->daddr, iph->saddr, ipprot);
                }
 
index e33a0dd91ec4840132c4e1e967a564cbd761315e..67e10979f80ae7bcdce0400c3686c6b27ae26f7c 100644 (file)
@@ -566,7 +566,8 @@ int ip_fw_masquerade(struct sk_buff **skb_ptr, struct device *dev)
                else timeout = ip_masq_expire->tcp_timeout;
 
                skb->csum = csum_partial((void *)(th + 1), size - sizeof(*th), 0);
-               tcp_send_check(th,iph->saddr,iph->daddr,size,skb);
+               tcp_v4_check(th, size, iph->saddr, iph->daddr,
+                            skb->csum);
        }
         ip_masq_set_expire(ms, timeout);
        ip_send_check(iph);
@@ -896,10 +897,14 @@ int ip_fw_demasquerade(struct sk_buff **skb_p, struct device *dev)
                        struct tcphdr *th;
                         skb->csum = csum_partial((void *)(((struct tcphdr *)portptr) + 1),
                                                  len - sizeof(struct tcphdr), 0);
-                        tcp_send_check((struct tcphdr *)portptr,iph->saddr,iph->daddr,len,skb);
+                       th = (struct tcphdr *) portptr;
+                       th->check = 0;
 
+                        tcp_v4_check(th, len, iph->saddr, iph->daddr,
+                                    skb->csum);
+                       
                        /* Check if TCP FIN or RST */
-                       th = (struct tcphdr *)portptr;
+                       
                        if (th->fin)
                        {
                                ms->flags |= IP_MASQ_F_SAW_FIN_IN;
index 31e0d2ea9ceaf7ce7b7a5ec802d41515ae3ff9a9..77880db58c599ad3f57c1a8b02e759b55e60c8d2 100644 (file)
@@ -321,42 +321,6 @@ void ip_send_check(struct iphdr *iph)
 }
 
 
-/*
- *     If a sender wishes the packet to remain unfreed
- *     we add it to his send queue. This arguably belongs
- *     in the TCP level since nobody else uses it. BUT
- *     remember IPng might change all the rules.
- */
-static inline void add_to_send_queue(struct sock * sk, struct sk_buff * skb)
-{
-       unsigned long flags;
-
-       /* The socket now has more outstanding blocks */
-       sk->packets_out++;
-
-       /* Protect the list for a moment */
-       save_flags(flags);
-       cli();
-
-       if (skb->link3 != NULL)
-       {
-               NETDEBUG(printk("ip.c: link3 != NULL\n"));
-               skb->link3 = NULL;
-       }
-       if (sk->send_head == NULL)
-       {
-               sk->send_tail = skb;
-               sk->send_head = skb;
-               sk->send_next = skb;
-       }
-       else
-       {
-               sk->send_tail->link3 = skb;
-               sk->send_tail = skb;
-       }
-       restore_flags(flags);
-}
-
 
 /*
  * Queues a packet to be sent, and starts the transmitter
@@ -397,13 +361,11 @@ void ip_queue_xmit(struct sock *sk, struct device *dev,
 
        switch (free) {
                /* No reassigning numbers to fragments... */
-               default:
+               case 2:
                        free = 1;
                        break;
-               case 0:
-                       add_to_send_queue(sk, skb);
-                       /* fall through */
-               case 1:
+               default:
+                       free = 1;
                        iph->id = htons(ip_id_count++);
        }
 
@@ -732,7 +694,7 @@ int ip_build_xmit(struct sock *sk,
         
                maxfraglen = ((dev->mtu-20) & ~7) + fragheaderlen;
         }
-       
+
        /*
         *      Start at the end of the frame by handling the remainder.
         */
index ab17ffa8fb7518120ef20b9437a8537fa774cf6c..6db07766d585bbda4ee90ee427ede3e86a1ee5b5 100644 (file)
 #include <asm/system.h>
 #include <asm/uaccess.h>
 
-/*
- *     We really ought to have a single public _inline_ min function!
- */
-
-static unsigned long min(unsigned long a, unsigned long b)
-{
-       if (a < b) 
-               return(a);
-       return(b);
-}
-
 
 /*
  *     This should be the easiest of all, all we do is copy it into a buffer. 
@@ -480,23 +469,22 @@ int packet_recvmsg(struct sock *sk, struct msghdr *msg, int len,
 struct proto packet_prot = 
 {
        packet_close,
-       ip_build_header,        /* Not actually used */
        NULL,
+       NULL,                   /* accept */
        NULL,
-       ip_queue_xmit,          /* These two are not actually used */
        NULL,
        NULL,
-       NULL,
-       NULL, 
        datagram_select,
        NULL,                   /* No ioctl */
        packet_init,
        NULL,
+       NULL,
        NULL,                   /* No set/get socket options */
        NULL,
        packet_sendmsg,         /* Sendmsg */
        packet_recvmsg,         /* Recvmsg */
        packet_bind,            /* Bind */
+       NULL,                   /* Backlog_rcv */
        128,
        0,
        "PACKET",
index 5b9fc3c7e6e24729c9cfcf99e25d0462f9ab7d8d..4494270cceedaa980a1840fcf529c0fe83393ac4 100644 (file)
@@ -59,6 +59,7 @@ get__netinfo(struct proto *pro, char *buffer, int format, char **start, off_t of
 {
        struct sock **s_array;
        struct sock *sp;
+       struct tcp_opt *tp;
        int i;
        int timer_active;
        int timer_active1;
@@ -87,6 +88,7 @@ get__netinfo(struct proto *pro, char *buffer, int format, char **start, off_t of
        {
                cli();
                sp = s_array[i];
+
                while(sp != NULL) 
                {
                        pos += 128;
@@ -95,6 +97,9 @@ get__netinfo(struct proto *pro, char *buffer, int format, char **start, off_t of
                                sp = sp->next;
                                continue;
                        }
+
+                       tp = &(sp->tp_pinfo.af_tcp);
+
                        dest  = sp->daddr;
                        src   = sp->saddr;
                        destp = sp->dummy_th.dest;
@@ -122,13 +127,14 @@ get__netinfo(struct proto *pro, char *buffer, int format, char **start, off_t of
                        sprintf(tmpbuf, "%4d: %08lX:%04X %08lX:%04X"
                                " %02X %08X:%08X %02X:%08lX %08X %5d %8d %ld",
                                i, src, srcp, dest, destp, sp->state, 
-                               format==0?sp->write_seq-sp->rcv_ack_seq:sp->wmem_alloc, 
-                               format==0?sp->acked_seq-sp->copied_seq:sp->rmem_alloc,
+                               format==0?sp->write_seq-tp->snd_una:sp->wmem_alloc, 
+                               format==0?tp->rcv_nxt-sp->copied_seq:sp->rmem_alloc,
                                timer_active, timer_expires-jiffies, (unsigned) sp->retransmits,
                                (sp->socket&&SOCK_INODE(sp->socket))?SOCK_INODE(sp->socket)->i_uid:0,
                                timer_active?sp->timeout:0,
                                sp->socket && SOCK_INODE(sp->socket) ?
                                SOCK_INODE(sp->socket)->i_ino : 0);
+
                        if (timer_active1) add_timer(&sp->retransmit_timer);
                        if (timer_active2) add_timer(&sp->timer);
                        len += sprintf(buffer+len, "%-127s\n", tmpbuf);
index 2773fa326f4a15a1d3d60855e7dd81a2e22314ae..bb9ff5fbb592ca2d327fad6cac1db99eb6097e01 100644 (file)
@@ -66,8 +66,8 @@ static struct inet_protocol ipip_protocol =
 
 static struct inet_protocol tcp_protocol = 
 {
-       tcp_rcv,                /* TCP handler          */
-       tcp_err,                /* TCP error control    */  
+       tcp_v4_rcv,             /* TCP handler          */
+       tcp_v4_err,             /* TCP error control    */  
 #if defined(CONFIG_NET_IPIP) && defined(CONFIG_IP_FORWARD)
        &ipip_protocol,
 #else  
index 036e283523f3ae3a01eca44209711e1ea356c859..4f1ac37cea6827f2c8e33b16239a2b97eef9e40e 100644 (file)
 struct sock *mroute_socket=NULL;
 #endif
 
-static inline unsigned long min(unsigned long a, unsigned long b)
-{
-       if (a < b) 
-               return(a);
-       return(b);
-}
-
 
 /*
  *     Raw_err does not currently get called by the icmp module - FIXME:
@@ -109,7 +102,7 @@ void raw_err (int type, int code, unsigned char *header, __u32 daddr,
        return;
 }
 
-static inline void raw_rcv_skb(struct sock * sk, struct sk_buff * skb)
+static inline int raw_rcv_skb(struct sock * sk, struct sk_buff * skb)
 {
        /* Charge it to the socket. */
        
@@ -118,22 +111,10 @@ static inline void raw_rcv_skb(struct sock * sk, struct sk_buff * skb)
                ip_statistics.IpInDiscards++;
                skb->sk=NULL;
                kfree_skb(skb, FREE_READ);
-               return;
+               return 0;
        }
 
        ip_statistics.IpInDelivers++;
-}
-
-/*
- * This is the prot->rcv() function. It's called when we have
- * backlogged packets from core/sock.c if we couldn't receive it
- * when the packet arrived.
- */
-static int raw_rcv_redo(struct sk_buff *skb, struct device *dev, struct options *opt,
-       __u32 daddr, unsigned short len,
-       __u32 saddr, int redo, struct inet_protocol * protocol)
-{
-       raw_rcv_skb(skb->sk, skb);
        return 0;
 }
 
@@ -376,14 +357,11 @@ int raw_recvmsg(struct sock *sk, struct msghdr *msg, int len,
 
 struct proto raw_prot = {
        raw_close,
-       ip_build_header,
        udp_connect,
        NULL,
-       ip_queue_xmit,
        NULL,
        NULL,
        NULL,
-       raw_rcv_redo,
        datagram_select,
 #ifdef CONFIG_IP_MROUTE        
        ipmr_ioctl,
@@ -392,14 +370,16 @@ struct proto raw_prot = {
 #endif         
        raw_init,
        NULL,
+       NULL,
        ip_setsockopt,
        ip_getsockopt,
        raw_sendmsg,
        raw_recvmsg,
        NULL,           /* No special bind */
+       raw_rcv_skb,
        128,
        0,
        "RAW",
        0, 0,
-       {NULL,}
+       NULL
 };
index 21d7cd3be8524748ee03b3321381ac5fa65bec40..c2d8a06aff0951ed11865251240a88b9eb95c450 100644 (file)
@@ -7,6 +7,20 @@
 
 #include <linux/mm.h>
 #include <linux/sysctl.h>
+#include <net/tcp.h>
+
+/*
+ *     TCP configuration parameters
+ */
+
+#define TCP_PMTU_DISC  0x00000001      /* perform PMTU discovery         */
+#define TCP_CONG_AVOID 0x00000002      /* congestion avoidance algorithm */
+#define TCP_DELAY_ACKS 0x00000003      /* delayed ack stategy            */
+
+#if 0
+static int boolean_min = 0;
+static int boolean_max = 1;
+#endif
 
 /* From arp.c */
 extern int sysctl_arp_res_time;
@@ -17,6 +31,8 @@ extern int sysctl_arp_check_interval;
 extern int sysctl_arp_confirm_interval;
 extern int sysctl_arp_confirm_timeout;
 
+extern int sysctl_tcp_vegas_cong_avoidance;
+
 ctl_table ipv4_table[] = {
         {NET_IPV4_ARP_RES_TIME, "arp_res_time",
          &sysctl_arp_res_time, sizeof(int), 0644, NULL, &proc_dointvec},
@@ -34,5 +50,15 @@ ctl_table ipv4_table[] = {
         {NET_IPV4_ARP_CONFIRM_TIMEOUT, "arp_confirm_timeout",
          &sysctl_arp_confirm_timeout, sizeof(int), 0644, NULL,
          &proc_dointvec},
+#if 0
+       {TCP_PMTU_DISC, "tcp_pmtu_discovery",
+       &ipv4_pmtu_discovery, sizeof(int), 644, 
+       NULL, &proc_dointvec, &sysctl_intvec_minmax, 
+       &boolean_min, &boolean_max},
+#endif
+
+       {NET_IPV4_TCP_VEGAS_CONG_AVOID, "tcp_vegas_cong_avoid",
+        &sysctl_tcp_vegas_cong_avoidance, sizeof(int), 0644,
+        NULL, &proc_dointvec },
        {0}
 };
index 0a8af9c887c7de9d77b4e71abdfbc64d3a702b80..e24b4a9826a2a825d9fba18ede326db74b2fdd04 100644 (file)
  *                                     ack if stat is TCP_CLOSED.
  *             Alan Cox        :       Look up device on a retransmit - routes may
  *                                     change. Doesn't yet cope with MSS shrink right
- *                                     but it's a start!
+ *                                     but its a start!
  *             Marc Tamsky     :       Closing in closing fixes.
  *             Mike Shaver     :       RFC1122 verifications.
  *             Alan Cox        :       rcv_saddr errors.
  *                                     against machines running Solaris,
  *                                     and seems to result in general
  *                                     improvement.
- *             Eric Schenk     :       Changed receiver side silly window
- *                                     avoidance algorithm to BSD style
- *                                     algorithm. This doubles throughput
- *                                     against machines running Solaris,
- *                                     and seems to result in general
- *                                     improvement.
  *     Stefan Magdalinski      :       adjusted tcp_readable() to fix FIONREAD
  *     Willy Konynenberg       :       Transparent proxying support.
- *             Theodore Ts'o   :       Do secure TCP sequence numbers.
  *                                     
  * To Fix:
  *             Fast path the code. Two things here - fix the window calculation
  * (Whew. -- MS 950903)
  **/
 
-#include <linux/config.h>
 #include <linux/types.h>
 #include <linux/fcntl.h>
-#include <linux/random.h>
 
 #include <net/icmp.h>
 #include <net/tcp.h>
 unsigned long seq_offset;
 struct tcp_mib tcp_statistics;
 
-static void tcp_close(struct sock *sk, unsigned long timeout);
+
 
 /*
  *     Find someone to 'accept'. Must be called with
  *     the socket locked or with interrupts disabled
  */
 
-static struct sk_buff *tcp_find_established(struct sock *s)
+static struct open_request *tcp_find_established(struct tcp_opt *tp)
 {
-       struct sk_buff *p=skb_peek(&s->receive_queue);
-       if(p==NULL)
+       struct open_request *req;
+
+       req = tp->syn_wait_queue;
+
+       if (!req)
                return NULL;
-       do
-       {
-               if(p->sk->state == TCP_ESTABLISHED || p->sk->state >= TCP_FIN_WAIT1)
-                       return p;
-               p=p->next;
-       }
-       while(p!=(struct sk_buff *)&s->receive_queue);
+       
+       do {
+               if (req->sk && 
+                   (req->sk->state == TCP_ESTABLISHED ||
+                    req->sk->state >= TCP_FIN_WAIT1))
+               {
+                       return req;
+               }
+
+               req = req->dl_next;
+
+       } while (req != tp->syn_wait_queue);
+       
        return NULL;
 }
 
@@ -493,108 +493,9 @@ void tcp_time_wait(struct sock *sk)
 
 
 /*
- * This routine is called by the ICMP module when it gets some
- * sort of error condition.  If err < 0 then the socket should
- * be closed and the error returned to the user.  If err > 0
- * it's just the icmp type << 8 | icmp code.  After adjustment
- * header points to the first 8 bytes of the tcp header.  We need
- * to find the appropriate port.
- */
-
-void tcp_err(int type, int code, unsigned char *header, __u32 daddr,
-       __u32 saddr, struct inet_protocol *protocol)
-{
-       struct tcphdr *th = (struct tcphdr *)header;
-       struct sock *sk;
-
-       /*
-        *      This one is _WRONG_. FIXME urgently.
-        */
-#ifndef CONFIG_NO_PATH_MTU_DISCOVERY
-       struct iphdr *iph=(struct iphdr *)(header-sizeof(struct iphdr));
-#endif
-       th =(struct tcphdr *)header;
-       sk = get_sock(&tcp_prot, th->source, daddr, th->dest, saddr, 0, 0);
-
-       if (sk == NULL)
-               return;
-
-       if (type == ICMP_SOURCE_QUENCH)
-       {
-               /*
-                * FIXME:
-                * Follow BSD for now and just reduce cong_window to 1 again.
-                * It is possible that we just want to reduce the
-                * window by 1/2, or that we want to reduce ssthresh by 1/2
-                * here as well.
-                */
-               sk->cong_window = 1;
-               sk->high_seq = sk->sent_seq;
-               return;
-       }
-
-       if (type == ICMP_PARAMETERPROB)
-       {
-               sk->err=EPROTO;
-               sk->error_report(sk);
-       }
-
-#ifndef CONFIG_NO_PATH_MTU_DISCOVERY
-       if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED)
-       {
-               struct rtable * rt;
-               /*
-                * Ugly trick to pass MTU to protocol layer.
-                * Really we should add argument "info" to error handler.
-                */
-               unsigned short new_mtu = ntohs(iph->id);
-
-               if ((rt = sk->ip_route_cache) != NULL)
-                       if (rt->rt_mtu > new_mtu)
-                               rt->rt_mtu = new_mtu;
-
-               /*
-                *      FIXME::
-                *      Not the nicest of fixes: Lose a MTU update if the socket is
-                *      locked this instant. Not the right answer but will be best
-                *      for the production fix. Make 2.1 work right!
-                */
-                
-               if (sk->mtu > new_mtu - sizeof(struct iphdr) - sizeof(struct tcphdr)
-                       && new_mtu > sizeof(struct iphdr)+sizeof(struct tcphdr) && !sk->users)
-                       sk->mtu = new_mtu - sizeof(struct iphdr) - sizeof(struct tcphdr);
-
-               return;
-       }
-#endif
-
-       /*
-        * If we've already connected we will keep trying
-        * until we time out, or the user gives up.
-        */
-
-       if(code<=NR_ICMP_UNREACH)
-       {
-               if(icmp_err_convert[code].fatal || sk->state == TCP_SYN_SENT || sk->state == TCP_SYN_RECV)
-               {
-                       sk->err = icmp_err_convert[code].errno;
-                       if (sk->state == TCP_SYN_SENT || sk->state == TCP_SYN_RECV)
-                       {
-                               tcp_statistics.TcpAttemptFails++;
-                               tcp_set_state(sk,TCP_CLOSE);
-                               sk->error_report(sk);           /* Wake people up to see the error (see connect in sock.c) */
-                       }
-               }
-               else    /* Only an error on timeout */
-                       sk->err_soft = icmp_err_convert[code].errno;
-       }
-}
-
-
-/*
- *     Walk down the receive queue counting readable data until we hit the end or we find a gap
- *     in the received data queue (ie a frame missing that needs sending to us). Not
- *     sorting using two queues as data arrives makes life so much harder.
+ *     Walk down the receive queue counting readable data until we hit the 
+ *     end or we find a gap in the received data queue (ie a frame missing 
+ *     that needs sending to us). 
  */
 
 static int tcp_readable(struct sock *sk)
@@ -627,13 +528,19 @@ static int tcp_readable(struct sock *sk)
 
        do
        {
-               if (before(counted, skb->seq))          /* Found a hole so stops here */
+               /* Found a hole so stops here */
+               if (before(counted, skb->seq))          
                        break;
-               sum = skb->len - (counted - skb->seq);  /* Length - header but start from where we are up to (avoid overlaps) */
+               /* 
+                * Length - header but start from where we are up to 
+                * avoid overlaps 
+                */
+               sum = skb->len - (counted - skb->seq);  
                if (skb->h.th->syn)
                        sum++;
                if (sum > 0)
-               {                                       /* Add it up, move on */
+               {       
+                       /* Add it up, move on */
                        amount += sum;
                        if (skb->h.th->syn)
                                amount--;
@@ -655,9 +562,13 @@ static int tcp_readable(struct sock *sk)
                 * and a blocking read().  And the queue scan in tcp_read()
                 * was correct.  Mike <pall@rz.uni-karlsruhe.de>
                 */
+
+               /* don't count urg data */
                if (skb->h.th->urg)
-                       amount--;       /* don't count urg data */
-/*             if (amount && skb->h.th->psh) break;*/
+                       amount--;
+#if 0
+               if (amount && skb->h.th->psh) break;
+#endif
                skb = skb->next;
        }
        while(skb != (struct sk_buff *)&sk->receive_queue);
@@ -674,12 +585,12 @@ static int tcp_readable(struct sock *sk)
 static int tcp_listen_select(struct sock *sk, int sel_type, select_table *wait)
 {
        if (sel_type == SEL_IN) {
-               struct sk_buff * skb;
+               struct open_request *req;
 
                lock_sock(sk);
-               skb = tcp_find_established(sk);
+               req = tcp_find_established(&sk->tp_pinfo.af_tcp);
                release_sock(sk);
-               if (skb)
+               if (req)
                        return 1;
                select_wait(sk->sleep,wait);
                return 0;
@@ -687,7 +598,6 @@ static int tcp_listen_select(struct sock *sk, int sel_type, select_table *wait)
        return 0;
 }
 
-
 /*
  *     Wait for a TCP event.
  *
@@ -695,8 +605,10 @@ static int tcp_listen_select(struct sock *sk, int sel_type, select_table *wait)
  *     take care of normal races (between the test and the event) and we don't
  *     go look at any of the socket buffers directly.
  */
-static int tcp_select(struct sock *sk, int sel_type, select_table *wait)
+int tcp_select(struct sock *sk, int sel_type, select_table *wait)
 {
+       struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
+
        if (sk->state == TCP_LISTEN)
                return tcp_listen_select(sk, sel_type, wait);
 
@@ -709,12 +621,12 @@ static int tcp_select(struct sock *sk, int sel_type, select_table *wait)
 
                if (sk->shutdown & RCV_SHUTDOWN)
                        return 1;
-
-               if (sk->acked_seq == sk->copied_seq)
+                       
+               if (tp->rcv_nxt == sk->copied_seq)
                        break;
 
                if (sk->urg_seq != sk->copied_seq ||
-                   sk->acked_seq != sk->copied_seq+1 ||
+                   tp->rcv_nxt != sk->copied_seq+1 ||
                    sk->urginline || !sk->urg_data)
                        return 1;
                break;
@@ -726,7 +638,12 @@ static int tcp_select(struct sock *sk, int sel_type, select_table *wait)
                        return 0;
                if (sk->state == TCP_SYN_SENT || sk->state == TCP_SYN_RECV)
                        break;
-               if (sk->wmem_alloc*2 > sk->sndbuf)
+               /*
+                * This is now right thanks to a small fix
+                * by Matt Dillon.
+                */
+
+               if (sock_wspace(sk) < sk->mtu+128+sk->prot->max_header)
                        break;
                return 1;
 
@@ -792,50 +709,21 @@ int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg)
 }
 
 
-/*
- *     This routine computes a TCP checksum.
- *
- *     Modified January 1995 from a go-faster DOS routine by
- *     Jorge Cwik <jorge@laser.satlink.net>
+/* 
+ *     This routine builds a generic TCP header. 
  */
-#undef DEBUG_TCP_CHECK
-void tcp_send_check(struct tcphdr *th, unsigned long saddr,
-               unsigned long daddr, int len, struct sk_buff *skb)
-{
-#ifdef DEBUG_TCP_CHECK
-       u16 check;
-#endif
-       th->check = 0;
-       th->check = tcp_check(th, len, saddr, daddr,
-               csum_partial((char *)th,sizeof(*th),skb->csum));
-
-#ifdef DEBUG_TCP_CHECK
-       check = th->check;
-       th->check = 0;
-       th->check = tcp_check(th, len, saddr, daddr,
-               csum_partial((char *)th,len,0));
-       if (check != th->check) {
-               static int count = 0;
-               if (++count < 10) {
-                       printk("Checksum %x (%x) from %p\n", th->check, check,
-                               (&th)[-1]);
-                       printk("TCP=<off:%d a:%d s:%d f:%d>\n", th->doff*4, th->ack, th->syn, th->fin);
-               }
-       }
-#endif
-}
-
-
-/*
- *     This routine builds a generic TCP header.
- */
-
-static inline int tcp_build_header(struct tcphdr *th, struct sock *sk, int push)
+extern __inline int tcp_build_header(struct tcphdr *th, struct sock *sk, int push)
 {
+       struct tcp_opt *tp=&(sk->tp_pinfo.af_tcp);
        memcpy(th,(void *) &(sk->dummy_th), sizeof(*th));
-       th->psh = (push == 0) ? 1 : 0;
        th->seq = htonl(sk->write_seq);
-       th->ack_seq = htonl(sk->acked_seq);
+#if 0
+       th->psh =(push == 0) ? 1 : 0;
+#endif
+       sk->bytes_rcv = 0;
+       sk->ack_timed = 0;
+       th->ack_seq = htonl(tp->rcv_nxt);
        th->window = htons(tcp_select_window(sk));
 
        return(sizeof(*th));
@@ -890,187 +778,173 @@ static void wait_for_tcp_memory(struct sock * sk)
        lock_sock(sk);
 }
 
-/*
- * Add more stuff to the end of skb->len
- */
-static int fill_in_partial_skb(struct sock *sk, struct sk_buff *skb,
-       unsigned char * from, int seglen)
-{
-       void (*send)(struct sock *sk, struct sk_buff *skb);
-       int copy, tcp_size;
 
-       tcp_size = skb->tail - (unsigned char *)(skb->h.th + 1);
+static int tcp_append_tail(struct sock *sk, struct sk_buff *skb, u8 *from,
+                          int tcp_size, int seglen)
+{
+       int fault;
+       int copy;
 
-       /*
-        *      Now we may find the frame is as big, or too
-        *      big for our MSS. Thats all fine. It means the
-        *      MSS shrank (from an ICMP) after we allocated 
-        *      this frame.
+       /* 
+        * Add more stuff to the end 
+        * of the skb
         */
 
-       copy = sk->mss - tcp_size;
-       if (copy <= 0) {
-               tcp_send_skb(sk, skb);
-               return 0;
+       copy = min(sk->mss - tcp_size, skb->end - skb->tail);
+       copy = min(copy, seglen);
+       
+       tcp_size += copy;
+       
+       fault = copy_from_user(skb->tail, from, copy);
+       
+       if (fault)
+       {
+               return -1;
        }
 
-       /*
-        *      Otherwise continue to fill the buffer.
-        */
-       send = tcp_send_skb;
-       if (copy > seglen) {
-               send = tcp_enqueue_partial;
-               copy = seglen;
-       }
-       copy_from_user(skb->tail, from, copy);
-       tcp_size += copy;
-       skb->tail += copy;
-       skb->len += copy;
+       skb_put(skb, copy);
        skb->csum = csum_partial(skb->tail - tcp_size, tcp_size, 0);
+
        sk->write_seq += copy;
-       if (!sk->packets_out)
-               send = tcp_send_skb;
-       send(sk, skb);
+       skb->end_seq += copy;
+
        return copy;
 }
 
-
 /*
  *     This routine copies from a user buffer into a socket,
  *     and starts the transmit system.
  */
 
-static int do_tcp_sendmsg(struct sock *sk,
-       int iovlen, struct iovec *iov,
-       int len, int nonblock, int flags)
+int tcp_do_sendmsg(struct sock *sk, int iovlen, struct iovec *iov,
+                  int len, int nonblock, int flags)
 {
-       int copied = 0;
-       struct device *dev = NULL;
+       int copied  = 0;
+       struct tcp_opt *tp=&(sk->tp_pinfo.af_tcp);
 
-       /*
+       /* 
         *      Wait for a connection to finish.
         */
        while (sk->state != TCP_ESTABLISHED && sk->state != TCP_CLOSE_WAIT)
        {
-               if (sk->err)
+               
+               if (copied)
+                       return copied;
+               
+               if (sk->err) 
                        return sock_error(sk);
-
+               
                if (sk->state != TCP_SYN_SENT && sk->state != TCP_SYN_RECV)
                {
                        if (sk->keepopen)
                                send_sig(SIGPIPE, current, 0);
                        return -EPIPE;
                }
-
+               
                if (nonblock)
                        return -EAGAIN;
-
+               
                if (current->signal & ~current->blocked)
                        return -ERESTARTSYS;
-
+               
                wait_for_tcp_connect(sk);
        }
-
+       
+       
        /*
         *      Ok commence sending
         */
-
-       while (--iovlen >= 0)
+       
+       while(--iovlen >= 0)
        {
                int seglen=iov->iov_len;
                unsigned char * from=iov->iov_base;
+               u32 actual_win;
+
                iov++;
 
-               while(seglen > 0)
+               while(seglen > 0) 
                {
                        int copy;
                        int tmp;
                        struct sk_buff *skb;
-                       void (*send)(struct sock *, struct sk_buff *);
 
                        /*
                         * Stop on errors
                         */
-                       if (sk->err)
+                       if (sk->err) 
                        {
-                               if (copied)
+                               if (copied) 
                                        return copied;
                                return sock_error(sk);
                        }
 
                        /*
-                        *      Make sure that we are established.
+                        *      Make sure that we are established. 
                         */
-                       if (sk->shutdown & SEND_SHUTDOWN)
+                       if (sk->shutdown & SEND_SHUTDOWN) 
                        {
                                if (copied)
                                        return copied;
                                send_sig(SIGPIPE,current,0);
                                return -EPIPE;
                        }
-
-                       /*
-                        * The following code can result in copy <= if sk->mss is ever
-                        * decreased.  It shouldn't be.  sk->mss is min(sk->mtu, sk->max_window).
-                        * sk->mtu is constant once SYN processing is finished.  I.e. we
-                        * had better not get here until we've seen his SYN and at least one
-                        * valid ack.  (The SYN sets sk->mtu and the ack sets sk->max_window.)
-                        * But ESTABLISHED should guarantee that.  sk->max_window is by definition
-                        * non-decreasing.  Note that any ioctl to set user_mss must be done
-                        * before the exchange of SYN's.  If the initial ack from the other
-                        * end has a window of 0, max_window and thus mss will both be 0.
+       
+                       /* 
+                        *Now we need to check if we have a half built packet. 
                         */
 
-                       /*
-                        *      Now we need to check if we have a half built packet.
-                        */
-#ifndef CONFIG_NO_PATH_MTU_DISCOVERY
-                       /*
-                        *      Really, we should rebuild all the queues...
-                        *      It's difficult. Temporary hack is to send all
-                        *      queued segments with allowed fragmentation.
-                        */
+                       /* if we have queued packets */
+                       if (tp->send_head && !(flags & MSG_OOB) ) 
                        {
+                               int tcp_size;
+
+                               /* Tail */
+                               
+                               skb = sk->write_queue.prev;
+                               tcp_size = skb->tail - 
+                                       (unsigned char *)(skb->h.th + 1);
+                                       
                                /*
-                                *      new_mss may be zero. That indicates
-                                *      we don't have a window estimate for
-                                *      the remote box yet. 
-                                *              -- AC
+                                * This window_seq test is somewhat dangerous
+                                * If the remote does SWS avoidance we should
+                                * queue the best we can
+                                * if not we should in fact send multiple
+                                * packets...
+                                * a method for detecting this would be most
+                                * welcome
                                 */
-                               
-                               int new_mss = min(sk->mtu, sk->max_window);
-                               if (new_mss && new_mss < sk->mss)
-                               {
-                                       tcp_send_partial(sk);
-                                       sk->mss = new_mss;
-                               }
-                       }
-#endif
 
-                       /*
-                        *      If there is a partly filled frame we can fill
-                        *      out.
-                        */
-                       skb = tcp_dequeue_partial(sk);
-                       if (skb) {
-                               if (!(flags & MSG_OOB)) {
-                                       int retval;
-                                       retval = fill_in_partial_skb(sk, skb, from, seglen);
-                                       if (retval < 0)
-                                               return retval;
-                                       seglen -= retval;
-                                       from += retval;
-                                       copied += retval;
-                                       len -= retval;
+                               if (skb->end > skb->tail &&
+                                   sk->mss - tcp_size > 0 &&
+                                   skb->end_seq < tp->snd_una + tp->snd_wnd) 
+                               {
+                                       
+                                       copy = tcp_append_tail(sk, skb, from,
+                                                              tcp_size,
+                                                              seglen);
+                                       if (copy == -1)
+                                       {
+                                               return -EFAULT;
+                                       }
+                                       
+                                       from += copy;
+                                       copied += copy;
+                                       len -= copy;
+                                       seglen -= copy;
+                                       
+                                       /*
+                                        *      FIXME: if we're nagling we
+                                        *      should send here.
+                                        */
                                        continue;
                                }
-                               tcp_send_skb(sk, skb);
-                               continue;
                        }
 
+
                /*
-                * We also need to worry about the window.
-                * If window < 1/2 the maximum window we've seen from this
+                *   We also need to worry about the window.
+                *   If window < 1/2 the maximum window we've seen from this
                 *   host, don't use it.  This is sender side
                 *   silly window prevention, as specified in RFC1122.
                 *   (Note that this is different than earlier versions of
@@ -1080,40 +954,51 @@ static int do_tcp_sendmsg(struct sock *sk,
                 *   be queued for later rather than sent.
                 */
 
-                       copy = sk->window_seq - sk->write_seq;
-                       if (copy <= 0 || copy < (sk->max_window >> 1) || copy > sk->mss)
-                               copy = sk->mss;
-                       if (copy > seglen)
-                               copy = seglen;
+                       copy = min(seglen, sk->mss);
+
+                       actual_win = tp->snd_wnd - (tp->snd_nxt - tp->snd_una);
+
+                       if (copy > actual_win && 
+                           actual_win >= (sk->max_window >> 1))
+                       {
+                               copy = actual_win;
+                       }
+
                        if (copy <= 0)
                        {
-                               printk(KERN_CRIT "TCP: **bug**: copy=%d, sk->mss=%d\n", copy, sk->mss);
-                               return -EFAULT;
+                               printk(KERN_DEBUG "sendmsg: copy < 0\n");
+                               return -EIO;
                        }
 
                        /*
-                        *      We should really check the window here also.
+                        *  If sk->packets_out > 0 segment will be nagled
+                        *  else we kick it right away
                         */
 
-                       send = tcp_send_skb;
-                       tmp = copy + sk->prot->max_header + 15;
-                       if (copy < sk->mss && !(flags & MSG_OOB) && sk->packets_out)
+                       tmp = MAX_HEADER + sk->prot->max_header + 
+                               sizeof(struct sk_buff) + 15;
+                       if (copy < min(sk->mss, sk->max_window >> 1) && 
+                           !(flags & MSG_OOB) && sk->packets_out)
+                       {
+                               tmp += min(sk->mss, sk->max_window);
+                       }
+                       else
                        {
-                               tmp = tmp - copy + sk->mtu + 128;
-                               send = tcp_enqueue_partial;
+                               tmp += copy;
                        }
+                       
                        skb = sock_wmalloc(sk, tmp, 0, GFP_KERNEL);
-
+       
                        /*
-                        *      If we didn't get any memory, we need to sleep.
+                        *      If we didn't get any memory, we need to sleep. 
                         */
-
-                       if (skb == NULL)
+       
+                       if (skb == NULL) 
                        {
                                sk->socket->flags |= SO_NOSPACE;
-                               if (nonblock)
+                               if (nonblock) 
                                {
-                                       if (copied)
+                                       if (copied) 
                                                return copied;
                                        return -EAGAIN;
                                }
@@ -1132,27 +1017,14 @@ static int do_tcp_sendmsg(struct sock *sk,
                        skb->sk = sk;
                        skb->free = 0;
                        skb->localroute = sk->localroute|(flags&MSG_DONTROUTE);
-
+       
                        /*
                         * FIXME: we need to optimize this.
                         * Perhaps some hints here would be good.
                         */
 
-                       tmp = sk->prot->build_header(skb, sk->saddr, sk->daddr, &dev,
-                                IPPROTO_TCP, sk->opt, skb->truesize,sk->ip_tos,sk->ip_ttl,&sk->ip_route_cache);
-                       if (tmp < 0 )
-                       {
-                               sock_wfree(sk, skb);
-                               if (copied)
-                                       return(copied);
-                               return(tmp);
-                       }
-#ifndef CONFIG_NO_PATH_MTU_DISCOVERY
-                       skb->ip_hdr->frag_off |= htons(IP_DF);
-#endif
-                       skb->dev = dev;
-                       skb->h.th =(struct tcphdr *)skb_put(skb,sizeof(struct tcphdr));
-                       tmp = tcp_build_header(skb->h.th, sk, seglen-copy);
+                       tmp = tp->af_specific->build_net_header(sk, skb);
+
                        if (tmp < 0)
                        {
                                sock_wfree(sk, skb);
@@ -1161,6 +1033,20 @@ static int do_tcp_sendmsg(struct sock *sk,
                                return(tmp);
                        }
 
+                       skb->h.th =(struct tcphdr *) 
+                         skb_put(skb,sizeof(struct tcphdr));
+
+                       seglen -= copy;
+                       tmp = tcp_build_header(skb->h.th, sk, seglen || iovlen);
+
+                        if (tmp < 0) 
+                        {
+                                sock_wfree(sk, skb);
+                                if (copied) 
+                                        return(copied);
+                                return(tmp);
+                        }
+
                        if (flags & MSG_OOB)
                        {
                                skb->h.th->urg = 1;
@@ -1168,92 +1054,42 @@ static int do_tcp_sendmsg(struct sock *sk,
                        }
 
                        skb->csum = csum_partial_copy_fromuser(from,
-                               skb->tail, copy, 0);
-                       skb->tail += copy;
-                       skb->len += copy;
+                                       skb_put(skb, copy), copy, 0);
+               
                        from += copy;
                        copied += copy;
                        len -= copy;
-                       seglen -= copy;
-                       sk->write_seq += copy;
                        skb->free = 0;
-
-                       send(sk, skb);
+                       sk->write_seq += copy;
+               
+                       tcp_send_skb(sk, skb);
+       
+                       release_sock(sk);
+                       lock_sock(sk);
                }
        }
+
        sk->err = 0;
 
        return copied;
 }
 
 
-static int tcp_sendmsg(struct sock *sk, struct msghdr *msg,
-         int len, int nonblock, int flags)
-{
-       int retval = -EINVAL;
-
-       /*
-        *      Do sanity checking for sendmsg/sendto/send
-        */
-
-       if (flags & ~(MSG_OOB|MSG_DONTROUTE))
-               goto out;
-       if (msg->msg_name) {
-               struct sockaddr_in *addr=(struct sockaddr_in *)msg->msg_name;
-
-               if (msg->msg_namelen < sizeof(*addr))
-                       goto out;
-               if (addr->sin_family && addr->sin_family != AF_INET)
-                       goto out;
-               retval = -ENOTCONN;
-               if(sk->state == TCP_CLOSE)
-                       goto out;
-               retval = -EISCONN;
-               if (addr->sin_port != sk->dummy_th.dest)
-                       goto out;
-               if (addr->sin_addr.s_addr != sk->daddr)
-                       goto out;
-       }
-
-       lock_sock(sk);
-       retval = do_tcp_sendmsg(sk, msg->msg_iovlen, msg->msg_iov, len, nonblock, flags);
+       
 
 /*
- *     Nagle's rule. Turn Nagle off with TCP_NODELAY for highly
- *     interactive fast network servers. It's meant to be on and
- *     it really improves the throughput though not the echo time
- *     on my slow slip link - Alan
- *
- *     If not nagling we can send on the before case too..
+ *     Send an ack if one is backlogged at this point. Ought to merge
+ *     this with tcp_send_ack().
+ *      This is called for delayed acks also.
  */
-
-       if (sk->partial) {
-               if (!sk->packets_out ||
-                   (sk->nonagle && before(sk->write_seq , sk->window_seq))) {
-                       tcp_send_partial(sk);
-               }
-       }
-
-       release_sock(sk);
-
-out:
-       return retval;
-}
-
-
-/*
- *     Send an ack if one is backlogged at this point.
- */
-
 void tcp_read_wakeup(struct sock *sk)
 {
-       if (!sk->ack_backlog)
-               return;
-
        /*
         * If we're closed, don't send an ack, or we'll get a RST
         * from the closed destination.
         */
+
        if ((sk->state == TCP_CLOSE) || (sk->state == TCP_TIME_WAIT))
                return;
 
@@ -1267,8 +1103,11 @@ void tcp_read_wakeup(struct sock *sk)
  */
 
 static int tcp_recv_urg(struct sock * sk, int nonblock,
-            struct msghdr *msg, int len, int flags, int *addr_len)
+                       struct msghdr *msg, int len, int flags, 
+                       int *addr_len)
 {
+       struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
+
        /*
         *      No URG data to read
         */
@@ -1302,13 +1141,12 @@ static int tcp_recv_urg(struct sock * sk, int nonblock,
                memcpy_toiovec(msg->msg_iov, &c, 1);
                if(msg->msg_name)
                {
-                       struct sockaddr_in *sin=(struct sockaddr_in *)msg->msg_name;
-                       sin->sin_family=AF_INET;
-                       sin->sin_addr.s_addr=sk->daddr;
-                       sin->sin_port=sk->dummy_th.dest;
+                       tp->af_specific->addr2sockaddr(sk, (struct sockaddr *)
+                                                      msg->msg_name);       
                }
                if(addr_len)
-                       *addr_len=sizeof(struct sockaddr_in);
+                       *addr_len= tp->af_specific->sockaddr_len;
+
                release_sock(sk);
                return 1;
        }
@@ -1332,46 +1170,57 @@ static int tcp_recv_urg(struct sock * sk, int nonblock,
 
 static inline void tcp_eat_skb(struct sock *sk, struct sk_buff * skb)
 {
+       sk->ack_backlog++;
+
        skb->sk = sk;
        __skb_unlink(skb, &sk->receive_queue);
        kfree_skb(skb, FREE_READ);
 }
 
-/*
- *     FIXME:
- *     This routine frees used buffers.
- *     It should consider sending an ACK to let the
- *     other end know we now have a bigger window.
- */
 
 static void cleanup_rbuf(struct sock *sk)
 {
+       struct sk_buff *skb;
+       struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
+
        /*
         * NOTE! The socket must be locked, so that we don't get
         * a messed-up receive queue.
         */
-       while (!skb_queue_empty(&sk->receive_queue)) {
-               struct sk_buff *skb = sk->receive_queue.next;
+
+       while ((skb=skb_peek(&sk->receive_queue)) != NULL) {
                if (!skb->used || skb->users)
                        break;
                tcp_eat_skb(sk, skb);
        }
-
+       
+       if(sk->debug)
+               printk("sk->rspace = %lu\n", sock_rspace(sk));
+       
        /*
-        * Tell the world if we raised the window.
+        *  We send a ACK if the sender is blocked
+        *  else let tcp_data deal with the acking policy.
         */
-       if (tcp_raise_window(sk))
-               tcp_send_ack(sk);
-}
+
+       if (sock_rspace(sk) > tp->rcv_wnd - (tp->rcv_nxt - tp->rcv_wup) && 
+           (tp->rcv_wnd - (tp->rcv_nxt - tp->rcv_wup) < sk->mss)) 
+       {
+               /* Send an ack right now. */
+               sk->delayed_acks++;
+               tcp_read_wakeup(sk);
+       } 
+       
+} 
 
 
 /*
- *     This routine copies from a sock struct into the user buffer.
+ *     This routine copies from a sock struct into the user buffer. 
  */
-
-static int tcp_recvmsg(struct sock *sk, struct msghdr *msg,
-       int len, int nonblock, int flags, int *addr_len)
+int tcp_recvmsg(struct sock *sk, struct msghdr *msg,
+               int len, int nonblock, int flags, int *addr_len)
 {
+       struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
        struct wait_queue wait = { current, NULL };
        int copied = 0;
        u32 peek_seq;
@@ -1434,11 +1283,20 @@ static int tcp_recvmsg(struct sock *sk, struct msghdr *msg,
 
                current->state = TASK_INTERRUPTIBLE;
 
-               skb = sk->receive_queue.next;
-               while (skb != (struct sk_buff *)&sk->receive_queue)
+               skb = skb_peek(&sk->receive_queue);
+               do
                {
-                       if (before(*seq, skb->seq))
+                       if (!skb)
                                break;
+                       /* 
+                        * now that we have two receive queues this 
+                        * shouldn't happen
+                        */
+                       if (before(*seq, skb->seq)) {
+                               printk("recvmsg bug: copied %X seq %X\n",
+                                      *seq, skb->seq);
+                               break;
+                       }
                        offset = *seq - skb->seq;
                        if (skb->h.th->syn)
                                offset--;
@@ -1450,6 +1308,7 @@ static int tcp_recvmsg(struct sock *sk, struct msghdr *msg,
                                skb->used = 1;
                        skb = skb->next;
                }
+               while (skb != (struct sk_buff *)&sk->receive_queue);
 
                if (copied)
                        break;
@@ -1539,7 +1398,7 @@ static int tcp_recvmsg(struct sock *sk, struct msghdr *msg,
                *seq += used;
 
                /*
-                *      This copy_to_user can sleep. If it sleeps and we
+                *      This memcpy_tofs can sleep. If it sleeps and we
                 *      do a second read it relies on the skb->users to avoid
                 *      a crash when cleanup_rbuf() gets called.
                 */
@@ -1590,15 +1449,13 @@ static int tcp_recvmsg(struct sock *sk, struct msghdr *msg,
 
        }
 
-       if(copied>0 && msg->msg_name)
+       if(copied > 0 && msg->msg_name)
        {
-               struct sockaddr_in *sin=(struct sockaddr_in *)msg->msg_name;
-               sin->sin_family=AF_INET;
-               sin->sin_addr.s_addr=sk->daddr;
-               sin->sin_port=sk->dummy_th.dest;
+               tp->af_specific->addr2sockaddr(sk, (struct sockaddr *)
+                                              msg->msg_name);       
        }
        if(addr_len)
-               *addr_len=sizeof(struct sockaddr_in);
+               *addr_len= tp->af_specific->sockaddr_len;
 
        remove_wait_queue(sk->sleep, &wait);
        current->state = TASK_RUNNING;
@@ -1708,19 +1565,16 @@ void tcp_shutdown(struct sock *sk, int how)
        sk->shutdown |= SEND_SHUTDOWN;
 
        /*
-        *  Clear out any half completed packets.
+        *  Clear out any half completed packets. 
         */
 
-       if (sk->partial)
-               tcp_send_partial(sk);
-
        /*
         *      FIN if needed
         */
-
+        
        if (tcp_close_state(sk,0))
                tcp_send_fin(sk);
-
+               
        release_sock(sk);
 }
 
@@ -1741,7 +1595,7 @@ static inline int closing(struct sock * sk)
 }
 
 
-static void tcp_close(struct sock *sk, unsigned long timeout)
+void tcp_close(struct sock *sk, unsigned long timeout)
 {
        struct sk_buff *skb;
 
@@ -1774,22 +1628,16 @@ static void tcp_close(struct sock *sk, unsigned long timeout)
         *  descriptor close, not protocol-sourced closes, because the
         *  reader process may not have drained the data yet!
         */
-
+                
        while((skb=skb_dequeue(&sk->receive_queue))!=NULL)
                kfree_skb(skb, FREE_READ);
 
-       /*
-        *      Get rid off any half-completed packets.
-        */
-
-       if (sk->partial)
-               tcp_send_partial(sk);
-
+               
        /*
         *      Timeout is not the same thing - however the code likes
         *      to send both the same way (sigh).
         */
-
+        
        if (tcp_close_state(sk,1)==1)
        {
                tcp_send_fin(sk);
@@ -1836,14 +1684,13 @@ static void tcp_close(struct sock *sk, unsigned long timeout)
 
 
 /*
- * Wait for an incoming connection, avoid race
- * conditions. This must be called with the socket
- * locked.
+ *     Wait for an incoming connection, avoid race
+ *     conditions. This must be called with the socket locked.
  */
-static struct sk_buff * wait_for_connect(struct sock * sk)
+static struct open_request * wait_for_connect(struct sock * sk)
 {
        struct wait_queue wait = { current, NULL };
-       struct sk_buff * skb = NULL;
+       struct open_request *req = NULL;
 
        add_wait_queue(sk->sleep, &wait);
        for (;;) {
@@ -1851,27 +1698,29 @@ static struct sk_buff * wait_for_connect(struct sock * sk)
                release_sock(sk);
                schedule();
                lock_sock(sk);
-               skb = tcp_find_established(sk);
-               if (skb)
+               req = tcp_find_established(&(sk->tp_pinfo.af_tcp));
+               if (req)
                        break;
                if (current->signal & ~current->blocked)
                        break;
        }
        remove_wait_queue(sk->sleep, &wait);
-       return skb;
+       return req;
 }
 
+
 /*
  *     This will accept the next outstanding connection.
  *
  *     Be careful about race conditions here - this is subtle.
  */
 
-static struct sock *tcp_accept(struct sock *sk, int flags)
+struct sock *tcp_accept(struct sock *sk, int flags)
 {
-       int error;
-       struct sk_buff *skb;
+       struct tcp_opt *tp = &sk->tp_pinfo.af_tcp;
+       struct open_request *req;
        struct sock *newsk = NULL;
+       int error;
 
   /*
    * We need to make sure that this socket is listening,
@@ -1884,12 +1733,12 @@ static struct sock *tcp_accept(struct sock *sk, int flags)
 
        lock_sock(sk);
 
-       skb = tcp_find_established(sk);
-       if (skb) {
+       req = tcp_find_established(tp);
+       if (req) {
 got_new_connect:
-               __skb_unlink(skb, &sk->receive_queue);
-               newsk = skb->sk;
-               kfree_skb(skb, FREE_READ);
+               tcp_synq_unlink(tp, req);
+               newsk = req->sk;
+               kfree(req);             
                sk->ack_backlog--;
                error = 0;
 out:
@@ -1902,238 +1751,35 @@ no_listen:
        error = EAGAIN;
        if (flags & O_NONBLOCK)
                goto out;
-       skb = wait_for_connect(sk);
-       if (skb)
+       req = wait_for_connect(sk);
+       if (req)
                goto got_new_connect;
        error = ERESTARTSYS;
        goto out;
 }
 
-/*
- * Check that a TCP address is unique, don't allow multiple
- * connects to/from the same address
- */
-static int tcp_unique_address(u32 saddr, u16 snum, u32 daddr, u16 dnum)
-{
-       int retval = 1;
-       struct sock * sk;
-
-       /* Make sure we are allowed to connect here. */
-       cli();
-       for (sk = tcp_prot.sock_array[snum & (SOCK_ARRAY_SIZE -1)];
-                       sk != NULL; sk = sk->next)
-       {
-               /* hash collision? */
-               if (sk->num != snum)
-                       continue;
-               if (sk->saddr != saddr)
-                       continue;
-               if (sk->daddr != daddr)
-                       continue;
-               if (sk->dummy_th.dest != dnum)
-                       continue;
-               retval = 0;
-               break;
-       }
-       sti();
-       return retval;
-}
-
 
 /*
- *     This will initiate an outgoing connection.
+ *     Socket option code for TCP. 
  */
-
-static int tcp_connect(struct sock *sk, struct sockaddr_in *usin, int addr_len)
+  
+int tcp_setsockopt(struct sock *sk, int level, int optname, char *optval, 
+                  int optlen)
 {
-       struct sk_buff *buff;
-       struct device *dev=NULL;
-       unsigned char *ptr;
-       int tmp;
-       int atype;
-       struct tcphdr *t1;
-       struct rtable *rt;
-
-       if (sk->state != TCP_CLOSE)
-               return(-EISCONN);
-
-       /*
-        *      Don't allow a double connect.
-        */
-
-       if(sk->daddr)
-               return -EINVAL;
+       struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
+       int val;
 
-       if (addr_len < 8)
-               return(-EINVAL);
-
-       if (usin->sin_family && usin->sin_family != AF_INET)
-               return(-EAFNOSUPPORT);
-
-       /*
-        *      connect() to INADDR_ANY means loopback (BSD'ism).
-        */
-
-       if (usin->sin_addr.s_addr==INADDR_ANY)
-               usin->sin_addr.s_addr=ip_my_addr();
-
-       /*
-        *      Don't want a TCP connection going to a broadcast address
-        */
-
-       if ((atype=ip_chk_addr(usin->sin_addr.s_addr)) == IS_BROADCAST || atype==IS_MULTICAST)
-               return -ENETUNREACH;
-
-       if (!tcp_unique_address(sk->saddr, sk->num, usin->sin_addr.s_addr, usin->sin_port))
-               return -EADDRNOTAVAIL;
-
-       lock_sock(sk);
-       sk->daddr = usin->sin_addr.s_addr;
-
-       sk->rcv_ack_cnt = 1;
-       sk->err = 0;
-       sk->dummy_th.dest = usin->sin_port;
-
-       buff = sock_wmalloc(sk,MAX_SYN_SIZE,0, GFP_KERNEL);
-       if (buff == NULL)
+       if (level != SOL_TCP)
        {
-               release_sock(sk);
-               return(-ENOMEM);
+               return tp->af_specific->setsockopt(sk, level, optname, 
+                                                  optval, optlen);
        }
-       buff->sk = sk;
-       buff->free = 0;
-       buff->localroute = sk->localroute;
-
-       /*
-        *      Put in the IP header and routing stuff.
-        */
-
-       tmp = sk->prot->build_header(buff, sk->saddr, sk->daddr, &dev,
-               IPPROTO_TCP, sk->opt, MAX_SYN_SIZE,sk->ip_tos,sk->ip_ttl,&sk->ip_route_cache);
-       if (tmp < 0)
-       {
-               sock_wfree(sk, buff);
-               release_sock(sk);
-               return(-ENETUNREACH);
-       }
-       if ((rt = sk->ip_route_cache) != NULL && !sk->saddr)
-               sk->saddr = rt->rt_src;
-       sk->rcv_saddr = sk->saddr;
-
-       /*
-        * Set up our outgoing TCP sequence number
-        */
-       sk->write_seq = secure_tcp_sequence_number(sk->saddr, sk->daddr,
-                                                  sk->dummy_th.source,
-                                                  usin->sin_port);
-       sk->window_seq = sk->write_seq;
-       sk->rcv_ack_seq = sk->write_seq -1;
-
-       t1 = (struct tcphdr *) skb_put(buff,sizeof(struct tcphdr));
-
-       memcpy(t1,(void *)&(sk->dummy_th), sizeof(*t1));
-       buff->seq = sk->write_seq++;
-       t1->seq = htonl(buff->seq);
-       sk->sent_seq = sk->write_seq;
-       buff->end_seq = sk->write_seq;
-       t1->ack = 0;
-       t1->window = 2;
-       t1->syn = 1;
-       t1->doff = 6;
-       /* use 512 or whatever user asked for */
-
-       if(rt!=NULL && (rt->rt_flags&RTF_WINDOW))
-               sk->window_clamp=rt->rt_window;
-       else
-               sk->window_clamp=0;
-
-       if (sk->user_mss)
-               sk->mtu = sk->user_mss;
-       else if (rt)
-               sk->mtu = rt->rt_mtu - sizeof(struct iphdr) - sizeof(struct tcphdr);
-       else
-               sk->mtu = 576 - sizeof(struct iphdr) - sizeof(struct tcphdr);
-
-       /*
-        *      but not bigger than device MTU
-        */
-
-       if(sk->mtu <32)
-               sk->mtu = 32;   /* Sanity limit */
-
-       sk->mtu = min(sk->mtu, dev->mtu - sizeof(struct iphdr) - sizeof(struct tcphdr));
-
-#ifdef CONFIG_SKIP
-
-       /*
-        *      SKIP devices set their MTU to 65535. This is so they can take packets
-        *      unfragmented to security process then fragment. They could lie to the
-        *      TCP layer about a suitable MTU, but it's easier to let skip sort it out
-        *      simply because the final package we want unfragmented is going to be
-        *
-        *      [IPHDR][IPSP][Security data][Modified TCP data][Security data]
-        */
-
-       if(skip_pick_mtu!=NULL)         /* If SKIP is loaded.. */
-               sk->mtu=skip_pick_mtu(sk->mtu,dev);
-#endif
-
-       /*
-        *      Put in the TCP options to say MTU.
-        */
-
-       ptr = skb_put(buff,4);
-       ptr[0] = 2;
-       ptr[1] = 4;
-       ptr[2] = (sk->mtu) >> 8;
-       ptr[3] = (sk->mtu) & 0xff;
-       buff->csum = csum_partial(ptr, 4, 0);
-       tcp_send_check(t1, sk->saddr, sk->daddr,
-                 sizeof(struct tcphdr) + 4, buff);
-
-       /*
-        *      This must go first otherwise a really quick response will get reset.
-        */
-
-       tcp_cache_zap();
-       tcp_set_state(sk,TCP_SYN_SENT);
-       if(rt&&rt->rt_flags&RTF_IRTT)
-               sk->rto = rt->rt_irtt;
-       else
-               sk->rto = TCP_TIMEOUT_INIT;
-       sk->delack_timer.function = tcp_delack_timer;
-       sk->delack_timer.data = (unsigned long) sk;
-       sk->retransmit_timer.function = tcp_retransmit_timer;
-       sk->retransmit_timer.data = (unsigned long)sk;
-       sk->retransmits = 0;
-       sk->prot->queue_xmit(sk, dev, buff, 0);
-       tcp_reset_xmit_timer(sk, TIME_WRITE, sk->rto);
-       tcp_statistics.TcpActiveOpens++;
-       tcp_statistics.TcpOutSegs++;
-
-       release_sock(sk);
-       return(0);
-}
-
-/*
- *     Socket option code for TCP.
- */
-
-int tcp_setsockopt(struct sock *sk, int level, int optname, char *optval, int optlen)
-{
-       int val,err;
-
-       if(level!=SOL_TCP)
-               return ip_setsockopt(sk,level,optname,optval,optlen);
 
        if (optval == NULL)
                return(-EINVAL);
 
-       err=verify_area(VERIFY_READ, optval, sizeof(int));
-       if(err)
-               return err;
-
-       get_user(val, (int *)optval);
+       if (get_user(val, (int *)optval))
+               return -EFAULT;
 
        switch(optname)
        {
@@ -2155,12 +1801,17 @@ int tcp_setsockopt(struct sock *sk, int level, int optname, char *optval, int op
        }
 }
 
-int tcp_getsockopt(struct sock *sk, int level, int optname, char *optval, int *optlen)
+int tcp_getsockopt(struct sock *sk, int level, int optname, char *optval, 
+                  int *optlen)
 {
+       struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
        int val,err;
 
-       if(level!=SOL_TCP)
-               return ip_getsockopt(sk,level,optname,optval,optlen);
+       if(level != SOL_TCP)
+       {
+               return tp->af_specific->getsockopt(sk, level, optname,
+                                                  optval, optlen);
+       }
 
        switch(optname)
        {
@@ -2186,29 +1837,21 @@ int tcp_getsockopt(struct sock *sk, int level, int optname, char *optval, int *o
        return(0);
 }
 
+void tcp_set_keepalive(struct sock *sk, int val)
+{
+       if (!sk->keepopen && val)
+       {
+               tcp_inc_slow_timer(TCP_SLT_KEEPALIVE);
+       }
+       else if (sk->keepopen && !val)
+       {
+               tcp_dec_slow_timer(TCP_SLT_KEEPALIVE);
+       }
+}
 
-struct proto tcp_prot = {
-       tcp_close,
-       ip_build_header,
-       tcp_connect,
-       tcp_accept,
-       ip_queue_xmit,
-       tcp_retransmit,
-       tcp_write_wakeup,
-       tcp_read_wakeup,
-       tcp_rcv,
-       tcp_select,
-       tcp_ioctl,
-       NULL,
-       tcp_shutdown,
-       tcp_setsockopt,
-       tcp_getsockopt,
-       tcp_sendmsg,
-       tcp_recvmsg,
-       NULL,           /* No special bind() */
-       128,
-       0,
-       "TCP",
-       0, 0,
-       {NULL,}
-};
+/*
+ * Local variables:
+ *  compile-command: "gcc -D__KERNEL__ -I/usr/src/linux/include -Wall -Wstrict-prototypes -O2 -fomit-frame-pointer -fno-strength-reduce -pipe -m486 -DCPU=486 -c -o tcp.o tcp.c"
+ * c-file-style: "Linux"
+ * End:
+ */
index cb615e001be6a86fc85327ac73fdbf1c6de432a7..d9188b18f1b6408e4b9f6881d479cc22c5c78e7d 100644 (file)
  *             Matthew Dillon, <dillon@apollo.west.oic.com>
  *             Arnt Gulbrandsen, <agulbra@nvg.unit.no>
  *             Jorge Cwik, <jorge@laser.satlink.net>
+ */
+
+/*
+ *     TODO
+ *             - A better sock cache
+ *
+ */
+
+/*
+ * Changes:
+ *             Pedro Roque     :       Fast Retransmit/Recovery.
+ *                                     Two receive queues.
+ *                                     Retransmit queue handled by TCP.
+ *                                     Better retransmit timer handling.
+ *                                     New congestion avoidance.
+ *                                     Header prediction.
+ *                                     Variable renaming.
  *
- * FIXES
- *             Pedro Roque     :       Double ACK bug
+ *             Eric            :       Fast Retransmit.
+ *             Randy Scott     :       MSS option defines.
  *             Eric Schenk     :       Fixes to slow start algorithm.
  *             Eric Schenk     :       Yet another double ACK bug.
  *             Eric Schenk     :       Delayed ACK bug fixes.
  *             Eric Schenk     :       Floyd style fast retrans war avoidance.
- *             Eric Schenk     :       Skip fast retransmit on small windows.
- *             Eric schenk     :       Fixes to retransmission code to
- *                             :       avoid extra retransmission.
- *             Theodore Ts'o   :       Do secure TCP sequence numbers.
  */
 
 #include <linux/config.h>
-#include <linux/types.h>
-#include <linux/random.h>
 #include <net/tcp.h>
 
+
 /*
- *     Policy code extracted so it's now separate
+ *     Policy code extracted so it's now seperate
  */
 
 /*
  *     Called each time to estimate the delayed ack timeout. This is
- *     how it should be done so a fast link isn't impacted by ack delay.
+ *     how it should be done so a fast link isnt impacted by ack delay.
+ *
+ *     I think we need a medium deviation here also...
+ *     The estimated value is changing to fast
  */
  
-extern __inline__ void tcp_delack_estimator(struct sock *sk)
+extern __inline__ void tcp_delack_estimator(struct tcp_opt *tp)
 {
+       int m;
+
        /*
         *      Delayed ACK time estimator.
         */
        
-       if (sk->lrcvtime == 0) 
-       {
-               sk->lrcvtime = jiffies;
-               sk->ato = HZ/3;
-       }
-       else 
+       m = jiffies - tp->lrcvtime;
+
+       tp->lrcvtime = jiffies;
+
+       if (m < 0)
+               return;
+
+       /*
+        * if the mesured value is bigger than
+        * twice the round trip time ignore it.
+        */
+       if ((m << 2) <= tp->srtt) 
        {
-               int m;
-               
-               m = jiffies - sk->lrcvtime;
+               m -= (tp->iat >> 3);
+               tp->iat += m;
 
-               sk->lrcvtime = jiffies;
+               if (m <0)
+                       m = -m;
 
-               if (m <= 0)
-                       m = 1;
+               m -= (tp->iat_mdev >> 2);
+               tp->iat_mdev += m;
 
-               /* This used to test against sk->rtt.
-                * On a purely receiving link, there is no rtt measure.
-                * The result is that we lose delayed ACKs on one-way links.
-                * Therefore we test against sk->rto, which will always
-                * at least have a default value.
-                */
-               if (m > sk->rto)
-               {
-                       sk->ato = sk->rto;
-                       /*
-                        * printk(KERN_DEBUG "ato: rtt %lu\n", sk->ato);
-                        */
-               }
-               else 
-               {
-                       /*
-                        * Very fast acting estimator.
-                        * May fluctuate too much. Probably we should be
-                        * doing something like the rtt estimator here.
-                        */
-                       sk->ato = (sk->ato >> 1) + m;
-                       /*
-                        * printk(KERN_DEBUG "ato: m %lu\n", sk->ato);
-                        */
-               }
+               tp->ato = (tp->iat >> 3) + (tp->iat_mdev >> 2);
+
+               if (tp->ato < HZ/50)
+                       tp->ato = HZ/50;
        }
+       else
+               tp->ato = 0;
 }
 
 /*
@@ -100,8 +104,8 @@ extern __inline__ void tcp_delack_estimator(struct sock *sk)
  *     retransmitted [see Karn/Partridge Proceedings SIGCOMM 87]. 
  *     The algorithm is from the SIGCOMM 88 piece by Van Jacobson.
  */
-extern __inline__ void tcp_rtt_estimator(struct sock *sk, struct sk_buff *oskb)
+
+extern __inline__ void tcp_rtt_estimator(struct tcp_opt *tp, __u32 mrtt)
 {
        long m;
        /*
@@ -111,130 +115,72 @@ extern __inline__ void tcp_rtt_estimator(struct sock *sk, struct sk_buff *oskb)
         *      This is designed to be as fast as possible 
         *      m stands for "measurement".
         */
-       
-       m = jiffies - oskb->when;  /* RTT */
+       /*
+        *      On a 1990 paper the rto value is changed to:
+        *      RTO = rtt + 4 * mdev
+        */
 
-       if (sk->rtt != 0) {
+       m = mrtt;  /* RTT */
+
+       if (tp->srtt != 0) {
                if(m<=0)
                        m=1;            /* IS THIS RIGHT FOR <0 ??? */
-               m -= (sk->rtt >> 3);    /* m is now error in rtt est */
-               sk->rtt += m;           /* rtt = 7/8 rtt + 1/8 new */
+               m -= (tp->srtt >> 3);   /* m is now error in rtt est */
+               tp->srtt += m;          /* rtt = 7/8 rtt + 1/8 new */
                if (m < 0)
                        m = -m;         /* m is now abs(error) */
-               m -= (sk->mdev >> 2);   /* similar update on mdev */
-               sk->mdev += m;          /* mdev = 3/4 mdev + 1/4 new */
+               m -= (tp->mdev >> 2);   /* similar update on mdev */
+               tp->mdev += m;          /* mdev = 3/4 mdev + 1/4 new */
        } else {
-               /* no previous measure. */
-               sk->rtt = m<<3;         /* take the measured time to be rtt */
-               sk->mdev = m<<1;        /* make sure rto = 3*rtt */
+                                       /* no previous measure. */
+               tp->srtt = m<<3;        /* take the measured time to be rtt */
+               tp->mdev = m<<2;        /* make sure rto = 3*rtt */
        }
 
+
        /*
         *      Now update timeout.  Note that this removes any backoff.
         */
                         
-       /* Jacobson's algorithm calls for rto = R + 4V.
-        * We diverge from Jacobson's algorithm here. See the commentary
-        * in tcp_ack to understand why.
-        */
-       sk->rto = (sk->rtt >> 3) + sk->mdev;
-       sk->rto += (sk->rto>>2) + (sk->rto >> (sk->cong_window-1));
-       if (sk->rto > 120*HZ)
-               sk->rto = 120*HZ;
-       if (sk->rto < HZ/5)     /* Was 1*HZ - keep .2 as minimum cos of the BSD delayed acks */
-               sk->rto = HZ/5;
-       sk->backoff = 0;
-}
-
-/*
- *     Cached last hit socket
- */
-static volatile unsigned long  th_cache_saddr, th_cache_daddr;
-static volatile unsigned short  th_cache_dport, th_cache_sport;
-static volatile struct sock *th_cache_sk;
+       tp->rto = (tp->srtt >> 3) + tp->mdev;
 
-void tcp_cache_zap(void)
-{
-       th_cache_sk=NULL;
-}
+       if (tp->rto > 120*HZ)
+               tp->rto = 120*HZ;
 
-/*
- *     Find the socket, using the last hit cache if applicable. The cache is not quite
- *     right...
- */
+       /* Was 1*HZ - keep .2 as minimum cos of the BSD delayed acks */
+       if (tp->rto < HZ/5)
+               tp->rto = HZ/5;
 
-static inline struct sock * get_tcp_sock(u32 saddr, u16 sport, u32 daddr, u16 dport, u32 paddr, u16 pport)
-{
-       struct sock * sk;
-
-       sk = (struct sock *) th_cache_sk;
-       if (!sk || saddr != th_cache_saddr || daddr != th_cache_daddr ||
-           sport != th_cache_sport || dport != th_cache_dport) {
-               sk = get_sock(&tcp_prot, dport, saddr, sport, daddr, paddr, pport);
-               if (sk) {
-                       th_cache_saddr=saddr;
-                       th_cache_daddr=daddr;
-                       th_cache_dport=dport;
-                       th_cache_sport=sport;
-                       th_cache_sk=sk;
-               }
-       }
-       return sk;
+       tp->backoff = 0;
 }
 
 /*
- * React to an out-of-window TCP sequence number in an incoming packet
+ *     This functions checks to see if the tcp header is actually acceptable. 
  */
  
-static void bad_tcp_sequence(struct sock *sk, struct tcphdr *th, u32 end_seq,
-             struct device *dev)
+extern __inline__ int tcp_sequence(struct tcp_opt *tp, u32 seq, u32 seg_nxt)
 {
-       if (th->rst)
-               return;
+       u32 end_window = tp->rcv_wup + tp->rcv_wnd;
+       u32 end_seq = seg_nxt;
 
        /*
-        *      Send a reset if we get something not ours and we are
-        *      unsynchronized. Note: We don't do anything to our end. We
-        *      are just killing the bogus remote connection then we will
-        *      connect again and it will work (with luck).
+        *      When the window is open (most common case)
+        *      we want to accept segments if they have yet unseen data
+        *      or in the case of a dataless segment if seg.seq == rcv.nxt
+        *      this means:
+        *
+        *      if (seq == end_seq)
+        *              end_seq >= rcv.nxt
+        *      else
+        *              end_seq >  rcv.nxt
         */
-        
-       if (sk->state==TCP_SYN_SENT || sk->state==TCP_SYN_RECV) 
-       {
-               tcp_send_reset(sk->saddr,sk->daddr,th,sk->prot,NULL,dev, sk->ip_tos,sk->ip_ttl);
-               return;
-       }
 
-       /*
-        *      This packet is old news. Usually this is just a resend
-        *      from the far end, but sometimes it means the far end lost
-        *      an ACK we sent, so we better send an ACK.
-        */
-       /*
-        *      BEWARE! Unconditional answering by ack to out-of-window ack
-        *      can result in infinite exchange of empty acks.
-        *      This check cures bug, found by Michiel Boland, but
-        *      not another possible cases.
-        *      If we are in TCP_TIME_WAIT, we have already received
-        *      FIN, so that our peer need not window update. If our
-        *      ACK were lost, peer would retransmit his FIN anyway. --ANK
-        */
-       if (sk->state != TCP_TIME_WAIT || ntohl(th->seq) != end_seq)
-               tcp_send_ack(sk);
-}
+       if (seq == end_seq)
+               end_seq++;
 
-/*
- *     This functions checks to see if the tcp header is actually acceptable. 
- */
-extern __inline__ int tcp_sequence(struct sock *sk, u32 seq, u32 end_seq)
-{
-       u32 end_window = sk->lastwin_seq + sk->window;
-       return  /* if start is at end of window, end must be too (zero window) */
-               (seq == end_window && seq == end_seq) ||
-               /* if start is before end of window, check for interest */
-               (before(seq, end_window) && !before(end_seq, sk->acked_seq));
+       return ((before(seq, end_window) && after(end_seq, tp->rcv_nxt)) ||
+               (seq == end_window && seq == end_seq));
 }
 
 /*
@@ -273,7 +219,7 @@ static int tcp_reset(struct sock *sk, struct sk_buff *skb)
 #endif 
        if (!sk->dead) 
                sk->state_change(sk);
-       kfree_skb(skb, FREE_READ);
+
        return(0);
 }
 
@@ -289,11 +235,11 @@ static int tcp_reset(struct sock *sk, struct sk_buff *skb)
  *     as Linux gets deployed on 100Mb/sec networks.
  */
  
-static void tcp_options(struct sock *sk, struct tcphdr *th)
+int tcp_parse_options(struct tcphdr *th)
 {
        unsigned char *ptr;
        int length=(th->doff*4)-sizeof(struct tcphdr);
-       int mss_seen = 0;
+       int mss = 0;
     
        ptr = (unsigned char *)(th + 1);
   
@@ -304,7 +250,7 @@ static void tcp_options(struct sock *sk, struct tcphdr *th)
                switch(opcode)
                {
                        case TCPOPT_EOL:
-                               return;
+                               return 0;
                        case TCPOPT_NOP:        /* Ref: RFC 793 section 3.1 */
                                length--;
                                ptr--;          /* the opsize=*ptr++ above was a mistake */
@@ -312,14 +258,13 @@ static void tcp_options(struct sock *sk, struct tcphdr *th)
                        
                        default:
                                if(opsize<=2)   /* Avoid silly options looping forever */
-                                       return;
+                                       return 0;
                                switch(opcode)
                                {
                                        case TCPOPT_MSS:
-                                               if(opsize==4 && th->syn)
+                                               if(opsize==TCPOLEN_MSS && th->syn)
                                                {
-                                                       sk->mtu=min(sk->mtu,ntohs(*(unsigned short *)ptr));
-                                                       mss_seen = 1;
+                                                       mss = ntohs(*(unsigned short *)ptr);
                                                }
                                                break;
                                                /* Add other options here as people feel the urge to implement stuff like large windows */
@@ -328,612 +273,420 @@ static void tcp_options(struct sock *sk, struct tcphdr *th)
                                length-=opsize;
                }
        }
-       if (th->syn) 
-       {
-               if (! mss_seen)
-                     sk->mtu=min(sk->mtu, 536);  /* default MSS if none sent */
-       }
-#ifdef CONFIG_INET_PCTCP
-       sk->mss = min(sk->max_window >> 1, sk->mtu);
-#else    
-       sk->mss = min(sk->max_window, sk->mtu);
-       sk->max_unacked = 2 * sk->mss;
-#endif  
+
+       return mss;
 }
 
 
-/*
- *     This routine handles a connection request.
- *     It should make sure we haven't already responded.
- *     Because of the way BSD works, we have to send a syn/ack now.
- *     This also means it will be harder to close a socket which is
- *     listening.
+/* 
+ *  See draft-stevens-tcpca-spec-01 for documentation.
  */
-static void tcp_conn_request(struct sock *sk, struct sk_buff *skb,
-                u32 daddr, u32 saddr, struct options *opt, struct device *dev, u32 seq)
-{
-       struct sock *newsk;
-       struct tcphdr *th;
-       struct rtable *rt;
-  
-       th = skb->h.th;
 
-       /* If the socket is dead, don't accept the connection. */
-       if (!sk->dead) 
-       {
-               sk->data_ready(sk,0);
-       }
-       else 
-       {
-               if(sk->debug)
-                       printk("Reset on %p: Connect on dead socket.\n",sk);
-               tcp_send_reset(daddr, saddr, th, sk->prot, opt, dev, sk->ip_tos,sk->ip_ttl);
-               tcp_statistics.TcpAttemptFails++;
-               kfree_skb(skb, FREE_READ);
-               return;
-       }
+static void tcp_fast_retrans(struct sock *sk, u32 ack, int not_dup)
+{
+       struct tcp_opt *tp=&(sk->tp_pinfo.af_tcp);
 
        /*
-        *      Make sure we can accept more.  This will prevent a
-        *      flurry of syns from eating up all our memory.
-        *
-        *      BSD does some funnies here and allows 3/2 times the
-        *      set backlog as a fudge factor. That's just too gross.
+        * An ACK is a duplicate if:
+        * (1) it has the same sequence number as the largest number we've 
+        *     seen,
+        * (2) it has the same window as the last ACK,
+        * (3) we have outstanding data that has not been ACKed
+        * (4) The packet was not carrying any data.
+        * (5) [From Floyds paper on fast retransmit wars]
+        *     The packet acked data after high_seq;
         */
 
-       if (sk->ack_backlog >= sk->max_ack_backlog) 
+       if (ack == tp->snd_una && sk->packets_out && (not_dup == 0) &&
+           after(ack, tp->high_seq))
        {
-               tcp_statistics.TcpAttemptFails++;
-               kfree_skb(skb, FREE_READ);
-               return;
-       }
-
-       /*
-        * We need to build a new sock struct.
-        * It is sort of bad to have a socket without an inode attached
-        * to it, but the wake_up's will just wake up the listening socket,
-        * and if the listening socket is destroyed before this is taken
-        * off of the queue, this will take care of it.
-        */
+               
+               sk->dup_acks++; 
+               
 
-       newsk = (struct sock *) kmalloc(sizeof(struct sock), GFP_ATOMIC);
-       if (newsk == NULL) 
-       {
-               /* just ignore the syn.  It will get retransmitted. */
-               tcp_statistics.TcpAttemptFails++;
-               kfree_skb(skb, FREE_READ);
-               return;
-       }
+               /*
+                * 1. When the third duplicate ack is received, set ssthresh 
+                * to one half the current congestion window, but no less 
+                * than two segments. Retransmit the missing segment.
+                */
+       
+               if (sk->dup_acks == 3) 
+               {
+                       sk->ssthresh = max(sk->cong_window >> 1, 2);
+                       sk->cong_window = sk->ssthresh + 3;
+                       tcp_do_retransmit(sk, 0);
+               }
 
-       memcpy(newsk, sk, sizeof(*newsk));
-       newsk->opt = NULL;
-       newsk->ip_route_cache  = NULL;
-       if (opt && opt->optlen) 
-       {
-               sk->opt = (struct options*)kmalloc(sizeof(struct options)+opt->optlen, GFP_ATOMIC);
-               if (!sk->opt) 
+               /*
+                * 2. Each time another duplicate ACK arrives, increment 
+                * cwnd by the segment size. [...] Transmit a packet...
+                *
+                * Packet transmission will be done on normal flow processing
+                * since we're not in "retransmit mode"
+                */
+               
+               if (sk->dup_acks > 3) 
                {
-                       kfree_s(newsk, sizeof(struct sock));
-                       tcp_statistics.TcpAttemptFails++;
-                       kfree_skb(skb, FREE_READ);
-                       return;
+                       sk->cong_window++;
                }
-               if (ip_options_echo(sk->opt, opt, daddr, saddr, skb)) 
+       }
+       else
+       {
+               /*
+                * 3. When the next ACK arrives that acknowledges new data,
+                *    set cwnd to ssthresh
+                */
+
+               if (sk->dup_acks >= 3)
                {
-                       kfree_s(sk->opt, sizeof(struct options)+opt->optlen);
-                       kfree_s(newsk, sizeof(struct sock));
-                       tcp_statistics.TcpAttemptFails++;
-                       kfree_skb(skb, FREE_READ);
-                       return;
+                       sk->tp_pinfo.af_tcp.retrans_head = NULL;
+                       sk->cong_window = sk->ssthresh;
+                       sk->retransmits = 0;
                }
+               sk->dup_acks = 0;
        }
-       skb_queue_head_init(&newsk->write_queue);
-       skb_queue_head_init(&newsk->receive_queue);
-       newsk->send_head = NULL;
-       newsk->send_tail = NULL;
-       newsk->send_next = NULL;
-       skb_queue_head_init(&newsk->back_log);
-       newsk->rtt = 0;
-       newsk->rto = TCP_TIMEOUT_INIT;
-       newsk->mdev = TCP_TIMEOUT_INIT;
-       newsk->max_window = 0;
-       /*
-        * See draft-stevens-tcpca-spec-01 for discussion of the
-        * initialization of these values.
-        */
-       newsk->cong_window = 1;
-       newsk->cong_count = 0;
-       newsk->ssthresh = 0x7fffffff;
-
-       newsk->lrcvtime = 0;
-       newsk->idletime = 0;
-       newsk->high_seq = 0;
-       newsk->backoff = 0;
-       newsk->blog = 0;
-       newsk->intr = 0;
-       newsk->proc = 0;
-       newsk->done = 0;
-       newsk->partial = NULL;
-       newsk->pair = NULL;
-       newsk->wmem_alloc = 0;
-       newsk->rmem_alloc = 0;
-       newsk->localroute = sk->localroute;
-
-       newsk->max_unacked = MAX_WINDOW - TCP_WINDOW_DIFF;
-
-       newsk->err = 0;
-       newsk->shutdown = 0;
-       newsk->ack_backlog = 0;
-       newsk->acked_seq = skb->seq+1;
-       newsk->lastwin_seq = skb->seq+1;
-       newsk->delay_acks = 1;
-       newsk->copied_seq = skb->seq+1;
-       newsk->fin_seq = skb->seq;
-       newsk->syn_seq = skb->seq;
-       newsk->state = TCP_SYN_RECV;
-       newsk->timeout = 0;
-       newsk->ip_xmit_timeout = 0;
-       newsk->write_seq = seq; 
-       newsk->window_seq = newsk->write_seq;
-       newsk->rcv_ack_seq = newsk->write_seq;
-       newsk->urg_data = 0;
-       newsk->retransmits = 0;
-       newsk->linger=0;
-       newsk->destroy = 0;
-       init_timer(&newsk->timer);
-       newsk->timer.data = (unsigned long)newsk;
-       newsk->timer.function = &net_timer;
-       init_timer(&newsk->delack_timer);
-       newsk->delack_timer.data = (unsigned long)newsk;
-       newsk->delack_timer.function = tcp_delack_timer;
-       init_timer(&newsk->retransmit_timer);
-       newsk->retransmit_timer.data = (unsigned long)newsk;
-       newsk->retransmit_timer.function = tcp_retransmit_timer;
-       newsk->dummy_th.source = skb->h.th->dest;
-       newsk->dummy_th.dest = skb->h.th->source;
        
-#ifdef CONFIG_IP_TRANSPARENT_PROXY
-       /* 
-        *      Deal with possibly redirected traffic by setting num to
-        *      the intended destination port of the received packet.
-        */
-       newsk->num = ntohs(skb->h.th->dest);
-
-#endif
-       /*
-        *      Swap these two, they are from our point of view. 
-        */
-        
-       newsk->daddr = saddr;
-       newsk->saddr = daddr;
-       newsk->rcv_saddr = daddr;
+}
 
-       put_sock(newsk->num,newsk);
-       newsk->acked_seq = skb->seq + 1;
-       newsk->copied_seq = skb->seq + 1;
-       newsk->socket = NULL;
+int sysctl_tcp_vegas_cong_avoidance = 1;
 
-       /*
-        *      Grab the ttl and tos values and use them 
-        */
+/*
+ *      TCP slow start and congestion avoidance in two flavors:
+ *      RFC 1122 and TCP Vegas.
+ *
+ *      This is a /proc/sys configurable option. 
+ */
 
-       newsk->ip_ttl=sk->ip_ttl;
-       newsk->ip_tos=skb->ip_hdr->tos;
+#define SHIFT_FACTOR 12
 
+static void tcp_cong_avoid_vegas(struct sock *sk, u32 seq, u32 ack,
+                                u32 seq_rtt)
+{
        /*
-        *      Use 512 or whatever user asked for 
+        *      From:
+        *      TCP Vegas: New Techniques for Congestion 
+        *      Detection and Avoidance.
+        *              
+        *
+        *      Warning: This code is a scratch implementation taken
+        *      from the paper only. The code they distribute seams
+        *      to have improved several things over the initial spec.
         */
 
-       /*
-        *      Note use of sk->user_mss, since user has no direct access to newsk 
-        */
+       u32 Actual, Expected;
+       u32 snt_bytes;
+       struct tcp_opt * tp;
 
-       rt = ip_rt_route(newsk->opt && newsk->opt->srr ? newsk->opt->faddr : saddr, 0);
-       newsk->ip_route_cache = rt;
+       tp = &(sk->tp_pinfo.af_tcp);
+
+       if (!seq_rtt)
+               seq_rtt = 1;
        
-       if(rt!=NULL && (rt->rt_flags&RTF_WINDOW))
-               newsk->window_clamp = rt->rt_window;
+       if (tp->basertt)
+               tp->basertt = min(seq_rtt, tp->basertt);
        else
-               newsk->window_clamp = 0;
+               tp->basertt = seq_rtt;
+               
                
-       if (sk->user_mss)
-               newsk->mtu = sk->user_mss;
-       else if (rt)
-               newsk->mtu = rt->rt_mtu - sizeof(struct iphdr) - sizeof(struct tcphdr);
-       else 
-               newsk->mtu = 576 - sizeof(struct iphdr) - sizeof(struct tcphdr);
-
        /*
-        *      But not bigger than device MTU 
+        * 
+        *      Actual   = throughput for this segment.
+        *      Expected = number_of_bytes in transit / BaseRTT
+        * 
         */
 
-       newsk->mtu = min(newsk->mtu, dev->mtu - sizeof(struct iphdr) - sizeof(struct tcphdr));
+       snt_bytes = (ack - seq) << SHIFT_FACTOR;
+               
+       Actual =  snt_bytes / seq_rtt;
+       Expected = ((tp->snd_nxt - tp->snd_una) << SHIFT_FACTOR) / tp->basertt;
 
-#ifdef CONFIG_SKIP
-       
-       /*
-        *      SKIP devices set their MTU to 65535. This is so they can take packets
-        *      unfragmented to security process then fragment. They could lie to the
-        *      TCP layer about a suitable MTU, but it's easier to let skip sort it out
-        *      simply because the final package we want unfragmented is going to be
-        *
-        *      [IPHDR][IPSP][Security data][Modified TCP data][Security data]
-        */
-        
-       if(skip_pick_mtu!=NULL)         /* If SKIP is loaded.. */
-               sk->mtu=skip_pick_mtu(sk->mtu,dev);
-#endif
+/*             
+       printk(KERN_DEBUG "A:%x E:%x rtt:%x srtt:%x win: %d\n", 
+              Actual, Expected, seq_rtt, tp->srtt, sk->cong_window);
+      */
        /*
-        *      This will min with what arrived in the packet 
+        *      Slow Start
         */
-
-       tcp_options(newsk,skb->h.th);
        
-       tcp_cache_zap();
-       tcp_send_synack(newsk, sk, skb);
-}
-
-
-/*
- * Handle a TCP window that shrunk on us. It shouldn't happen,
- * but..
- *
- * We may need to move packets from the send queue
- * to the write queue, if the window has been shrunk on us.
- * The RFC says you are not allowed to shrink your window
- * like this, but if the other end does, you must be able
- * to deal with it.
- */
-void tcp_window_shrunk(struct sock * sk, u32 window_seq)
-{
-       struct sk_buff *skb;
-       struct sk_buff *skb2;
-       struct sk_buff *wskb = NULL;
-       
-       skb2 = sk->send_head;
-       sk->send_head = NULL;
-       sk->send_tail = NULL;
-       sk->send_next = NULL;
-
-       /*
-        *      This is an artifact of a flawed concept. We want one
-        *      queue and a smarter send routine when we send all.
-        */
-       cli();
-       while (skb2 != NULL) 
+       if (sk->cong_window < sk->ssthresh &&
+           (seq == tp->snd_nxt ||
+             (((Expected - Actual) <=
+               ((TCP_VEGAS_GAMMA << SHIFT_FACTOR) * sk->mss / tp->basertt))
+              )
+            ))
+       {
+                       
+               /*
+                * "Vegas allows exponential growth only every other
+                *  RTT"
+                */
+                       
+               if (sk->cong_count || sk->cong_window <= 2)
+               {
+                       sk->cong_window++;
+                       sk->cong_count = 0;
+               }
+               else
+                       sk->cong_count++;
+       }
+       else 
        {
-               skb = skb2;
-               skb2 = skb->link3;
-               skb->link3 = NULL;
-               if (after(skb->end_seq, window_seq)) 
+               /*
+                *      Congestion Avoidance
+                */
+                       
+               if (Expected - Actual <=
+                   ((TCP_VEGAS_ALPHA << SHIFT_FACTOR) * sk->mss / tp->basertt))
                {
-                       if (sk->packets_out > 0) 
-                               sk->packets_out--;
-                       /* We may need to remove this from the dev send list. */
-                       if (skb->next != NULL) 
+                       /* Increase Linearly */
+                               
+                       if (sk->cong_count >= sk->cong_window)
                        {
-                               skb_unlink(skb);                                
+                               sk->cong_window++;
+                               sk->cong_count = 0;
                        }
-                       /* Now add it to the write_queue. */
-                       if (wskb == NULL)
-                               skb_queue_head(&sk->write_queue,skb);
                        else
-                               skb_append(wskb,skb);
-                       wskb = skb;
-               } 
-               else 
+                               sk->cong_count++;
+               }
+                       
+               if (Expected - Actual >=
+                   ((TCP_VEGAS_BETA << SHIFT_FACTOR) * sk->mss / tp->basertt))
                {
-                       if (sk->send_head == NULL) 
+                       /* Decrease Linearly */
+                               
+                       if (sk->cong_count >= sk->cong_window)
                        {
-                               sk->send_head = skb;
-                               sk->send_tail = skb;
-                               sk->send_next = skb;
+                               sk->cong_window--;
+                               sk->cong_count = 0;
                        }
                        else
-                       {
-                               sk->send_tail->link3 = skb;
-                               sk->send_tail = skb;
-                       }
-                       skb->link3 = NULL;
+                               sk->cong_count++;
+                               
+                               
+                       /* Never less than 2 segments */
+                       if (sk->cong_window < 2)
+                               sk->cong_window = 2;
                }
+
+
+       }
+}
+
+static void tcp_cong_avoid_vanj(struct sock *sk, u32 seq, u32 ack, u32 seq_rtt)
+{
+       
+        /* 
+         * This is Jacobson's slow start and congestion avoidance. 
+         * SIGCOMM '88, p. 328.  Because we keep cong_window in 
+         * integral mss's, we can't do cwnd += 1 / cwnd.  
+         * Instead, maintain a counter and increment it once every 
+         * cwnd times.  
+         */
+
+        if (sk->cong_window <= sk->ssthresh)  
+       {
+                /* 
+                 *     In "safe" area, increase
+                 */
+
+                sk->cong_window++;
        }
-       sti();
+        else 
+       {
+                /*
+                 *     In dangerous area, increase slowly.  
+                 *      In theory this is
+                 *     sk->cong_window += 1 / sk->cong_window
+                 */
+
+                if (sk->cong_count >= sk->cong_window) {
+                       
+                        sk->cong_window++;
+                        sk->cong_count = 0;
+                }
+                else 
+                        sk->cong_count++;
+        }       
 }
 
 
+#define FLAG_DATA              0x01
+#define FLAG_WIN_UPDATE                0x02
+#define FLAG_DATA_ACKED                0x04
 /*
  *     This routine deals with incoming acks, but not outgoing ones.
- *
- *     This routine is totally _WRONG_. The list structuring is wrong,
- *     the algorithm is wrong, the code is wrong.
  */
 
-static int tcp_ack(struct sock *sk, struct tcphdr *th, u32 ack, int len)
+static int tcp_ack(struct sock *sk, struct tcphdr *th, 
+                  u32 ack_seq, u32 ack, int len)
 {
        int flag = 0;
-       u32 window_seq;
+       u32 seq = 0;
+       u32 seq_rtt = 0;
+       struct sk_buff *skb;
+       struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
 
-       /* 
-        * 1 - there was data in packet as well as ack or new data is sent or 
-        *     in shutdown state
-        * 2 - data from retransmit queue was acked and removed
-        * 4 - window shrunk or data from retransmit queue was acked and removed
-        */
 
        if(sk->zapped)
                return(1);      /* Dead, can't ack any more so why bother */
 
-       /*
-        *      We have dropped back to keepalive timeouts. Thus we have
-        *      no retransmits pending.
-        */
         
-       if (sk->ip_xmit_timeout == TIME_KEEPOPEN)
-               sk->retransmits = 0;
+       if (tp->pending == TIME_KEEPOPEN) 
+       {
+               tp->probes_out = 0;
+       }
 
+       tp->rcv_tstamp = jiffies;
+               
        /*
         *      If the ack is newer than sent or older than previous acks
         *      then we can probably ignore it.
         */
         
-       if (after(ack, sk->sent_seq) || before(ack, sk->rcv_ack_seq)) 
+       if (after(ack, tp->snd_nxt) || before(ack, tp->snd_una))
                goto uninteresting_ack;
 
        /*
-        *      Have we discovered a larger window
+        *      If there is data set flag 1
         */
-       window_seq = ntohs(th->window);
-       if (window_seq > sk->max_window)
+        
+       if (len != th->doff*4)
        {
-               sk->max_window = window_seq;
-#ifdef CONFIG_INET_PCTCP
-               /* Hack because we don't send partial packets to non SWS
-                  handling hosts */
-               sk->mss = min(window_seq>>1, sk->mtu);
-#else
-               sk->mss = min(window_seq, sk->mtu);
-#endif 
+               flag |= FLAG_DATA;
+               tcp_delack_estimator(tp);
        }
-       window_seq += ack;
 
        /*
-        *      See if our window has been shrunk. 
+        *      Update our send window
         */
-       if (after(sk->window_seq, window_seq))
-               tcp_window_shrunk(sk, window_seq);
 
        /*
-        *      Pipe has emptied
-        */      
-       if (sk->send_tail == NULL || sk->send_head == NULL) 
+        *      This is the window update code as per RFC 793
+        *      snd_wl{1,2} are used to prevent unordered
+        *      segments from shrinking the window 
+        */
+
+       if ((tp->snd_wl1 == 0) || before(tp->snd_wl1, ack_seq) ||
+           (tp->snd_wl1 == ack_seq && !after(tp->snd_wl2, ack)))
        {
-               sk->send_head = NULL;
-               sk->send_tail = NULL;
-               sk->send_next = NULL;
-               sk->packets_out= 0;
+               tp->snd_wnd = ntohs(th->window);
+               tp->snd_wl1 = ack_seq;
+               tp->snd_wl2 = ack;
+
+               flag |= FLAG_WIN_UPDATE;
+
+               if (tp->snd_wnd > sk->max_window)
+               {
+                       sk->max_window = tp->snd_wnd;
+               }
        }
 
+       
        /*
-        *      We don't want too many packets out there. 
+        *      We passed data and got it acked, remove any soft error
+        *      log. Something worked...
         */
         
-       if (sk->ip_xmit_timeout == TIME_WRITE && 
-               sk->cong_window < 2048 && after(ack, sk->rcv_ack_seq)) 
+       sk->err_soft = 0;
+
+       /*
+        *      If this ack opens up a zero window, clear backoff.  It was
+        *      being used to time the probes, and is probably far higher than
+        *      it needs to be for normal retransmission.
+        */
+
+       if (tp->pending == TIME_PROBE0) 
        {
+               tp->probes_out = 0;     /* Our probe was answered */
                
-               /* 
-                * This is Jacobson's slow start and congestion avoidance. 
-                * SIGCOMM '88, p. 328.  Because we keep cong_window in integral
-                * mss's, we can't do cwnd += 1 / cwnd.  Instead, maintain a 
-                * counter and increment it once every cwnd times.  It's possible
-                * that this should be done only if sk->retransmits == 0.  I'm
-                * interpreting "new data is acked" as including data that has
-                * been retransmitted but is just now being acked.
+               /*
+                *      Was it a usable window open ?
                 */
-               if (sk->cong_window <= sk->ssthresh)
-                       /* 
-                        *      In "safe" area, increase
-                        */
-                       sk->cong_window++;
-               else 
+                
+               /* should always be non-null */
+               if (tp->send_head != NULL &&
+                   !before (ack + tp->snd_wnd, tp->send_head->end_seq))
                {
-                       /*
-                        *      In dangerous area, increase slowly.  In theory this is
-                        *      sk->cong_window += 1 / sk->cong_window
-                        */
-                       if (sk->cong_count >= sk->cong_window) 
-                       {
-                               sk->cong_window++;
-                               sk->cong_count = 0;
-                       }
-                       else 
-                               sk->cong_count++;
+                       tp->backoff = 0;
+                       tp->pending = 0;
+
+                        tcp_clear_xmit_timer(sk, TIME_PROBE0);
+
+               }
+                else
+               {
+                        tcp_reset_xmit_timer(sk, TIME_PROBE0, 
+                                            min(tp->rto << tp->backoff, 
+                                                120*HZ));
                }
        }
 
-       /*
-        *      Remember the highest ack received and update the
-        *      right hand window edge of the host.
-        *      We do a bit of work here to track number of times we've
-        *      seen this ack without a change in the right edge of the
-        *      window and no data in the packet.
-        *      This will allow us to do fast retransmits.
-        */
-
-       /* We are looking for duplicate ACKs here.
-        * An ACK is a duplicate if:
-        * (1) it has the same sequence number as the largest number we've seen,
-        * (2) it has the same window as the last ACK,
-        * (3) we have outstanding data that has not been ACKed
-        * (4) The packet was not carrying any data.
-        * (5) [From Floyd's paper on fast retransmit wars]
-        *     The packet acked data after high_seq;
-        * I've tried to order these in occurrence of most likely to fail
-        * to least likely to fail.
-        * [These are an extension of the rules BSD stacks use to
-        *  determine if an ACK is a duplicate.]
+       /* 
+        *      See if we can take anything off of the retransmit queue.
         */
+   
+       start_bh_atomic();
 
-       if (sk->rcv_ack_seq == ack
-               && sk->window_seq == window_seq
-               && len != th->doff*4
-               && before(ack, sk->sent_seq)
-               && after(ack, sk->high_seq))
-       {
-               /* Prevent counting of duplicate ACKs if the congestion
-                * window is smaller than 3. Note that since we reduce
-                * the congestion window when we do a fast retransmit,
-                * we must be careful to keep counting if we were already
-                * counting. The idea behind this is to avoid doing
-                * fast retransmits if the congestion window is so small
-                * that we cannot get 3 ACKs due to the loss of a packet
-                * unless we are getting ACKs for retransmitted packets.
-                */
-               if (sk->cong_window >= 3 || sk->rcv_ack_cnt > MAX_DUP_ACKS+1)
-                       sk->rcv_ack_cnt++;
-               /* See draft-stevens-tcpca-spec-01 for explanation
-                * of what we are doing here.
-                */
-               if (sk->rcv_ack_cnt == MAX_DUP_ACKS+1) {
-                       int tmp;
-
-                       /* We need to be a bit careful to preserve the
-                        * count of packets that are out in the system here.
-                        */
-                       sk->ssthresh = max(sk->cong_window >> 1, 2);
-                       sk->cong_window = sk->ssthresh+MAX_DUP_ACKS+1;
-                       tmp = sk->packets_out;
-                       tcp_do_retransmit(sk,0);
-                       sk->packets_out = tmp;
-               } else if (sk->rcv_ack_cnt > MAX_DUP_ACKS+1) {
-                       sk->cong_window++;
-                       /*
-                       * At this point we are suppose to transmit a NEW
-                       * packet (not retransmit the missing packet,
-                       * this would only get us into a retransmit war.)
-                       * I think that having just adjusted cong_window
-                       * we will transmit the new packet below.
-                       */
-               }
-       }
-       else
+       while(((skb=skb_peek(&sk->write_queue)) != NULL) &&
+             (skb != tp->send_head))
        {
-               if (sk->rcv_ack_cnt > MAX_DUP_ACKS) {
-                       sk->cong_window = sk->ssthresh;
-               }
-               sk->window_seq = window_seq;
-               sk->rcv_ack_seq = ack;
-               sk->rcv_ack_cnt = 1;
-       }
-       
-       /*
-        *      We passed data and got it acked, remove any soft error
-        *      log. Something worked...
-        */
-        
-       sk->err_soft = 0;
-
-       /*
-        *      If this ack opens up a zero window, clear backoff.  It was
-        *      being used to time the probes, and is probably far higher than
-        *      it needs to be for normal retransmission.
-        */
+               /* Check for a bug. */
 
-       if (sk->ip_xmit_timeout == TIME_PROBE0) 
-       {
-               sk->retransmits = 0;    /* Our probe was answered */
-               
+               if (skb->next != (struct sk_buff*) &sk->write_queue &&
+                   after(skb->end_seq, skb->next->seq)) 
+                       printk("INET: tcp_input.c: *** "
+                              "bug send_list out of order.\n");
+                                                               
                /*
-                *      Was it a usable window open ?
+                *      If our packet is before the ack sequence we can
+                *      discard it as it's confirmed to have arrived the 
+                *      other end.
                 */
                 
-               if (!skb_queue_empty(&sk->write_queue) &&   /* should always be true */
-                   ! before (sk->window_seq, sk->write_queue.next->end_seq)) 
+               if (!after(skb->end_seq, ack)) 
                {
-                       sk->backoff = 0;
+                       if (sk->debug)
+                       {
+                               printk(KERN_DEBUG "removing seg %x-%x from "
+                                      "retransmit queue\n",
+                                      skb->seq, skb->end_seq);
+                       }
                        
-                       /*
-                        *      Recompute rto from rtt.  this eliminates any backoff.
-                        */
+                       tp->retrans_head = NULL;
+                                               
+                       flag |= FLAG_DATA_ACKED;
+                       seq = skb->seq;
+                       seq_rtt = jiffies - skb->when;
+                       
+                       skb_unlink(skb);
+                       atomic_dec(&sk->packets_out);
+                       skb->free = 1;
 
-                       /*
-                        * Appendix C of Van Jacobson's final version of
-                        * the SIGCOMM 88 paper states that although
-                        * the original paper suggested that
-                        *  RTO = R*2V
-                        * was the correct calculation experience showed
-                        * better results using
-                        *  RTO = R*4V
-                        * In particular this gives better performance over
-                        * slow links, and should not effect fast links.
-                        *
-                        * Note: Jacobson's algorithm is fine on BSD which
-                        * has a 1/2 second granularity clock, but with our
-                        * 1/100 second granularity clock we become too
-                        * sensitive to minor changes in the round trip time.
-                        * We add in two compensating factors.
-                        * First we multiply by 5/4. For large congestion
-                        * windows this allows us to tolerate burst traffic
-                        * delaying up to 1/4 of our packets.
-                        * We also add in a rtt / cong_window term.
-                        * For small congestion windows this allows
-                        * a single packet delay, but has negligible effect
-                        * on the compensation for large windows.
-                        */
-                       sk->rto = (sk->rtt >> 3) + sk->mdev;
-                       sk->rto += (sk->rto>>2) + (sk->rto >> (sk->cong_window-1));
-                       if (sk->rto > 120*HZ)
-                               sk->rto = 120*HZ;
-                       if (sk->rto < HZ/5)     /* Was 1*HZ, then 1 - turns out we must allow about
-                                                  .2 of a second because of BSD delayed acks - on a 100Mb/sec link
-                                                  .2 of a second is going to need huge windows (SIGH) */
-                       sk->rto = HZ/5;
+                       kfree_skb(skb, FREE_WRITE);
+                       
+                       if (!sk->dead)
+                               sk->write_space(sk);
+               }
+               else
+               {
+                       break;
                }
        }
 
+       end_bh_atomic();
+
        /* 
-        *      See if we can take anything off of the retransmit queue.
+        * if we where retransmiting don't count rtt estimate
         */
 
-       for (;;) {
-               struct sk_buff * skb = sk->send_head;
-               if (!skb)
-                       break;
-
-               /* Check for a bug. */
-               if (skb->link3 && after(skb->end_seq, skb->link3->end_seq)) 
-                       printk("INET: tcp.c: *** bug send_list out of order.\n");
-                       
-               /*
-                *      If our packet is before the ack sequence we can
-                *      discard it as it's confirmed to have arrived the other end.
-                */
-                
-               if (after(skb->end_seq, ack))
-                       break;
-
-               if (sk->retransmits) 
-               {
-                       /*
-                        *      We were retransmitting.  don't count this in RTT est 
-                        */
-                       flag |= 2;
-               }
-
-               if ((sk->send_head = skb->link3) == NULL)
-               {
-                       sk->send_tail = NULL;
-                       sk->send_next = NULL;
+       if (sk->retransmits)
+       {
+               if (sk->packets_out == 0)
                        sk->retransmits = 0;
-               }
-
-               /*
-                * advance the send_next pointer if needed.
-                */
-               if (sk->send_next == skb)
-                       sk->send_next = sk->send_head;
-
+       }
+       else
+       {
                /*
                 * Note that we only reset backoff and rto in the
                 * rtt recomputation code.  And that doesn't happen
@@ -946,274 +699,89 @@ static int tcp_ack(struct sock *sk, struct tcphdr *th, u32 ack, int len)
                 * suddenly.  I.e. Karn's algorithm. (SIGCOMM '87, p5.)
                 */
 
-               /*
-                *      We have one less packet out there. 
-                */
-                        
-               if (sk->packets_out > 0) 
-                       sk->packets_out --;
-
-               /* This is really only supposed to be called when we
-                * are actually ACKing new data, which should exclude
-                * the ACK handshake on an initial SYN packet as well.
-                * Rather than introducing a new test here for this
-                * special case, we just reset the initial values for
-                * rtt immediately after we move to the established state.
-                */
-               if (!(flag&2))  /* Not retransmitting */
-                       tcp_rtt_estimator(sk,skb);
-               IS_SKB(skb);
-
-               /*
-                *      We may need to remove this from the dev send list. 
-                */
-               cli();
-               if (skb->next)
-                       skb_unlink(skb);
-               sti();
-               kfree_skb(skb, FREE_WRITE); /* write. */
-               if (!sk->dead)
-                       sk->write_space(sk);
-       }
-
-       /*
-        * Maybe we can take some stuff off of the write queue,
-        * and put it onto the xmit queue.
-        * There is bizarre case being tested here, to check if
-        * the data at the head of the queue ends before the start of
-        * the sequence we already ACKed. This is not an error,
-        * it can occur when we send a packet directly off of the write_queue
-        * in a zero window probe.
-        */
-
-       if (!skb_queue_empty(&sk->write_queue) &&
-               !before(sk->window_seq, sk->write_queue.next->end_seq) &&
-               (sk->retransmits == 0 || 
-                sk->ip_xmit_timeout != TIME_WRITE ||
-                !after(sk->write_queue.next->end_seq, sk->rcv_ack_seq)) &&
-               sk->packets_out < sk->cong_window)
-       {
-               /*
-                *      Add more data to the send queue.
-                */
-               tcp_write_xmit(sk);
-       }
-
-       /*
-        * Reset timers to reflect the new state.
-        *
-        * from TIME_WAIT we stay in TIME_WAIT as long as we rx packets
-        * from TCP_CLOSE we don't do anything
-        *
-        * from anything else, if there is queued data (or fin) pending,
-        * we use a TIME_WRITE timeout, if there is data to write but
-        * no room in the window we use TIME_PROBE0, else if keepalive
-        * we reset to a KEEPALIVE timeout, else we delete the timer.
-        *
-        * We do not set flag for nominal write data, otherwise we may
-        * force a state where we start to write itsy bitsy tidbits
-        * of data.
-        */
-
-       switch(sk->state) {
-       case TCP_TIME_WAIT:
-               /*
-                * keep us in TIME_WAIT until we stop getting packets,
-                * reset the timeout.
-                */
-               tcp_reset_msl_timer(sk, TIME_CLOSE, TCP_TIMEWAIT_LEN);
-               break;
-       case TCP_CLOSE:
-               /*
-                * don't touch the timer.
-                */
-               break;
-       default:
-               /*
-                *      Must check send_head and write_queue
-                *      to determine which timeout to use.
-                */
-               if (sk->send_head) {
-                       tcp_reset_xmit_timer(sk, TIME_WRITE, sk->rto);
-               } else if (!skb_queue_empty(&sk->write_queue)
-                       && sk->ack_backlog == 0)
+               if (flag & FLAG_DATA_ACKED)
                {
-                       /* 
-                        * if the write queue is not empty when we get here
-                        * then we failed to move any data to the retransmit
-                        * queue above. (If we had send_head would be non-NULL).
-                        * Furthermore, since the send_head is NULL here
-                        * we must not be in retransmit mode at this point.
-                        * This implies we have no packets in flight,
-                        * hence sk->packets_out < sk->cong_window.
-                        * Examining the conditions for the test to move
-                        * data to the retransmission queue we find that
-                        * we must therefore have a zero window.
-                        * Hence, if the ack_backlog is 0 we should initiate
-                        * a zero probe.
-                        * We don't do a zero probe if we have a delayed
-                        * ACK in hand since the other side may have a
-                        * window opening, but they are waiting to hear
-                        * from us before they tell us about it.
-                        * (They are applying Nagle's rule).
-                        * So, we don't set up the zero window probe
-                        * just yet. We do have to clear the timer
-                        * though in this case...
-                        */
-                       tcp_reset_xmit_timer(sk, TIME_PROBE0, sk->rto);
-               } else if (sk->keepopen) {
-                       tcp_reset_xmit_timer(sk, TIME_KEEPOPEN, TCP_TIMEOUT_LEN);
-               } else {
-                       del_timer(&sk->retransmit_timer);
-                       sk->ip_xmit_timeout = 0;
+                       tcp_rtt_estimator(tp, seq_rtt);
+                       if (sysctl_tcp_vegas_cong_avoidance)
+                       {
+                               tcp_cong_avoid_vegas(sk, seq, ack, seq_rtt);
+                       }
+                       else
+                       {
+                               tcp_cong_avoid_vanj(sk, seq, ack, seq_rtt);
+                       }
                }
-               break;
-       }
-
-       /*
-        *      We have nothing queued but space to send. Send any partial
-        *      packets immediately (end of Nagle rule application).
-        */
-        
-       if (sk->packets_out == 0
-           && sk->partial != NULL
-           && skb_queue_empty(&sk->write_queue)
-           && sk->send_head == NULL) 
-       {
-               tcp_send_partial(sk);
        }
 
-       /*
-        * In the LAST_ACK case, the other end FIN'd us.  We then FIN'd them, and
-        * we are now waiting for an acknowledge to our FIN.  The other end is
-        * already in TIME_WAIT.
-        *
-        * Move to TCP_CLOSE on success.
-        */
+                       
 
-       if (sk->state == TCP_LAST_ACK) 
+       /* Sanity check out packets_out counter */
+       if (skb_queue_len(&sk->write_queue) == 0 || 
+           ack == tp->snd_nxt ) 
        {
-               if (!sk->dead)
-                       sk->state_change(sk);
-               if(sk->debug)
-                       printk("rcv_ack_seq: %X==%X, acked_seq: %X==%X\n",
-                               sk->rcv_ack_seq,sk->write_seq,sk->acked_seq,sk->fin_seq);
-               if (sk->rcv_ack_seq == sk->write_seq /*&& sk->acked_seq == sk->fin_seq*/) 
+               if (sk->packets_out) 
                {
-                       sk->shutdown = SHUTDOWN_MASK;
-                       tcp_set_state(sk,TCP_CLOSE);
-                       return 1;
-               }
+                       printk(KERN_DEBUG "tcp_ack: packets_out %d\n",
+                              sk->packets_out);
+                        sk->packets_out = 0;
+                }
        }
 
-       /*
-        *      Incoming ACK to a FIN we sent in the case of our initiating the close.
-        *
-        *      Move to FIN_WAIT2 to await a FIN from the other end. Set
-        *      SEND_SHUTDOWN but not RCV_SHUTDOWN as data can still be coming in.
-        */
 
-       if (sk->state == TCP_FIN_WAIT1) 
+       if (sk->packets_out)
        {
-
-               if (!sk->dead) 
-                       sk->state_change(sk);
-               if (sk->rcv_ack_seq == sk->write_seq) 
+               if (flag & FLAG_DATA_ACKED)
                {
-                       sk->shutdown |= SEND_SHUTDOWN;
-                       tcp_set_state(sk, TCP_FIN_WAIT2);
-                       /* If the socket is dead, then there is no
-                        * user process hanging around using it.
-                        * We want to set up a FIN_WAIT2 timeout ala BSD.
-                        */
-                       if (sk->dead)
-                               tcp_reset_msl_timer(sk, TIME_CLOSE, TCP_FIN_TIMEOUT);
+                       long when;
+                               
+                       skb = skb_peek(&sk->write_queue);
+               
+                       when = tp->rto - (jiffies - skb->when);
+               
+                       if (when <= 0) 
+                       {
+                               tp->retrans_head = NULL;
+                               /* 
+                                * This is tricky. We are retransmiting a 
+                                * segment of a window when congestion occured.
+                                */
+                               tcp_do_retransmit(sk, 0);
+                               tcp_reset_xmit_timer(sk, TIME_RETRANS,
+                                                    tp->rto);
+                       }
+                       else 
+                               tcp_reset_xmit_timer(sk, TIME_RETRANS, when);
                }
        }
+       else
+               tcp_clear_xmit_timer(sk, TIME_RETRANS);
+       
 
        /*
-        *      Incoming ACK to a FIN we sent in the case of a simultaneous close.
-        *
-        *      Move to TIME_WAIT
+        *      Remember the highest ack received.
         */
+        
+       tp->snd_una = ack;
+
+       tcp_fast_retrans(sk, ack, (flag & (FLAG_DATA|FLAG_WIN_UPDATE)));
 
-       if (sk->state == TCP_CLOSING) 
-       {
 
-               if (!sk->dead) 
-                       sk->state_change(sk);
-               if (sk->rcv_ack_seq == sk->write_seq) 
-               {
-                       tcp_time_wait(sk);
-               }
-       }
-       
-       /*
-        *      Final ack of a three way shake 
-        */
-        
-       if (sk->state==TCP_SYN_RECV)
-       {
-               tcp_set_state(sk, TCP_ESTABLISHED);
-               tcp_options(sk,th);
-               sk->dummy_th.dest=th->source;
-               sk->copied_seq = sk->acked_seq;
-               if(!sk->dead)
-                       sk->state_change(sk);
-               if(sk->max_window==0)
-               {
-                       sk->max_window=32;      /* Sanity check */
-                       sk->mss=min(sk->max_window,sk->mtu);
-               }
-               /* Reset the RTT estimator to the initial
-                * state rather than testing to avoid
-                * updating it on the ACK to the SYN packet.
-                */
-               sk->rtt = 0;
-               sk->rto = TCP_TIMEOUT_INIT;
-               sk->mdev = TCP_TIMEOUT_INIT;
-       }
-       
        /*
-        * The following code has been greatly simplified from the
-        * old hacked up stuff. The wonders of properly setting the
-        * retransmission timeouts.
-        *
-        * If we are retransmitting, and we acked a packet on the retransmit
-        * queue, and there is still something in the retransmit queue,
-        * then we can output some retransmission packets.
+        * Maybe we can take some stuff off of the write queue,
+        * and put it onto the xmit queue.
         */
 
-       if (sk->send_head != NULL && (flag&2) && sk->retransmits)
-       {
-               tcp_do_retransmit(sk, 1);
-       }
 
        return 1;
 
 uninteresting_ack:
+
+       tcp_fast_retrans(sk, ack, 0);
+
        if(sk->debug)
-               printk("Ack ignored %u %u\n",ack,sk->sent_seq);
+               printk("Ack ignored %u %u\n",ack,tp->snd_nxt);
                        
-       /*
-        *      Keepalive processing.
-        */
-                
-       if (after(ack, sk->sent_seq)) 
-       {
-               return 0;
-       }
-               
-       /*
-        *      Restart the keepalive timer.
-        */
-                
-       if (sk->keepopen) 
-       {
-               if(sk->ip_xmit_timeout==TIME_KEEPOPEN)
-                       tcp_reset_xmit_timer(sk, TIME_KEEPOPEN, TCP_TIMEOUT_LEN);
-       }
-       return 1;
+       return 0;
 }
 
 
@@ -1237,6 +805,8 @@ static int tcp_fin(struct sk_buff *skb, struct sock *sk, struct tcphdr *th)
 {
        sk->fin_seq = skb->end_seq;
 
+       tcp_send_ack(sk);
+
        if (!sk->dead) 
        {
                sk->state_change(sk);
@@ -1249,10 +819,11 @@ static int tcp_fin(struct sk_buff *skb, struct sock *sk, struct tcphdr *th)
                case TCP_SYN_SENT:
                case TCP_ESTABLISHED:
                        /*
-                        * move to CLOSE_WAIT, tcp_data() already handled
-                        * sending the ack.
+                        * move to CLOSE_WAIT
                         */
-                       tcp_set_state(sk,TCP_CLOSE_WAIT);
+
+                       tcp_set_state(sk, TCP_CLOSE_WAIT);
+                       
                        if (th->rst)
                                sk->shutdown = SHUTDOWN_MASK;
                        break;
@@ -1280,27 +851,11 @@ static int tcp_fin(struct sk_buff *skb, struct sock *sk, struct tcphdr *th)
                         * This causes a WRITE timeout, which will either
                         * move on to TIME_WAIT when we timeout, or resend
                         * the FIN properly (maybe we get rid of that annoying
-                        * FIN lost hang). The TIME_WRITE code is already correct
-                        * for handling this timeout.
+                        * FIN lost hang). The TIME_WRITE code is already 
+                        * correct for handling this timeout.
                         */
 
-                       if (sk->ip_xmit_timeout != TIME_WRITE) {
-                               if (sk->send_head)
-                                       tcp_reset_xmit_timer(sk, TIME_WRITE, sk->rto);
-                               else if (sk->ip_xmit_timeout != TIME_PROBE0
-                               || skb_queue_empty(&sk->write_queue)) {
-                                       /* BUG check case.
-                                        * We have a problem here if there
-                                        * is no timer running [leads to
-                                        * frozen socket] or no data in the
-                                        * write queue [means we sent a fin
-                                        * and lost it from the queue before
-                                        * changing the ack properly].
-                                        */
-                                       printk(KERN_ERR "Lost timer or fin packet in tcp_fin.\n");
-                               }
-                       }
-                       tcp_set_state(sk,TCP_CLOSING);
+                       tcp_set_state(sk, TCP_CLOSING);
                        break;
                case TCP_FIN_WAIT2:
                        /*
@@ -1326,155 +881,175 @@ static int tcp_fin(struct sk_buff *skb, struct sock *sk, struct tcphdr *th)
        return(0);
 }
 
-/*
- * Add a sk_buff to the TCP receive queue, calculating
- * the ACK sequence as we go..
- */
-static inline void tcp_insert_skb(struct sk_buff * skb, struct sk_buff_head * list)
-{
-       struct sk_buff * prev, * next;
-       u32 seq;
+
 
        /*
-        * Find where the new skb goes.. (This goes backwards,
-        * on the assumption that we get the packets in order)
+        * This one checks to see if we can put data from the
+        * out_of_order queue into the receive_queue
         */
-       seq = skb->seq;
-       prev = list->prev;
-       next = (struct sk_buff *) list;
-       for (;;) {
-               if (prev == (struct sk_buff *) list || !after(prev->seq, seq))
+
+static __inline__ void  tcp_ofo_queue(struct sock *sk)
+{
+       struct sk_buff * skb;
+       struct tcp_opt *tp=&(sk->tp_pinfo.af_tcp);
+
+       while ((skb = skb_peek(&sk->out_of_order_queue))) {
+               
+               if (after(skb->seq, tp->rcv_nxt))
                        break;
-               next = prev;
-               prev = prev->prev;
+
+               if (!after(skb->end_seq, tp->rcv_nxt)) {
+
+                       if (sk->debug)
+                               printk("ofo packet was allready received \n");
+
+                       skb_unlink(skb);
+                       kfree_skb(skb, FREE_READ);
+                       
+                       continue;
+               }
+
+               if (sk->debug) 
+                       printk("ofo requeuing : rcv_next %X seq %X - %X\n", 
+                              tp->rcv_nxt, skb->seq, skb->end_seq);
+               
+               skb_unlink(skb);
+
+                
+               skb_queue_tail(&sk->receive_queue, skb);
+
+
+               tp->rcv_nxt = skb->end_seq;
        }
-       __skb_insert(skb, prev, next, list);
 }
 
-/*
- * Called for each packet when we find a new ACK endpoint sequence in it
- */
-static inline u32 tcp_queue_ack(struct sk_buff * skb, struct sock * sk)
+static __inline__ void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
 {
+       struct sk_buff * skb1;
+       struct tcp_opt *tp=&(sk->tp_pinfo.af_tcp);
+
        /*
-        *      When we ack the fin, we do the FIN 
-        *      processing.
+        *  Queue data for delivery to the user
+        *  Packets in sequence go to the receive queue
+        *  Out of sequence packets to out_of_order_queue
         */
-       skb->acked = 1;
-       if (skb->h.th->fin)
-               tcp_fin(skb,sk,skb->h.th);
-       return skb->end_seq;
-}      
 
-static void tcp_queue(struct sk_buff * skb, struct sock * sk, struct tcphdr *th)
-{
-       u32 ack_seq;
 
-       tcp_insert_skb(skb, &sk->receive_queue);
+       if (skb->seq == tp->rcv_nxt) {
+
+               /*
+                * Ok. In sequence.
+                */
+               
+               skb_queue_tail(&sk->receive_queue, skb);
+
+
+               tp->rcv_nxt = skb->end_seq;
+
+               tcp_ofo_queue(sk);
+               
+               if (skb_queue_len(&sk->out_of_order_queue) == 0)
+                       tp->pred_flags = htonl((0x5010 << 16) | tp->snd_wnd);
 
+               return;
+       }
+       
        /*
-        * Did we get anything new to ack?
+        *  Not in sequence
+        *  either a retransmit or some packet got lost
         */
-       ack_seq = sk->acked_seq;
 
+       if (!after(skb->end_seq, tp->rcv_nxt)) {
+               
+               /* 
+                * A retransmit.
+                * 2nd most common case.
+                * force an imediate ack
+                */
 
-       if (!after(skb->seq, ack_seq)) {
-               if (after(skb->end_seq, ack_seq)) {
-                       /* the packet straddles our window end */
-                       struct sk_buff_head * list = &sk->receive_queue;
-                       struct sk_buff * next;
-                       ack_seq = tcp_queue_ack(skb, sk);
+               if (sk->debug) 
+                       printk("retransmit received: seq %X\n", skb->seq);
 
-                       /*
-                        * Do we have any old packets to ack that the above
-                        * made visible? (Go forward from skb)
-                        */
-                       next = skb->next;
-                       while (next != (struct sk_buff *) list) {
-                               if (after(next->seq, ack_seq))
-                                       break;
-                               if (after(next->end_seq, ack_seq))
-                                       ack_seq = tcp_queue_ack(next, sk);
-                               next = next->next;
-                       }
+               sk->delayed_acks = MAX_DELAY_ACK;
+               kfree_skb(skb, FREE_READ);
 
-                       /*
-                        * Ok, we found new data, update acked_seq as
-                        * necessary (and possibly send the actual
-                        * ACK packet).
-                        */
-                       sk->acked_seq = ack_seq;
+               return;
+       }
 
-               } else {
-                       if (sk->debug)
-                               printk("Ack duplicate packet.\n");
-                       tcp_send_ack(sk);
-                       return;
-               }
 
+       if (before(skb->seq, tp->rcv_nxt)) {
 
                /*
-                * Delay the ack if possible.  Send ack's to
-                * fin frames immediately as there shouldn't be
-                * anything more to come.
+                * Partial packet
+                * seq < rcv_next < end_seq
                 */
-               if (!sk->delay_acks || th->fin) {
-                       tcp_send_ack(sk);
-               } else {
-                       /*
-                        * If psh is set we assume it's an
-                        * interactive session that wants quick
-                        * acks to avoid nagling too much. 
-                        */
-                       int delay = HZ/2;
-                       if (th->psh)
-                               delay = HZ/50;
-                       tcp_send_delayed_ack(sk, delay, sk->ato);
-               }
 
-               /*
-                *      Tell the user we have some more data.
-                */
+               if (sk->debug) 
+                       printk("partial packet: rcv_next %X seq %X - %X\n", 
+                              tp->rcv_nxt, skb->seq, skb->end_seq);
+               
+               skb_queue_tail(&sk->receive_queue, skb);
 
-               if (!sk->dead)
-                       sk->data_ready(sk,0);
 
+               tp->rcv_nxt = skb->end_seq;
+
+               tcp_ofo_queue(sk);
+
+               if (skb_queue_len(&sk->out_of_order_queue) == 0)
+                       tp->pred_flags = htonl((0x5010 << 16) | tp->snd_wnd);
+
+               return;         
        }
-       else
-       {
-           /*
-            *  If we've missed a packet, send an ack.
-            *  Also start a timer to send another.
-            *
-            *  4.3reno machines look for these kind of acks so
-            *  they can do fast recovery. Three identical 'old'
-            *  acks lets it know that one frame has been lost
-            *      and should be resent. Because this is before the
-            *  whole window of data has timed out it can take
-            *  one lost frame per window without stalling.
-            *  [See Jacobson RFC1323, Stevens TCP/IP illus vol2]
-            *
-            *  We also should be spotting triple bad sequences.
-            *  [We now do this.]
-            *
-            */
-            
-           if (!skb->acked) 
-           {
-                   if(sk->debug)
-                           printk("Ack past end of seq packet.\n");
-                   tcp_send_ack(sk);
-                   /*
-                    * We need to be very careful here. We must
-                    * not violate Jacobsons packet conservation condition.
-                    * This means we should only send an ACK when a packet
-                    * leaves the network. We can say a packet left the
-                    * network when we see a packet leave the network, or
-                    * when an rto measure expires.
-                    */
-                   tcp_send_delayed_ack(sk,sk->rto,sk->rto);
-           }
+
+       /* 
+        * Ok. This is an out_of_order segment 
+        */
+       
+       /* Force an ack */
+       
+       sk->delayed_acks = MAX_DELAY_ACK;
+
+       /*
+        *      disable header predition
+        */
+
+       tp->pred_flags = 0;
+
+       if (sk->debug) 
+               printk("out of order segment: rcv_next %X seq %X - %X\n", 
+                      tp->rcv_nxt, skb->seq, skb->end_seq);
+
+       if (skb_peek(&sk->out_of_order_queue) == NULL) {
+               skb_queue_head(&sk->out_of_order_queue,skb);
        }
+       else 
+               for(skb1=sk->out_of_order_queue.prev; ; skb1 = skb1->prev) {
+
+                       /* allready there */
+                       if (skb->seq==skb1->seq && skb->len>=skb1->len)
+                       {
+                               skb_append(skb1,skb);
+                               skb_unlink(skb1);
+                               kfree_skb(skb1,FREE_READ);
+                               break;
+                       }
+                       
+                       if (after(skb->seq, skb1->seq))
+                       {
+                               skb_append(skb1,skb);
+                               break;
+                       }
+                       
+                        /*
+                        *      See if we've hit the start. If so insert.
+                        */
+                       if (skb1 == skb_peek(&sk->out_of_order_queue)) {
+                               skb_queue_head(&sk->out_of_order_queue,skb);
+                               break;
+                       }
+               }
+                       
 }
 
 
@@ -1484,117 +1059,124 @@ static void tcp_queue(struct sk_buff * skb, struct sock * sk, struct tcphdr *th)
  *     room, then we will just have to discard the packet.
  */
 
-static int tcp_data(struct sk_buff *skb, struct sock *sk, 
-        unsigned long saddr, unsigned int len)
+static int tcp_data(struct sk_buff *skb, struct sock *sk, unsigned int len)
 {
        struct tcphdr *th;
-       u32 new_seq, shut_seq;
+       struct tcp_opt *tp=&(sk->tp_pinfo.af_tcp);
 
        th = skb->h.th;
        skb_pull(skb,th->doff*4);
        skb_trim(skb,len-(th->doff*4));
 
+        if (skb->len == 0 && !th->fin)
+        {
+               return(0);
+        }
+
+       /*
+        *      FIXME: don't accept data after the receved fin
+        */
+
        /*
-        *      The bytes in the receive read/assembly queue has increased. Needed for the
-        *      low memory discard algorithm 
+        *      The bytes in the receive read/assembly queue has increased. 
+        *      Needed for the low memory discard algorithm 
         */
           
        sk->bytes_rcv += skb->len;
-       
-       if (skb->len == 0 && !th->fin) 
+               
+       /*
+        *      We no longer have anyone receiving data on this connection.
+        */
+
+       tcp_data_queue(sk, skb);
+
+       if (before(tp->rcv_nxt, sk->copied_seq)) 
        {
-               /* 
-                *      Don't want to keep passing ack's back and forth. 
-                *      (someone sent us dataless, boring frame)
-                */
-               if (!th->ack)
-                       tcp_send_ack(sk);
-               kfree_skb(skb, FREE_READ);
-               return(0);
+               printk("*** tcp.c:tcp_data bug acked < copied\n");
+               tp->rcv_nxt = sk->copied_seq;
        }
 
+       sk->delayed_acks++;
+       
 
        /*
-        *      We no longer have anyone receiving data on this connection.
+        *      Now tell the user we may have some data. 
         */
+        
+       if (!sk->dead) 
+       {
+               if(sk->debug)
+                       printk("Data wakeup.\n");
+               sk->data_ready(sk,0);
+       } 
+       return(1);
+}
 
-#ifndef TCP_DONT_RST_SHUTDOWN           
+static void tcp_data_snd_check(struct sock *sk)
+{
+       struct sk_buff *skb;
+       struct tcp_opt *tp=&(sk->tp_pinfo.af_tcp);
 
-       if(sk->shutdown & RCV_SHUTDOWN)
+       if ((skb = tp->send_head)) 
        {
-               /*
-                *      FIXME: BSD has some magic to avoid sending resets to
-                *      broken 4.2 BSD keepalives. Much to my surprise a few non
-                *      BSD stacks still have broken keepalives so we want to
-                *      cope with it.
-                */
-
-               if(skb->len)    /* We don't care if it's just an ack or
-                                  a keepalive/window probe */
+               if (!after(skb->end_seq, tp->snd_una + tp->snd_wnd) &&
+                   sk->packets_out < sk->cong_window )
                {
-                       new_seq = skb->seq + skb->len + th->syn;        /* Right edge of _data_ part of frame */
-                       
-                       /* Do this the way 4.4BSD treats it. Not what I'd
-                          regard as the meaning of the spec but it's what BSD
-                          does and clearly they know everything 8) */
-
                        /*
-                        *      This is valid because of two things
-                        *
-                        *      a) The way tcp_data behaves at the bottom.
-                        *      b) A fin takes effect when read not when received.
+                        *      Add more data to the send queue.
                         */
-                        
-                       shut_seq = sk->acked_seq+1;     /* Last byte */
-                       
-                       if(after(new_seq,shut_seq))
-                       {
-                               if(sk->debug)
-                                       printk("Data arrived on %p after close [Data right edge %X, Socket shut on %X] %d\n",
-                                               sk, new_seq, shut_seq, sk->blog);
-                               if(sk->dead)
-                               {
-                                       sk->acked_seq = new_seq + th->fin;
-                                       tcp_send_reset(sk->saddr, sk->daddr, skb->h.th,
-                                               sk->prot, NULL, skb->dev, sk->ip_tos, sk->ip_ttl);
-                                       tcp_statistics.TcpEstabResets++;
-                                       sk->err = EPIPE;
-                                       sk->error_report(sk);
-                                       sk->shutdown = SHUTDOWN_MASK;
-                                       tcp_set_state(sk,TCP_CLOSE);
-                                       kfree_skb(skb, FREE_READ);
-                                       return 0;
-                               }
-                       }
+
+                       tcp_write_xmit(sk);
+                       wake_up_interruptible(sk->sleep);
                }
+               else if (sk->packets_out == 0 && !tp->pending)
+               {
+                       /*
+                        *      Data to queue but no room.
+                        */
+                       tcp_reset_xmit_timer(sk, TIME_PROBE0, tp->rto);
+               }               
        }
+}      
 
-#endif
-
+static __inline__ void tcp_ack_snd_check(struct sock *sk)
+{
        /*
-        * We should only call this if there is data in the frame.
-        */
-       tcp_delack_estimator(sk);
+        *      This also takes care of updating the window.
+        *      This if statement needs to be simplified.
+        *
+        *      rules for delaying an ack:
+        *      - delay time <= 0.5 HZ
+        *      - we don't have a window update to send
+        *      - must send at least every 2 full sized packets
+        */
 
-       tcp_queue(skb, sk, th);
+       if (sk->delayed_acks == 0)
+               return;
 
-       return(0);
+       if (sk->delayed_acks >= MAX_DELAY_ACK || tcp_raise_window(sk)) 
+       {
+               tcp_send_ack(sk);
+       }
+       else 
+       {       
+               tcp_send_delayed_ack(sk, HZ/2);         
+       }
 }
 
-
 /*
  *     This routine is only called when we have urgent data
  *     signalled. Its the 'slow' part of tcp_urg. It could be
  *     moved inline now as tcp_urg is only called from one
  *     place. We handle URGent data wrong. We have to - as
  *     BSD still doesn't use the correction from RFC961.
- *
  *     For 1003.1g we should support a new option TCP_STDURG to permit
  *     either form.
  */
  
 static void tcp_check_urg(struct sock * sk, struct tcphdr * th)
 {
+       struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
        u32 ptr = ntohs(th->urg_ptr);
 
        if (ptr)
@@ -1628,6 +1210,9 @@ static void tcp_check_urg(struct sock * sk, struct tcphdr * th)
                sk->copied_seq++;       /* Move the copied sequence on correctly */
        sk->urg_data = URG_NOTYET;
        sk->urg_seq = ptr;
+
+       /* disable header prediction */
+       tp->pred_flags = 0;
 }
 
 /*
@@ -1662,429 +1247,430 @@ static inline void tcp_urg(struct sock *sk, struct tcphdr *th, unsigned long len
        }
 }
 
-/*
- * This should be a bit smarter and remove partially
- * overlapping stuff too, but this should be good
- * enough for any even remotely normal case (and the
- * worst that can happen is that we have a few
- * unnecessary packets in the receive queue).
- *
- * This function is never called with an empty list..
- */
-static inline void tcp_remove_dups(struct sk_buff_head * list)
-{
-       struct sk_buff * next = list->next;
 
-       for (;;) {
-               struct sk_buff * skb = next;
-               next = next->next;
-               if (next == (struct sk_buff *) list)
-                       break;
-               if (before(next->end_seq, skb->end_seq)) {
-                       __skb_unlink(next, list);
-                       kfree_skb(next, FREE_READ);
-                       next = skb;
-                       continue;
-               }
-               if (next->seq != skb->seq)
-                       continue;
-               __skb_unlink(skb, list);
-               kfree_skb(skb, FREE_READ);
-       }
-}
-
-/*
- * Throw out all unnecessary packets: we've gone over the
- * receive queue limit. This shouldn't happen in a normal
- * TCP connection, but we might have gotten duplicates etc.
- */
-static void prune_queue(struct sk_buff_head * list)
+static __inline__ void prune_queue(struct sock *sk)
 {
-       for (;;) {
-               struct sk_buff * skb = list->prev;
+       struct sk_buff * skb;
 
-               /* gone through it all? */
-               if (skb == (struct sk_buff *) list)
-                       break;
-               if (!skb->acked) {
-                       __skb_unlink(skb, list);
-                       kfree_skb(skb, FREE_READ);
-                       continue;
-               }
-               tcp_remove_dups(list);
-               break;
+       /*
+        *      clean the out_of_order queue
+        */
+
+       while ((skb = skb_dequeue(&sk->out_of_order_queue))) 
+       {
+               kfree_skb(skb, FREE_READ);
        }
 }
 
-#ifdef CONFIG_IP_TRANSPARENT_PROXY
-/*
- *     Check whether a received TCP packet might be for one of our
- *     connections.
- */
-
-int tcp_chkaddr(struct sk_buff *skb)
-{
-       struct iphdr *iph = skb->h.iph;
-       struct tcphdr *th = (struct tcphdr *)(skb->h.raw + iph->ihl*4);
-       struct sock *sk;
-
-       sk = get_sock(&tcp_prot, th->dest, iph->saddr, th->source, iph->daddr, 0, 0);
 
-       if (!sk) return 0;
-       /* 0 means accept all LOCAL addresses here, not all the world... */
-       if (sk->rcv_saddr == 0) return 0;
-       return 1;
-}
-#endif
-
-/*
- *     A TCP packet has arrived.
- *             skb->h.raw is the TCP header.
- */
-int tcp_rcv(struct sk_buff *skb, struct device *dev, struct options *opt,
-       __u32 daddr, unsigned short len,
-       __u32 saddr, int redo, struct inet_protocol * protocol)
+void tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
+                        struct tcphdr *th, __u16 len)
 {
-       struct tcphdr *th;
-       struct sock *sk;
-       __u32 seq;
-#ifdef CONFIG_IP_TRANSPARENT_PROXY
-       int r;
-#endif
+       struct tcp_opt *tp;
+       int queued = 0;
+       u32 flg;
+       
+       /*
+        *      Header prediction.
+        *      The code follows the one in the famous 
+        *      "30 instruction TCP receive" Van Jacobson mail.
+        *      
+        *      Van's trick is to deposit buffers into socket queue 
+        *      on a device interrupt, to call tcp_recv function
+        *      on the receive process context and checksum and copy
+        *      the buffer to user space. smart...
+        *
+        *      Our current scheme is not silly either but we take the 
+        *      extra cost of the net_bh soft interrupt processing...
+        *      We do checksum and copy also but from device to kernel.
+        */
 
+       tp = &(sk->tp_pinfo.af_tcp); 
+       flg = *(((u32 *)th) + 3);
+               
        /*
-        * "redo" is 1 if we have already seen this skb but couldn't
-        * use it at that time (the socket was locked).  In that case
-        * we have already done a lot of the work (looked up the socket
-        * etc).
+        *      pred_flags is 0x5?10 << 16 + snd_wnd
+        *      if header_predition is to be made
+        *      ? will be 0 else it will be !0
+        *      (when there are holes in the receive 
+        *       space for instance)
         */
-       th = skb->h.th;
-       sk = skb->sk;
-       if (!redo) {
-               tcp_statistics.TcpInSegs++;
-               if (skb->pkt_type!=PACKET_HOST)
-                       goto discard_it;
 
-               /*
-                *      Pull up the IP header.
-                */
-       
-               skb_pull(skb, skb->h.raw-skb->data);
+       if (flg == tp->pred_flags && skb->seq == tp->rcv_nxt)
+       {
+               if (len <= sizeof(struct tcphdr))
+               {
+                       if (len == sizeof(struct tcphdr))
+                       {
+                               tcp_ack(sk, th, skb->seq, skb->ack_seq, len);
+                       }
 
-               /*
-                *      Try to use the device checksum if provided.
-                */
-               switch (skb->ip_summed) 
+                       tcp_data_snd_check(sk);
+
+                       kfree_skb(skb, FREE_READ);
+                       return;
+                       
+               }
+               else if (skb->ack_seq == tp->snd_una)
                {
-                       case CHECKSUM_NONE:
-                               skb->csum = csum_partial((char *)th, len, 0);
-                       case CHECKSUM_HW:
-                               if (tcp_check(th, len, saddr, daddr, skb->csum))
-                                       goto discard_it;
-                       default:
-                               /* CHECKSUM_UNNECESSARY */
+                       /* 
+                        * Bulk data transfer: receiver 
+                        */
+                       
+                       skb_pull(skb,sizeof(struct tcphdr));
+                       
+                       skb_queue_tail(&sk->receive_queue, skb);
+                       tp->rcv_nxt = skb->end_seq;
+                       sk->bytes_rcv += len - sizeof(struct tcphdr);
+                       
+                       sk->data_ready(sk, 0);
+                       tcp_delack_estimator(tp);
+
+                       if (sk->delayed_acks++)
+                       {
+                               tcp_send_delayed_ack(sk, HZ/2);
+                       }
+                       else
+                               tcp_send_ack(sk);
+
+                       return;
                }
-               sk = get_tcp_sock(saddr, th->source, daddr, th->dest, dev->pa_addr, skb->redirport);
-               if (!sk)
-                       goto no_tcp_socket;
-               skb->sk = sk;
-               skb->seq = ntohl(th->seq);
-               skb->end_seq = skb->seq + th->syn + th->fin + len - th->doff*4;
-               skb->ack_seq = ntohl(th->ack_seq);
-
-               skb->acked = 0;
-               skb->used = 0;
-               skb->free = 1;
-               skb->saddr = daddr;
-               skb->daddr = saddr;
+       }
 
-               /*
-                * We may need to add it to the backlog here. 
-                */
-               if (sk->users) 
+       if (!tcp_sequence(tp, skb->seq, skb->end_seq))
+       {
+               if (!th->rst)
                {
-                       __skb_queue_tail(&sk->back_log, skb);
-                       return(0);
+                       if (after(skb->seq, tp->rcv_nxt))
+                       {
+                               printk(KERN_DEBUG "->seq:%d end:%d "
+                                      "wup:%d wnd:%d\n",
+                                      skb->seq, skb->end_seq, 
+                                      tp->rcv_wup, tp->rcv_wnd);
+                       }
+                       tcp_send_ack(sk);
+                       kfree_skb(skb, FREE_READ);
+                       return;
                }
        }
 
+       if(th->syn && skb->seq != sk->syn_seq)
+       {
+               printk(KERN_DEBUG "syn in established state\n");
+               tcp_reset(sk, skb);
+               kfree_skb(skb, FREE_READ);
+               return;
+       }
+       
+       if(th->rst)
+       {
+               tcp_reset(sk,skb);
+               kfree_skb(skb, FREE_READ);
+               return;
+       }
+       
+       if(th->ack)
+       {
+               tcp_ack(sk, th, skb->seq, skb->ack_seq, len);
+       }
+
+       
        /*
-        *      If this socket has got a reset it's to all intents and purposes 
-        *      really dead. Count closed sockets as dead.
-        *
-        *      Note: BSD appears to have a bug here. A 'closed' TCP in BSD
-        *      simply drops data. This seems incorrect as a 'closed' TCP doesn't
-        *      exist so should cause resets as if the port was unreachable.
+        *      Process urgent data
+        */
+
+       tcp_urg(sk, th, len);
+
+       /*
+        *      step 7: process the segment text
         */
 
-       if (sk->zapped || sk->state==TCP_CLOSE)
-               goto no_tcp_socket;
 
-       if (!sk->prot) 
+       queued = tcp_data(skb, sk, len);
+
+       /*
+        *      step 8: check the FIN bit
+        */
+
+       if (th->fin)
        {
-               printk(KERN_CRIT "IMPOSSIBLE 3\n");
-               return(0);
+               tcp_fin(skb, sk, th);
        }
 
+       tcp_data_snd_check(sk);
+       tcp_ack_snd_check(sk);
 
        /*
-        *      Charge the memory to the socket. 
+        *      If our receive queue has grown past its limits,
+        *      try to prune away duplicates etc..
         */
-        
-       skb->sk=sk;
-       atomic_add(skb->truesize, &sk->rmem_alloc);
+       if (sk->rmem_alloc > sk->rcvbuf)
+               prune_queue(sk);
 
        /*
-        * Mark the time of the last received packet.
-        */
-       sk->idletime = jiffies;
+        *      And done
+        */     
        
+       if (queued)
+               return;
+
+       kfree_skb(skb, FREE_READ);
+}
+               
+
+/*
+ *     This function implements the receiving procedure of RFC 793.
+ *     It's called from both tcp_v4_rcv and tcp_v6_rcv and should be
+ *     address independent.
+ */
+       
+int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
+                         struct tcphdr *th, void *opt, __u16 len)
+{
+       struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
+       int queued = 0;
+       int rcv_mss;
+
        /*
-        *      We should now do header prediction.
-        */
-        
-       /*
-        *      This basically follows the flow suggested by RFC793, with the corrections in RFC1122. We
-        *      don't implement precedence and we process URG incorrectly (deliberately so) for BSD bug
-        *      compatibility. We also set up variables more thoroughly [Karn notes in the
-        *      KA9Q code the RFC793 incoming segment rules don't initialise the variables for all paths].
+        *      state == CLOSED
+        *      tested in tcp_v{4,6}_rcv
         */
 
-       if(sk->state!=TCP_ESTABLISHED)          /* Skip this lot for normal flow */
-       {
-       
-               /*
-                *      Now deal with unusual cases.
+       switch (sk->state) {
+
+
+       case TCP_LISTEN:
+               
+               if (th->rst)                    
+                       goto discard;
+
+               /* 
+                * These use the socket TOS.. 
+                * might want to be the received TOS 
                 */
-        
-               if(sk->state==TCP_LISTEN)
-               {
-                       if(th->ack)     /* These use the socket TOS.. might want to be the received TOS */
-                               tcp_send_reset(daddr,saddr,th,sk->prot,opt,dev,sk->ip_tos, sk->ip_ttl);
 
+               if(th->ack)
+               {       
                        /*
-                        *      We don't care for RST, and non SYN are absorbed (old segments)
-                        *      Broadcast/multicast SYN isn't allowed. Note - bug if you change the
-                        *      netmask on a running connection it can go broadcast. Even Sun's have
-                        *      this problem so I'm ignoring it 
+                        *  send reset
                         */
-                          
-#ifdef CONFIG_IP_TRANSPARENT_PROXY
-                       /*
-                        * We may get non-local addresses and still want to
-                        * handle them locally, due to transparent proxying.
-                        * Thus, narrow down the test to what is really meant.
-                        */
-                       if(th->rst || !th->syn || th->ack || (r = ip_chk_addr(daddr)) == IS_BROADCAST || r == IS_MULTICAST)
-#else
-                       if(th->rst || !th->syn || th->ack || ip_chk_addr(daddr)!=IS_MYADDR)
-#endif
-                       {
-                               kfree_skb(skb, FREE_READ);
-                               return 0;
-                       }
+
+                       return 1;
+               }
                
-                       /*      
-                        *      Guess we need to make a new socket up
-                        */
-                       seq = secure_tcp_sequence_number(saddr, daddr,
-                                                        skb->h.th->dest,
-                                                        skb->h.th->source);
-                       tcp_conn_request(sk, skb, daddr, saddr, opt, dev, seq);
                
+               if(th->syn)
+               {
+                       int err;
+                       __u32 isn;
+
+                       isn = tp->af_specific->init_sequence(sk, skb);
+                       err = tp->af_specific->conn_request(sk, skb, opt, isn);
+
+                       if (err < 0)
+                               return 1;
+
                        /*
-                        *      Now we have several options: In theory there is nothing else
-                        *      in the frame. KA9Q has an option to send data with the syn,
-                        *      BSD accepts data with the syn up to the [to be] advertised window
-                        *      and Solaris 2.1 gives you a protocol error. For now we just ignore
-                        *      it, that fits the spec precisely and avoids incompatibilities. It
-                        *      would be nice in future to drop through and process the data.
+                        *  Now we have several options: In theory there is 
+                        *  nothing else in the frame. KA9Q has an option to 
+                        *  send data with the syn, BSD accepts data with the
+                        *  syn up to the [to be] advertised window and 
+                        *  Solaris 2.1 gives you a protocol error. For now 
+                        *  we just ignore it, that fits the spec precisely 
+                        *  and avoids incompatibilities. It would be nice in
+                        *  future to drop through and process the data.
                         *
-                        *      Now TTCP is starting to use we ought to queue this data.
+                        *  Now that TTCP is starting to be used we ought to 
+                        *  queue this data.
                         */
-                        
-                       return 0;
-               }
-       
-               /* 
-                *      Retransmitted SYN for our socket. This is uninteresting. If sk->state==TCP_LISTEN
-                *      then it's a new connection
-                */
-                
-               if (sk->state == TCP_SYN_RECV && th->syn && skb->seq+1 == sk->acked_seq)
-               {
-                       kfree_skb(skb, FREE_READ);
+
                        return 0;
                }
                
+               goto discard;
+               break;
+
+       case TCP_SYN_SENT:
+               
                /*
-                *      SYN sent means we have to look for a suitable ack and either reset
-                *      for bad matches or go to connected. The SYN_SENT case is unusual and should
+                *      SYN sent means we have to look for a suitable ack and 
+                *      either reset for bad matches or go to connected. 
+                *      The SYN_SENT case is unusual and should
                 *      not be in line code. [AC]
                 */
           
-               if(sk->state==TCP_SYN_SENT)
+               if(th->ack)
                {
-                       /* Crossed SYN or previous junk segment */
-                       if(th->ack)
+                       /* We got an ack, but it's not a good ack */
+                       if(!tcp_ack(sk,th, skb->seq, skb->ack_seq, len))
                        {
-                               /* We got an ack, but it's not a good ack.
-                                * We used to test this with a call to tcp_ack,
-                                * but this loses, because it takes the SYN
-                                * packet out of the send queue, even if
-                                * the ACK doesn't have the SYN bit sent, and
-                                * therefore isn't the one we are waiting for.
-                                */
-                               if (after(skb->ack_seq, sk->sent_seq) || before(skb->ack_seq, sk->rcv_ack_seq))
-                               {
-                                       /* Reset the ack - it's an ack from a 
-                                          different connection  [ th->rst is checked in tcp_send_reset()] */
-                                       tcp_statistics.TcpAttemptFails++;
-                                       tcp_send_reset(daddr, saddr, th,
-                                               sk->prot, opt,dev,sk->ip_tos,sk->ip_ttl);
-                                       kfree_skb(skb, FREE_READ);
-                                       return(0);
-                               }
-                               if(th->rst)
-                                       return tcp_reset(sk,skb);
-                               if(!th->syn)
-                               {
-                                       /* A valid ack from a different connection
-                                          start. Shouldn't happen but cover it */
-                                       tcp_statistics.TcpAttemptFails++;
-                                       tcp_send_reset(daddr, saddr, th,
-                                               sk->prot, opt,dev,sk->ip_tos,sk->ip_ttl);
-                                       kfree_skb(skb, FREE_READ);
-                                       return 0;
-                               }
-
-                               /* process the ACK, get the SYN packet out
-                                * of the send queue, do other initial
-                                * processing stuff. [We know it's good, and
-                                * we know it's the SYN,ACK we want.]
-                                */
-                               tcp_ack(sk,th,skb->ack_seq,len);
-
+                               tcp_statistics.TcpAttemptFails++;
+                               return 1;
+                       }
 
-                               /*
-                                *      Ok.. it's good. Set up sequence numbers and
-                                *      move to established.
-                                */
-                               sk->acked_seq = skb->seq+1;
-                               sk->lastwin_seq = skb->seq+1;
-                               sk->fin_seq = skb->seq;
-                               tcp_send_ack(sk);
-                               tcp_set_state(sk, TCP_ESTABLISHED);
-                               tcp_options(sk,th);
-                               sk->dummy_th.dest=th->source;
-                               sk->copied_seq = sk->acked_seq;
-                               if(!sk->dead)
-                               {
-                                       sk->state_change(sk);
-                                       sock_wake_async(sk->socket, 0);
-                               }
-                               if(sk->max_window==0)
-                               {
-                                       sk->max_window = 32;
-                                       sk->mss = min(sk->max_window, sk->mtu);
-                               }
-                               /* Reset the RTT estimator to the initial
-                                * state rather than testing to avoid
-                                * updating it on the ACK to the SYN packet.
-                                */
-                               sk->rtt = 0;
-                               sk->rto = TCP_TIMEOUT_INIT;
-                               sk->mdev = TCP_TIMEOUT_INIT;
+                       if(th->rst)
+                       {
+                               tcp_reset(sk,skb);
+                               goto discard;
                        }
-                       else
+
+                       if(!th->syn)
                        {
-                               /* See if SYN's cross. Drop if boring */
-                               if(th->syn && !th->rst)
-                               {
-                                       /* Crossed SYN's are fine - but talking to
-                                          yourself is right out... */
-                                       if(sk->saddr==saddr && sk->daddr==daddr &&
-                                               sk->dummy_th.source==th->source &&
-                                               sk->dummy_th.dest==th->dest)
-                                       {
-                                               tcp_statistics.TcpAttemptFails++;
-                                               return tcp_reset(sk,skb);
-                                       }
-                                       tcp_set_state(sk,TCP_SYN_RECV);
-                                       
-                                       /*
-                                        *      FIXME:
-                                        *      Must send SYN|ACK here
-                                        */
-                               }               
-                               /* Discard junk segment */
-                               kfree_skb(skb, FREE_READ);
-                               return 0;
+                               /* 
+                                *  A valid ack from a different connection
+                                *  start. Shouldn't happen but cover it 
+                                */
+                               tcp_statistics.TcpAttemptFails++;
+                               return 1;
                        }
+
                        /*
-                        *      SYN_RECV with data maybe.. drop through
+                        *      Ok.. it's good. Set up sequence 
+                        *      numbers and
+                        *      move to established.
                         */
-                       goto rfc_step6;
-               }
 
-       /*
-        *      BSD has a funny hack with TIME_WAIT and fast reuse of a port. There is
-        *      a more complex suggestion for fixing these reuse issues in RFC1644
-        *      but not yet ready for general use. Also see RFC1379.
-        *
-        *      Note the funny way we go back to the top of this function for
-        *      this case ("goto try_next_socket").  That also takes care of
-        *      checking "sk->users" for the new socket as well as doing all
-        *      the normal tests on the packet.
-        */
-       
-#define BSD_TIME_WAIT
-#ifdef BSD_TIME_WAIT
-               if (sk->state == TCP_TIME_WAIT && th->syn && sk->dead && 
-                       after(skb->seq, sk->acked_seq) && !th->rst)
-               {
-                       u32 seq = sk->write_seq;
-                       if(sk->debug)
-                               printk("Doing a BSD time wait\n");
-                       tcp_statistics.TcpEstabResets++;           
-                       atomic_sub(skb->truesize, &sk->rmem_alloc);
-                       skb->sk = NULL;
-                       sk->err=ECONNRESET;
-                       tcp_set_state(sk, TCP_CLOSE);
-                       sk->shutdown = SHUTDOWN_MASK;
-                       sk=get_sock(&tcp_prot, th->dest, saddr, th->source, daddr, dev->pa_addr, skb->redirport);
-                       /* this is not really correct: we should check sk->users */
-                       if (sk && sk->state==TCP_LISTEN)
+                       tp->rcv_nxt = skb->seq+1;
+                       tp->rcv_wnd = 0;
+                       tp->rcv_wup = skb->seq+1;
+
+                       tp->snd_wnd = htons(th->window);
+                       tp->snd_wl1 = skb->seq;
+                       tp->snd_wl2 = skb->ack_seq;
+
+                       sk->fin_seq = skb->seq;
+                       tcp_send_ack(sk);
+
+                       tcp_set_state(sk, TCP_ESTABLISHED);
+                       rcv_mss = tcp_parse_options(th);
+                       
+                       if (rcv_mss == 0)
                        {
-                               skb->sk = sk;
-                               atomic_add(skb->truesize, &sk->rmem_alloc);
-                               tcp_conn_request(sk, skb, daddr, saddr,opt, dev,seq+128000);
-                               return 0;
+                               rcv_mss = 536;
                        }
-                       kfree_skb(skb, FREE_READ);
+
+                       sk->mss = min(sk->mss, rcv_mss);
+                       
+                       sk->dummy_th.dest = th->source;
+                       sk->copied_seq = tp->rcv_nxt;
+
+                       if(!sk->dead)
+                       {
+                               sk->state_change(sk);
+                               sock_wake_async(sk->socket, 0);
+                       }
+
+                       /* Drop through step 6 */
+                       goto step6;
+               }
+               else
+               {
+                       if(th->syn && !th->rst)
+                       {
+                               /* 
+                                * the previous version of the code
+                                * checked for "connecting to self"
+                                * here. that check is done now in
+                                * tcp_connect
+                                */
+
+                               tcp_set_state(sk, TCP_SYN_RECV);
+                               
+                               tp->rcv_nxt = skb->seq + 1;
+                               tp->rcv_wup = skb->seq + 1;
+
+                               tp->snd_wnd = htons(th->window);
+                               tp->snd_wl1 = skb->seq;
+                               
+                               tcp_send_synack(sk);
+                               goto discard;
+                       }               
+
+               }
+               break;
+
+       case TCP_TIME_WAIT:
+               /*
+                *      RFC 1122:
+                *      "When a connection is [...] on TIME-WAIT state [...]
+                *      [a TCP] MAY accept a new SYN from the remote TCP to
+                *      reopen the connection directly, if it:
+                *      
+                *      (1)  assigns its initial sequence number for the new
+                 *     connection to be larger than the largest sequence
+                 *     number it used on the previous connection incarnation,
+                 *     and
+                *
+                *      (2)  returns to TIME-WAIT state if the SYN turns out 
+                *      to be an old duplicate".
+                */
+
+               if (th->syn && !th->rst && after(skb->seq, tp->rcv_nxt))
+               {
+                       __u32 isn;
+                       int err;
+
+                        atomic_sub(skb->truesize, &sk->rmem_alloc);
+                        skb->sk = NULL;
+                        sk->err = ECONNRESET;
+                        tcp_set_state(sk, TCP_CLOSE);
+                        sk->shutdown = SHUTDOWN_MASK;
+
+                       isn = tp->rcv_nxt + 128000;
+
+                       sk = tp->af_specific->get_sock(skb, th);
+
+                       if (sk == NULL)
+                               goto discard;
+
+                       skb->sk = sk;
+                       tp = &sk->tp_pinfo.af_tcp;
+                       atomic_add(skb->truesize, &sk->rmem_alloc);
+                       
+                       err = tp->af_specific->conn_request(sk, skb, opt, isn);
+
+                       if (err < 0)
+                               return 1;
+
                        return 0;
                }
-#endif 
+
+               break;
+
        }
 
        /*
-        *      We are now in normal data flow (see the step list in the RFC)
-        *      Note most of these are inline now. I'll inline the lot when
-        *      I have time to test it hard and look at what gcc outputs 
+        *      step 1: check sequence number
         */
 
-       if (!tcp_sequence(sk, skb->seq, skb->end_seq-th->syn))
+       if (!tcp_sequence(tp, skb->seq, skb->end_seq))
        {
-               bad_tcp_sequence(sk, th, skb->end_seq-th->syn, dev);
-               kfree_skb(skb, FREE_READ);
-               return 0;
+               if (!th->rst)
+               {
+                       tcp_send_ack(sk);
+                       goto discard;
+               }
        }
 
+
+       /*
+        *      step 2: check RST bit
+        */
+
        if(th->rst)
-               return tcp_reset(sk,skb);
-       
+       {
+               tcp_reset(sk,skb);
+               goto discard;
+       }
+
+       /*
+        *      step 3: check security and precedence 
+        *      [ignored]
+        */
+
        /*
+        *      step 4:
+        *
         *      Check for a SYN, and ensure it matches the SYN we were
         *      first sent. We have to handle the rather unusual (but valid)
         *      sequence that KA9Q derived products may generate of
@@ -2098,77 +1684,152 @@ int tcp_rcv(struct sk_buff *skb, struct device *dev, struct options *opt,
         *      We keep syn_seq as the sequence space occupied by the 
         *      original syn. 
         */
-        
-       if(th->syn && skb->seq!=sk->syn_seq)
+
+       if (th->syn && skb->seq!=sk->syn_seq)
        {
-               tcp_send_reset(daddr,saddr,th, &tcp_prot, opt, dev, skb->ip_hdr->tos, 255);
-               return tcp_reset(sk,skb);       
+               tcp_reset(sk, skb);
+               return 1;
        }
 
        /*
-        *      Process the ACK
+        *      step 5: check the ACK field
         */
-        
 
-       if(th->ack && !tcp_ack(sk,th,skb->ack_seq,len))
+       if (th->ack) 
        {
-               /*
-                *      Our three way handshake failed.
-                */
-                
-               if(sk->state==TCP_SYN_RECV)
-               {
-                       tcp_send_reset(daddr, saddr, th,sk->prot, opt, dev,sk->ip_tos,sk->ip_ttl);
+               int acceptable = tcp_ack(sk,th,skb->seq, skb->ack_seq,len);
+               
+               switch(sk->state) {
+               case TCP_SYN_RECV:
+                       if (acceptable)
+                       {
+                               tcp_set_state(sk, TCP_ESTABLISHED);
+                               sk->dummy_th.dest=th->source;
+                               sk->copied_seq = tp->rcv_nxt;
+
+                               if(!sk->dead)
+                                       sk->state_change(sk);           
+
+                               tp->snd_una = skb->ack_seq;
+                               tp->snd_wnd = htons(th->window);
+                               tp->snd_wl1 = skb->seq;
+                               tp->snd_wl2 = skb->ack_seq;
+
+                       }
+                       else
+                               return 1;
+                       break;
+
+               case TCP_FIN_WAIT1:
+                       
+                       if (tp->snd_una == sk->write_seq) 
+                       {
+                               sk->shutdown |= SEND_SHUTDOWN;
+                               tcp_set_state(sk, TCP_FIN_WAIT2);
+                               if (!sk->dead) 
+                                       sk->state_change(sk);
+                       }
+                       break;
+
+               case TCP_CLOSING:                       
+
+                       if (tp->snd_una == sk->write_seq) 
+                       {
+                               tcp_time_wait(sk);
+                               if (!sk->dead) 
+                                       sk->state_change(sk);
+                       }
+                       break;
+
+               case TCP_LAST_ACK:
+
+                       if (tp->snd_una == sk->write_seq) 
+                       {
+                               sk->shutdown = SHUTDOWN_MASK;
+                               tcp_set_state(sk,TCP_CLOSE);
+                               if (!sk->dead)
+                                       sk->state_change(sk);
+                               goto discard;
+                       }
+                       break;
+
+               case TCP_TIME_WAIT:
+                       /*
+                        * keep us in TIME_WAIT until we stop getting 
+                        * packets, reset the timeout.
+                        */
+                       tcp_reset_msl_timer(sk, TIME_CLOSE, TCP_TIMEWAIT_LEN);
+                       break;
+
                }
-               kfree_skb(skb, FREE_READ);
-               return 0;
        }
-       
-rfc_step6:             /* I'll clean this up later */
+       else
+               goto discard;
 
-       /*
-        *      If the accepted buffer put us over our queue size we
-        *      now drop it (we must process the ack first to avoid
-        *      deadlock cases).
-        */
+  step6:
 
        /*
-        *      Process urgent data
+        *      step 6: check the URG bit
         */
-               
+
        tcp_urg(sk, th, len);
-       
-       /*
-        *      Process the encapsulated data
-        */
-       
-       if(tcp_data(skb,sk, saddr, len))
-               kfree_skb(skb, FREE_READ);
 
        /*
-        *      If our receive queue has grown past its limits,
-        *      try to prune away duplicates etc..
+        *      step 7: process the segment text
         */
-       if (sk->rmem_alloc > sk->rcvbuf)
-               prune_queue(&sk->receive_queue);
 
-       /*
-        *      And done
-        */     
+       switch (sk->state) {
+       case TCP_CLOSE_WAIT:
+       case TCP_CLOSING:
+               if (!before(skb->seq, sk->fin_seq))
+                       break;
        
-       return 0;
+       case TCP_FIN_WAIT1:
+       case TCP_FIN_WAIT2:
 
-no_tcp_socket:
-       /*
-        *      No such TCB. If th->rst is 0 send a reset (checked in tcp_send_reset)
-        */
-       tcp_send_reset(daddr, saddr, th, &tcp_prot, opt,dev,skb->ip_hdr->tos,255);
+               /*
+                *      RFC 793 says to queue data in this states,
+                *      RFC 1122 says we MUST send a reset. 
+                *      BSD 4.4 also does reset.
+                */
+
+               if ((sk->shutdown & RCV_SHUTDOWN) && sk->dead)
+               {
+                       if (after(skb->end_seq - th->fin, tp->rcv_nxt))
+                       {
+                               tcp_reset(sk, skb);
+                               return 1;
+                       }
+               }
+               
+       case TCP_ESTABLISHED:
+               queued = tcp_data(skb, sk, len);
+               break;          
+       }
 
-discard_it:
        /*
-        *      Discard frame
+        *      step 8: check the FIN bit
         */
-       skb->sk = NULL;
+
+       if (th->fin)
+       {
+               tcp_fin(skb, sk, th);
+       }
+
+       tcp_data_snd_check(sk);
+       tcp_ack_snd_check(sk);
+
+       if (queued)
+               return 0;
+  discard:
+
        kfree_skb(skb, FREE_READ);
        return 0;
 }
+
+/*
+ * Local variables:
+ *  compile-command: "gcc -D__KERNEL__ -I/usr/src/linux/include -Wall -Wstrict-prototypes -O2 -fomit-frame-pointer -fno-strength-reduce -pipe -m486 -DCPU=486 -c -o tcp_input.o tcp_input.c"
+ * c-file-style: "Linux"
+ * End:
+ */
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
new file mode 100644 (file)
index 0000000..5008191
--- /dev/null
@@ -0,0 +1,1350 @@
+/*
+ * INET                An implementation of the TCP/IP protocol suite for the LINUX
+ *             operating system.  INET is implemented using the  BSD Socket
+ *             interface as the means of communication with the user level.
+ *
+ *             Implementation of the Transmission Control Protocol(TCP).
+ *
+ *
+ *             IPv4 specific functions
+ *
+ *
+ *             code split from:
+ *             linux/ipv4/tcp.c
+ *             linux/ipv4/tcp_input.c
+ *             linux/ipv4/tcp_output.c
+ *
+ *             See tcp.c for author information
+ *
+ *     This program is free software; you can redistribute it and/or
+ *      modify it under the terms of the GNU General Public License
+ *      as published by the Free Software Foundation; either version
+ *      2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/config.h>
+#include <linux/types.h>
+#include <linux/fcntl.h>
+#include <linux/random.h>
+
+#include <net/icmp.h>
+#include <net/tcp.h>
+#include <net/ipv6.h>
+
+#include <asm/segment.h>
+
+static void tcp_v4_send_reset(unsigned long saddr, unsigned long daddr, 
+                             struct tcphdr *th, struct proto *prot, 
+                             struct options *opt,
+                             struct device *dev, int tos, int ttl);
+
+void tcp_v4_send_check(struct sock *sk, struct tcphdr *th, int len, 
+                      struct sk_buff *skb);
+
+/*
+ *     Cached last hit socket
+ */
+static volatile unsigned long  th_cache_saddr, th_cache_daddr;
+static volatile unsigned short  th_cache_dport, th_cache_sport;
+static volatile struct sock    *th_cache_sk;
+
+void tcp_cache_zap(void)
+{
+       th_cache_sk=NULL;
+}
+
+/*
+ *     Find the socket, using the last hit cache if applicable.
+ *     The cache is not quite right...
+ */
+
+static inline struct sock * get_tcp_sock(u32 saddr, u16 sport, 
+                                        u32 daddr, u16 dport,
+                                        u32 paddr, u16 pport)
+{
+       struct sock * sk;
+
+       sk = (struct sock *) th_cache_sk;
+       if (!sk || saddr != th_cache_saddr || daddr != th_cache_daddr ||
+           sport != th_cache_sport || dport != th_cache_dport) {
+               sk = get_sock(&tcp_prot, dport, saddr, sport, daddr, 
+                             paddr, pport);
+               if (sk) {
+                       th_cache_saddr=saddr;
+                       th_cache_daddr=daddr;
+                       th_cache_dport=dport;
+                       th_cache_sport=sport;
+                       th_cache_sk=sk;
+               }
+       }
+       return sk;
+}
+
+static __u32 tcp_v4_init_sequence(struct sock *sk, struct sk_buff *skb)
+{
+       return secure_tcp_sequence_number(sk->saddr, sk->daddr,
+                                         skb->h.th->dest,
+                                         skb->h.th->source);
+}
+
+/*
+ *     From tcp.c
+ */
+
+/*
+ * Check that a TCP address is unique, don't allow multiple
+ * connects to/from the same address
+ */
+
+static int tcp_unique_address(u32 saddr, u16 snum, u32 daddr, u16 dnum)
+{
+       int retval = 1;
+       struct sock * sk;
+
+       /* Make sure we are allowed to connect here. */
+       cli();
+       for (sk = tcp_prot.sock_array[snum & (SOCK_ARRAY_SIZE -1)];
+                       sk != NULL; sk = sk->next)
+       {
+               /* hash collision? */
+               if (sk->num != snum)
+                       continue;
+               if (sk->saddr != saddr)
+                       continue;
+               if (sk->daddr != daddr)
+                       continue;
+               if (sk->dummy_th.dest != dnum)
+                       continue;
+               retval = 0;
+               break;
+       }
+       sti();
+       return retval;
+}
+
+/*
+ *     This will initiate an outgoing connection. 
+ */
+int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
+{
+       struct sk_buff *buff;
+       struct sk_buff *skb1;
+       struct device *dev=NULL;
+       unsigned char *ptr;
+       int tmp;
+       int atype;
+       struct tcphdr *t1;
+       struct rtable *rt;
+       struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
+       struct sockaddr_in *usin = (struct sockaddr_in *) uaddr;
+
+       if (sk->state != TCP_CLOSE) 
+               return(-EISCONN);
+
+       /*
+        *      Don't allow a double connect.
+        */
+               
+       if(sk->daddr)
+               return -EINVAL;
+       
+       if (addr_len < sizeof(struct sockaddr_in)) 
+               return(-EINVAL);
+
+       if (usin->sin_family && usin->sin_family != AF_INET) 
+               return(-EAFNOSUPPORT);
+
+       /*
+        *      connect() to INADDR_ANY means loopback (BSD'ism).
+        */
+       
+       if (usin->sin_addr.s_addr==INADDR_ANY)
+               usin->sin_addr.s_addr=ip_my_addr();
+                 
+       /*
+        *      Don't want a TCP connection going to a broadcast address 
+        */
+
+       if ((atype=ip_chk_addr(usin->sin_addr.s_addr)) == IS_BROADCAST 
+           || atype==IS_MULTICAST)
+       { 
+               return -ENETUNREACH;
+       }
+
+       if (!tcp_unique_address(sk->saddr, sk->num, usin->sin_addr.s_addr,
+                               usin->sin_port))
+       {
+               return -EADDRNOTAVAIL;
+       }
+  
+       lock_sock(sk);
+       sk->daddr = usin->sin_addr.s_addr;
+       sk->dummy_th.dest = usin->sin_port;
+       sk->write_seq = secure_tcp_sequence_number(sk->saddr, sk->daddr,
+                                                  sk->dummy_th.source,
+                                                  usin->sin_port);
+
+       tp->snd_wnd = 0;
+       tp->snd_wl1 = 0;
+       tp->snd_wl2 = sk->write_seq;
+       tp->snd_una = sk->write_seq;
+
+       tp->rcv_nxt = 0;
+
+       sk->err = 0;
+       
+       buff = sock_wmalloc(sk,MAX_SYN_SIZE,0, GFP_KERNEL);
+       if (buff == NULL) 
+       {
+               release_sock(sk);
+               return(-ENOMEM);
+       }
+
+       buff->sk = sk;
+       buff->free = 0;
+       buff->localroute = sk->localroute;
+       
+       /*
+        *      Put in the IP header and routing stuff.
+        */
+       
+       tmp = ip_build_header(buff, sk->saddr, sk->daddr, &dev,
+                             IPPROTO_TCP, NULL, MAX_SYN_SIZE, sk->ip_tos, 
+                             sk->ip_ttl,&sk->ip_route_cache);
+
+       if (tmp < 0) 
+       {
+               sock_wfree(sk, buff);
+               release_sock(sk);
+               return(-ENETUNREACH);
+       }
+       if ((rt = sk->ip_route_cache) != NULL && !sk->saddr)
+               sk->saddr = rt->rt_src;
+       sk->rcv_saddr = sk->saddr;
+
+       t1 = (struct tcphdr *) skb_put(buff,sizeof(struct tcphdr));
+       buff->h.th = t1;
+
+       memcpy(t1,(void *)&(sk->dummy_th), sizeof(*t1));
+       buff->seq = sk->write_seq++;
+       t1->seq = htonl(buff->seq);
+       tp->snd_nxt = sk->write_seq;
+       buff->end_seq = sk->write_seq;
+       t1->ack = 0;
+       t1->window = htons(512);
+       t1->syn = 1;
+       t1->doff = 6;
+
+       /* use 512 or whatever user asked for */
+
+       if(rt!=NULL && (rt->rt_flags&RTF_WINDOW))
+               sk->window_clamp=rt->rt_window;
+       else
+               sk->window_clamp=0;
+
+
+       if (rt)
+               sk->mtu = rt->rt_mtu;
+       else
+               sk->mtu = dev->mtu;
+       
+#ifdef CONFIG_SKIP
+
+       /*
+        *      SKIP devices set their MTU to 65535. This is so they can take packets
+        *      unfragmented to security process then fragment. They could lie to the
+        *      TCP layer about a suitable MTU, but its easier to let skip sort it out
+        *      simply because the final package we want unfragmented is going to be
+        *
+        *      [IPHDR][IPSP][Security data][Modified TCP data][Security data]
+        */
+
+       if(skip_pick_mtu!=NULL)         /* If SKIP is loaded.. */
+               sk->mtu=skip_pick_mtu(sk->mtu,dev);
+#endif
+
+       if(sk->mtu < 64)
+               sk->mtu = 64;   /* Sanity limit */
+
+       if (sk->user_mss)
+               sk->mss = sk->user_mss;
+       else
+               sk->mss = (sk->mtu - sizeof(struct iphdr) - 
+                          sizeof(struct tcphdr));
+
+       /*
+        *      Put in the TCP options to say MSS.
+        */
+
+       ptr = skb_put(buff,4);
+       ptr[0] = TCPOPT_MSS;
+       ptr[1] = TCPOLEN_MSS;
+       ptr[2] = (sk->mss) >> 8;
+       ptr[3] = (sk->mss) & 0xff;
+       buff->csum = csum_partial(ptr, 4, 0);
+       tcp_v4_send_check(sk, t1, sizeof(struct tcphdr) + 4, buff);
+
+       /*
+        *      This must go first otherwise a really quick response 
+        *      will get reset.
+        */
+
+       tcp_cache_zap();
+       tcp_set_state(sk,TCP_SYN_SENT);
+
+       if(rt && (rt->rt_flags&RTF_IRTT))
+               tp->rto = rt->rt_irtt;
+       else
+               tp->rto = TCP_TIMEOUT_INIT;
+
+       tcp_init_xmit_timers(sk);
+       
+       /* Now works the right way instead of a hacked initial setting */
+       sk->retransmits = 0;
+
+       skb_queue_tail(&sk->write_queue, buff);
+
+       sk->packets_out++;
+       buff->when = jiffies;
+
+       skb1 = skb_clone(buff, GFP_KERNEL);
+       sk->wmem_alloc += skb1->truesize;
+       ip_queue_xmit(sk, dev, skb1, 1);  
+
+       /* Timer for repeating the SYN until an answer  */
+       tcp_reset_xmit_timer(sk, TIME_RETRANS, tp->rto);
+       tcp_statistics.TcpActiveOpens++;
+       tcp_statistics.TcpOutSegs++;
+  
+       release_sock(sk);
+       return(0);
+}
+
+static int tcp_v4_sendmsg(struct sock *sk, struct msghdr *msg,
+                         int len, int nonblock, int flags)
+{
+       int retval = -EINVAL;
+
+       /*
+        *      Do sanity checking for sendmsg/sendto/send
+        */
+
+       if (flags & ~(MSG_OOB|MSG_DONTROUTE))
+               goto out;
+       if (msg->msg_name) {
+               struct sockaddr_in *addr=(struct sockaddr_in *)msg->msg_name;
+
+               if (msg->msg_namelen < sizeof(*addr))
+                       goto out;
+               if (addr->sin_family && addr->sin_family != AF_INET)
+                       goto out;
+               retval = -ENOTCONN;
+               if(sk->state == TCP_CLOSE)
+                       goto out;
+               retval = -EISCONN;
+               if (addr->sin_port != sk->dummy_th.dest)
+                       goto out;
+               if (addr->sin_addr.s_addr != sk->daddr)
+                       goto out;
+       }
+
+       lock_sock(sk);
+       retval = tcp_do_sendmsg(sk, msg->msg_iovlen, msg->msg_iov, 
+                               len, nonblock, flags);
+
+       release_sock(sk);
+
+out:
+       return retval;
+}
+
+/*
+ * This routine is called by the ICMP module when it gets some
+ * sort of error condition.  If err < 0 then the socket should
+ * be closed and the error returned to the user.  If err > 0
+ * it's just the icmp type << 8 | icmp code.  After adjustment
+ * header points to the first 8 bytes of the tcp header.  We need
+ * to find the appropriate port.
+ */
+
+void tcp_v4_err(int type, int code, unsigned char *header, __u32 info,
+               __u32 daddr, __u32 saddr, struct inet_protocol *protocol)
+{
+       struct tcphdr *th = (struct tcphdr *)header;
+       struct tcp_opt *tp;
+       struct sock *sk;
+
+       th =(struct tcphdr *)header;
+       sk = get_sock(&tcp_prot, th->source, daddr, th->dest, saddr, 0, 0);
+
+       if (sk == NULL)
+               return;
+
+       if (type == ICMP_SOURCE_QUENCH)
+       {
+               /*
+                * FIXME:
+                * Follow BSD for now and just reduce cong_window to 1 again.
+                * It is possible that we just want to reduce the
+                * window by 1/2, or that we want to reduce ssthresh by 1/2
+                * here as well.
+                */
+
+               tp = &sk->tp_pinfo.af_tcp;
+
+               sk->cong_window = 1;
+               tp->high_seq = tp->snd_nxt;
+               
+               return;
+       }
+
+       if (type == ICMP_PARAMETERPROB)
+       {
+               sk->err=EPROTO;
+               sk->error_report(sk);
+       }
+
+#ifndef CONFIG_NO_PATH_MTU_DISCOVERY
+       if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED)
+       {
+               struct rtable * rt;
+               /*
+                * Ugly trick to pass MTU to protocol layer.
+                * Really we should add argument "info" to error handler.
+                */
+               unsigned short new_mtu = info;
+
+               if ((rt = sk->ip_route_cache) != NULL)
+                       if (rt->rt_mtu > new_mtu)
+                               rt->rt_mtu = new_mtu;
+
+               if ((sk->mtu > new_mtu) &&
+                   (new_mtu > sizeof(struct iphdr)+sizeof(struct tcphdr)))
+               {
+                       sk->mss = (new_mtu - sizeof(struct iphdr) 
+                                  - sizeof(struct tcphdr));
+               }
+
+               return;
+       }
+#endif
+
+       /*
+        * If we've already connected we will keep trying
+        * until we time out, or the user gives up.
+        */
+
+       if (code <= NR_ICMP_UNREACH)
+       {
+               if(icmp_err_convert[code].fatal || sk->state == TCP_SYN_SENT || sk->state == TCP_SYN_RECV)
+               {
+                       sk->err = icmp_err_convert[code].errno;
+                       if (sk->state == TCP_SYN_SENT || sk->state == TCP_SYN_RECV)
+                       {
+                               tcp_statistics.TcpAttemptFails++;
+                               tcp_set_state(sk,TCP_CLOSE);
+                               sk->error_report(sk);           /* Wake people up to see the error (see connect in sock.c) */
+                       }
+               }
+               else    /* Only an error on timeout */
+                       sk->err_soft = icmp_err_convert[code].errno;
+       }
+}
+
+/*
+ *     This routine computes a TCP checksum.
+ *
+ *     Modified January 1995 from a go-faster DOS routine by
+ *     Jorge Cwik <jorge@laser.satlink.net>
+ */
+void tcp_v4_send_check(struct sock *sk, struct tcphdr *th, int len, 
+                      struct sk_buff *skb)
+{
+       __u32 saddr = sk->saddr;
+       __u32 daddr = sk->daddr;
+#ifdef DEBUG_TCP_CHECK
+       u16 check;
+#endif
+       th->check = 0;
+       th->check = tcp_v4_check(th, len, saddr, daddr,
+                                csum_partial((char *)th, sizeof(*th), 
+                                             skb->csum));
+
+#ifdef DEBUG_TCP_CHECK
+       check = th->check;
+       th->check = 0;
+       th->check = tcp_v4_check(th, len, saddr, daddr,
+               csum_partial((char *)th,len,0));
+       if (check != th->check) {
+               static int count = 0;
+               if (++count < 10) {
+                       printk("Checksum %x (%x) from %p\n", th->check, check,
+                              __builtin_return_address(0));
+                       printk("TCP=<off:%d a:%d s:%d f:%d> len=%d\n", th->doff*4, th->ack, th->syn, th->fin, len);
+               }
+       }
+#endif
+}
+
+/*
+ *     This routine will send an RST to the other tcp. 
+ */
+static void tcp_v4_send_reset(unsigned long saddr, unsigned long daddr, 
+                             struct tcphdr *th, struct proto *prot, 
+                             struct options *opt,
+                             struct device *dev, int tos, int ttl)
+{
+       struct sk_buff *buff;
+       struct tcphdr *t1;
+       int tmp;
+       struct device *ndev=NULL;
+
+       /*
+        *      Cannot reset a reset (Think about it).
+        */
+        
+       if(th->rst)
+               return;
+  
+       /*
+        * We need to grab some memory, and put together an RST,
+        * and then put it into the queue to be sent.
+        */
+
+       buff = alloc_skb(MAX_RESET_SIZE, GFP_ATOMIC);
+       if (buff == NULL) 
+               return;
+
+       buff->sk = NULL;
+       buff->dev = dev;
+       buff->localroute = 0;
+
+
+       /*
+        *      Put in the IP header and routing stuff. 
+        */
+
+       tmp = ip_build_header(buff, saddr, daddr, &ndev, IPPROTO_TCP, opt,
+                             sizeof(struct tcphdr),tos,ttl,NULL);
+       if (tmp < 0) 
+       {
+               buff->free = 1;
+               sock_wfree(NULL, buff);
+               return;
+       }
+
+       t1 =(struct tcphdr *)skb_put(buff,sizeof(struct tcphdr));
+       memset(t1, 0, sizeof(*t1));
+
+       /*
+        *      Swap the send and the receive. 
+        */
+
+       t1->dest = th->source;
+       t1->source = th->dest;
+       t1->doff = sizeof(*t1)/4;
+       t1->rst = 1;
+  
+       if(th->ack)
+       {
+               t1->seq = th->ack_seq;
+       }
+       else
+       {
+               t1->ack = 1;
+               if(!th->syn)
+                       t1->ack_seq = th->seq;
+               else
+                       t1->ack_seq = htonl(ntohl(th->seq)+1);
+       }
+
+
+       buff->csum = csum_partial((u8 *) t1, sizeof(*t1), 0);
+       t1->check = tcp_v4_check(t1, sizeof(*t1), saddr, daddr, buff->csum);
+
+       ip_queue_xmit(NULL, ndev, buff, 1);
+       tcp_statistics.TcpOutSegs++;
+}
+
+#ifdef CONFIG_IP_TRANSPARENT_PROXY
+/*
+ *     Check whether a received TCP packet might be for one of our
+ *     connections.
+ */
+
+int tcp_chkaddr(struct sk_buff *skb)
+{
+       struct iphdr *iph = skb->h.iph;
+       struct tcphdr *th = (struct tcphdr *)(skb->h.raw + iph->ihl*4);
+       struct sock *sk;
+
+       sk = get_sock(&tcp_prot, th->dest, iph->saddr, th->source, iph->daddr,
+                     0, 0);
+
+       if (!sk)
+               return 0;
+
+       /* 0 means accept all LOCAL addresses here, not all the world... */
+
+       if (sk->rcv_saddr == 0)
+               return 0;
+
+       return 1;
+}
+#endif
+
+static void tcp_v4_send_synack(struct sock *sk, struct open_request *req)
+{
+       struct tcp_v4_open_req *af_req = (struct tcp_v4_open_req *) req;
+       struct tcp_opt *tp = &sk->tp_pinfo.af_tcp;
+       struct sk_buff * skb;
+       struct device *dev = NULL;
+       struct rtable *rt = NULL;
+       struct tcphdr *th;
+       unsigned char *ptr;
+       int mss;
+       int tmp;
+
+       skb = sock_wmalloc(sk, MAX_SYN_SIZE, 1, GFP_ATOMIC);
+       
+       if (skb == NULL)
+       {
+               return;
+       }
+
+       tmp = ip_build_header(skb, af_req->loc_addr, af_req->rmt_addr, &dev,
+                             IPPROTO_TCP, af_req->opt, skb->truesize, 
+                             sk->ip_tos, sk->ip_ttl, &rt);
+
+       if (tmp < 0)
+       {
+               skb->free = 1;
+               kfree_skb(skb, FREE_WRITE);
+               return;
+       }
+
+       skb->dev = dev;
+
+       if (rt)
+               mss = rt->rt_mtu;
+       else
+               mss = dev->mtu;
+       
+       mss -= sizeof(struct iphdr) + sizeof(struct tcphdr);
+       
+       if (sk->user_mss)
+               mss = min(mss, sk->user_mss);
+       
+       ip_rt_put(rt);
+       
+       th =(struct tcphdr *) skb_put(skb, sizeof(struct tcphdr));
+       skb->h.th = th;
+       memset(th, 0, sizeof(struct tcphdr));
+       
+       th->syn = 1;
+       th->ack = 1;
+
+       th->source = sk->dummy_th.source;
+       th->dest = req->rmt_port;
+              
+       skb->seq = req->snt_isn;
+       skb->end_seq = skb->seq + 1;
+
+       th->seq = ntohl(skb->seq);
+       th->ack_seq = htonl(req->rcv_isn + 1);
+       th->doff = sizeof(*th)/4 + 1;
+       
+       th->window = ntohs(tp->rcv_wnd);
+
+       ptr = skb_put(skb, TCPOLEN_MSS);
+       ptr[0] = TCPOPT_MSS;
+       ptr[1] = TCPOLEN_MSS;
+       ptr[2] = (mss >> 8) & 0xff;
+       ptr[3] = mss & 0xff;
+       skb->csum = csum_partial(ptr, TCPOLEN_MSS, 0);
+
+       th->check = tcp_v4_check(th, sizeof(*th) + TCPOLEN_MSS, af_req->loc_addr, 
+                                af_req->rmt_addr,
+                                csum_partial((char *)th, sizeof(*th), skb->csum));
+
+       ip_queue_xmit(sk, dev, skb, 1);
+       tcp_statistics.TcpOutSegs++;
+                                             
+}
+
+static void tcp_v4_or_free(struct open_request *req)
+{
+       struct tcp_v4_open_req *af_req = (struct tcp_v4_open_req *) req;
+
+       if (af_req->req.sk)
+               return;
+       
+       if (af_req->opt)
+       {
+               kfree_s(af_req->opt, sizeof(struct options) + af_req->opt->optlen);
+       }
+}
+
+static struct or_calltable or_ipv4 = {
+       tcp_v4_send_synack,
+       tcp_v4_or_free
+};
+
+static int tcp_v4_syn_filter(struct sock *sk, struct sk_buff *skb, __u32 saddr)
+{
+       return 0;
+}
+
+int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb, void *ptr, __u32 isn)
+{
+       struct options *opt = (struct options *) ptr;
+       struct tcp_v4_open_req *af_req;
+       struct open_request *req;
+       struct tcphdr *th = skb->h.th;
+       __u32 saddr = skb->saddr;
+       __u32 daddr = skb->daddr;
+
+       /* If the socket is dead, don't accept the connection.  */
+       if (sk->dead)
+       {
+               if(sk->debug)
+               {
+                       printk("Reset on %p: Connect on dead socket.\n",sk);
+               }
+               tcp_statistics.TcpAttemptFails++;
+               return -ENOTCONN;               
+       }
+
+       if (sk->ack_backlog >= sk->max_ack_backlog || 
+           tcp_v4_syn_filter(sk, skb, saddr))
+       {
+               printk(KERN_DEBUG "droping syn ack:%d max:%d\n",
+                      sk->ack_backlog, sk->max_ack_backlog);
+#ifdef CONFIG_IP_TCPSF
+               tcp_v4_random_drop(sk);
+#endif
+               tcp_statistics.TcpAttemptFails++;
+               goto exit;
+       }
+
+
+       af_req = kmalloc(sizeof(struct tcp_v4_open_req), GFP_ATOMIC);
+
+       if (af_req == NULL)
+       {
+               tcp_statistics.TcpAttemptFails++;
+               goto exit;
+       }
+
+       sk->ack_backlog++;
+       req = (struct open_request *) af_req;
+
+       memset(af_req, 0, sizeof(struct tcp_v4_open_req));
+
+       req->rcv_isn = skb->seq;
+       req->snt_isn = isn;
+
+       /* mss */
+       req->mss = tcp_parse_options(th);
+
+       if (!req->mss)
+       {
+               req->mss = 536;
+       }
+
+       req->rmt_port = th->source;
+
+       af_req->loc_addr = daddr;
+       af_req->rmt_addr = saddr;
+       
+       /*
+        *      options
+        */
+
+       if (opt && opt->optlen)
+       {
+               af_req->opt = (struct options*) kmalloc(sizeof(struct options) +
+                                                       opt->optlen, GFP_ATOMIC);
+               if (af_req->opt) 
+               {
+                       if (ip_options_echo(af_req->opt, opt, skb->daddr, 
+                                           skb->saddr, skb))
+                       {
+                               kfree_s(af_req->opt, sizeof(struct options) + 
+                                       opt->optlen);
+                               af_req->opt = NULL;
+                       }
+               }
+       }
+
+       req->class = &or_ipv4;
+
+       tcp_v4_send_synack(sk, req);
+       
+       req->expires = jiffies + TCP_TIMEOUT_INIT;
+       tcp_inc_slow_timer(TCP_SLT_SYNACK);
+       tcp_synq_queue(&sk->tp_pinfo.af_tcp, req);      
+
+       sk->data_ready(sk, 0);
+
+  exit:
+       kfree_skb(skb, FREE_READ);
+       return 0;
+}
+
+struct sock * tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
+                                  struct open_request *req)
+{
+       struct tcp_v4_open_req *af_req = (struct tcp_v4_open_req *) req;
+       struct tcp_opt *newtp;
+       struct sock *newsk;
+       struct rtable *rt;
+       int snd_mss;
+
+       newsk = (struct sock *) kmalloc(sizeof(struct sock), GFP_ATOMIC);
+       if (newsk == NULL)
+       {
+               return NULL;
+       }
+
+       memcpy(newsk, sk, sizeof(*newsk));
+       newsk->opt = NULL;
+       newsk->ip_route_cache  = NULL;
+       skb_queue_head_init(&newsk->write_queue);
+       skb_queue_head_init(&newsk->receive_queue);
+       skb_queue_head_init(&newsk->out_of_order_queue);
+       
+       /*
+        *      Unused
+        */
+
+       newsk->send_head = NULL;
+       newsk->send_tail = NULL;
+
+       newtp = &(newsk->tp_pinfo.af_tcp);
+       newtp->send_head = NULL;
+       newtp->retrans_head = NULL;
+
+       newtp->pending = 0;
+
+       skb_queue_head_init(&newsk->back_log);
+
+       newsk->prot->init(newsk);
+
+       newsk->cong_count = 0;
+       newsk->ssthresh = 0;
+       newtp->backoff = 0;
+       newsk->blog = 0;
+       newsk->intr = 0;
+       newsk->proc = 0;
+       newsk->done = 0;
+       newsk->partial = NULL;
+       newsk->pair = NULL;
+       newsk->wmem_alloc = 0;
+       newsk->rmem_alloc = 0;
+       newsk->localroute = sk->localroute;
+
+       newsk->max_unacked = MAX_WINDOW - TCP_WINDOW_DIFF;
+
+       newsk->err = 0;
+       newsk->shutdown = 0;
+       newsk->ack_backlog = 0;
+
+       newsk->fin_seq = req->rcv_isn;
+       newsk->syn_seq = req->rcv_isn;
+       newsk->state = TCP_SYN_RECV;
+       newsk->timeout = 0;
+       newsk->ip_xmit_timeout = 0;
+
+       newsk->write_seq = req->snt_isn;
+
+       newtp->snd_wnd = ntohs(skb->h.th->window);
+       newsk->max_window = newtp->snd_wnd;
+       newtp->snd_wl1 = req->rcv_isn;
+       newtp->snd_wl2 = newsk->write_seq;
+       newtp->snd_una = newsk->write_seq++;
+       newtp->snd_nxt = newsk->write_seq;
+
+       newsk->urg_data = 0;
+       newsk->packets_out = 0;
+       newsk->retransmits = 0;
+       newsk->linger=0;
+       newsk->destroy = 0;
+       init_timer(&newsk->timer);
+       newsk->timer.data = (unsigned long) newsk;
+       newsk->timer.function = &net_timer;
+
+       tcp_init_xmit_timers(newsk);
+
+       newsk->dummy_th.source = sk->dummy_th.source;
+       newsk->dummy_th.dest = req->rmt_port;
+       
+       newtp->rcv_nxt = req->rcv_isn + 1;
+       newtp->rcv_wup = req->rcv_isn + 1;
+       newsk->copied_seq = req->rcv_isn + 1;
+
+       newsk->socket = NULL;
+
+       newsk->daddr = af_req->rmt_addr;
+       newsk->saddr = af_req->loc_addr;
+       newsk->rcv_saddr = af_req->loc_addr;
+       
+       /*
+        *      options / mss / route_cache
+        */
+       newsk->opt = af_req->opt;
+       rt = ip_rt_route(newsk->opt && newsk->opt->srr ? newsk->opt->faddr : 
+                        newsk->saddr, 0);
+
+       newsk->ip_route_cache = rt;
+       
+       if(rt != NULL && (rt->rt_flags&RTF_WINDOW))
+               newsk->window_clamp = rt->rt_window;
+       else
+               newsk->window_clamp = 0;
+
+       if (rt)
+               snd_mss = rt->rt_mtu;
+       else
+               snd_mss = skb->dev->mtu;
+       
+       newsk->mtu = snd_mss;
+       /* sanity check */
+       if (newsk->mtu < 64)
+       {
+               newsk->mtu = 64;
+       }
+
+       snd_mss -= sizeof(struct iphdr) - sizeof(struct tcphdr);
+
+       if (sk->user_mss)
+       {
+               snd_mss = min(snd_mss, sk->user_mss);
+       }
+       
+       newsk->mss = min(req->mss, snd_mss);
+       
+       inet_put_sock(newsk->num, newsk);
+
+       tcp_cache_zap();
+
+       return newsk;
+}
+
+/*
+ *     From tcp_input.c
+ */
+
+int tcp_v4_rcv(struct sk_buff *skb, struct device *dev, struct options *opt,
+              __u32 daddr, unsigned short len,
+              __u32 saddr, int redo, struct inet_protocol * protocol)
+{
+       struct tcphdr *th;      
+       struct sock *sk;
+
+       /*
+        * "redo" is 1 if we have already seen this skb but couldn't
+        * use it at that time (the socket was locked).  In that case
+        * we have already done a lot of the work (looked up the socket
+        * etc).
+        */
+
+       th = skb->h.th;
+
+       sk = skb->sk;
+
+       if (!redo)
+       {
+
+               if (skb->pkt_type!=PACKET_HOST)
+                       goto discard_it;
+
+               /*
+                *      Pull up the IP header.
+                */
+       
+               skb_pull(skb, skb->h.raw-skb->data);
+
+               /*
+                *      Try to use the device checksum if provided.
+                */
+               
+               switch (skb->ip_summed) 
+               {
+                       case CHECKSUM_NONE:
+                               skb->csum = csum_partial((char *)th, len, 0);
+                       case CHECKSUM_HW:
+                               if (tcp_v4_check(th,len,saddr,daddr,skb->csum))
+                                       goto discard_it;
+                       default:
+                               /* CHECKSUM_UNNECESSARY */
+               }
+
+               sk = get_tcp_sock(saddr, th->source, daddr, th->dest,
+                                 dev->pa_addr, skb->redirport);
+
+               if (!sk)
+                       goto no_tcp_socket;
+
+               skb->sk = sk;
+               skb->seq = ntohl(th->seq);
+               skb->end_seq = skb->seq + th->syn + th->fin + len - th->doff*4;
+               skb->ack_seq = ntohl(th->ack_seq);
+               
+               skb->acked = 0;
+               skb->used = 0;
+               skb->free = 1;
+               skb->saddr = saddr;
+               skb->daddr = daddr;             
+       }               
+
+       /*
+        * We may need to add it to the backlog here. 
+        */
+
+       if (sk->users)
+       {
+               __skb_queue_tail(&sk->back_log, skb);
+               return(0);
+       }
+
+       if (!sk->prot)
+       {
+               printk(KERN_DEBUG "tcp_rcv: sk->prot == NULL\n");
+               return(0);
+       }
+
+       atomic_add(skb->truesize, &sk->rmem_alloc);
+
+       if (sk->state == TCP_ESTABLISHED)
+       {
+               tcp_rcv_established(sk, skb, th, len);
+               return 0;
+       }
+
+       if (sk->state == TCP_LISTEN)
+       {
+               struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
+               struct open_request *req;
+               struct tcp_v4_open_req *af_req;
+
+               /*
+                *      assumption: the socket is not in use.
+                *      as we checked the user count above and we're
+                *      running from a soft interrupt.
+                */
+               
+               req = tp->syn_wait_queue;
+               af_req = (struct tcp_v4_open_req *) req;
+               
+               if (req)
+               {
+                       do {
+                               if (af_req->rmt_addr == saddr &&
+                                   af_req->loc_addr == daddr &&
+                                   req->rmt_port == th->source)
+                               {
+                                       if (req->sk)
+                                       {
+                                               printk(KERN_DEBUG "bug: syn_recv socket "
+                                                      "exists\n");
+                                               break;
+                                       }
+
+                                       /* match */
+
+                                       atomic_sub(skb->truesize, &sk->rmem_alloc);
+                                       sk = tp->af_specific->syn_recv_sock(sk, skb, req);
+
+                                       tcp_dec_slow_timer(TCP_SLT_SYNACK);
+
+                                       if (sk == NULL)
+                                       {
+                                               goto no_tcp_socket;
+                                       }
+                                       
+                                       atomic_add(skb->truesize, &sk->rmem_alloc);
+                                       req->sk = sk;
+                                       skb->sk = sk;
+                                       break;
+                               }
+
+                               req = req->dl_next;
+                       } while (req != tp->syn_wait_queue);
+               }
+       }
+       
+       if (tcp_rcv_state_process(sk, skb, th, opt, len) == 0)
+               return 0;
+
+no_tcp_socket:
+
+       /*
+        *      No such TCB. If th->rst is 0 send a reset 
+        *      (checked in tcp_send_reset)
+        */
+
+       tcp_v4_send_reset(daddr, saddr, th, &tcp_prot, opt, dev, 
+                         skb->ip_hdr->tos, 255);
+
+discard_it:
+
+       /*
+        *      Discard frame
+        */
+
+       kfree_skb(skb, FREE_READ);
+       return 0;
+}
+
+int tcp_v4_rebuild_header(struct sock *sk, struct sk_buff *skb)
+{      
+       struct options * opt = (struct options*)skb->proto_priv;
+       struct device * dev;
+       struct rtable *rt;
+       struct iphdr *iph;
+       struct tcphdr *th;
+       int size;
+
+       /*
+        *      Discard the surplus MAC header
+        */
+       
+       skb_pull(skb, ((unsigned char *)skb->ip_hdr)-skb->data);
+
+       iph = skb->ip_hdr;
+       th = (struct tcphdr *)(((char *)iph) + (iph->ihl << 2));
+       size = skb->tail - (unsigned char *) th;
+
+       dev = skb->dev;
+
+       rt = ip_check_route(&sk->ip_route_cache, 
+                           opt->srr?opt->faddr:iph->daddr, 
+                           skb->localroute);
+
+
+#ifndef CONFIG_NO_PATH_MTU_DISCOVERY
+       if (rt && ntohs(iph->tot_len) > rt->rt_mtu)
+               iph->frag_off &= ~htons(IP_DF);
+#endif
+                       
+       if (rt==NULL)   /* Deep poo */
+       {
+               if(skb->sk)
+               {
+                       skb->sk->err_soft=ENETUNREACH;
+                       skb->sk->error_report(skb->sk);
+               }
+               return -1;
+       }
+
+
+       dev=rt->rt_dev;
+       skb->raddr=rt->rt_gateway;
+       skb->dev=dev;
+       skb->arp=1;
+
+       if (rt->rt_hh)
+       {
+               memcpy(skb_push(skb, dev->hard_header_len), 
+                      rt->rt_hh->hh_data, dev->hard_header_len);
+
+               if (!rt->rt_hh->hh_uptodate)
+               {
+                       skb->arp = 0;
+#if RT_CACHE_DEBUG >= 2
+                       printk("tcp_do_rebuild_header: "
+                              "hh miss %08x via %08x\n", 
+                              iph->daddr, rt->rt_gateway);
+#endif
+               }
+       }
+       else if (dev->hard_header)
+       {
+               if(dev->hard_header(skb, dev, ETH_P_IP, NULL, NULL, 
+                                   skb->len)<0)
+                       skb->arp=0;
+       }
+
+       return 0;       
+}
+
+int tcp_v4_backlog_rcv(struct sock *sk, struct sk_buff *skb)
+{
+       return tcp_v4_rcv(skb, skb->dev, (struct options *) skb->proto_priv,
+                         skb->daddr, skb->len, skb->saddr, 1,
+                         (struct inet_protocol *) sk->pair);
+}
+
+static struct sock * tcp_v4_get_sock(struct sk_buff *skb, struct tcphdr *th)
+{
+       struct sock *sk;
+
+       sk = get_tcp_sock(skb->saddr, th->source, skb->daddr, th->dest, 0, 0);
+
+       return sk;
+}
+
+int tcp_v4_build_header(struct sock *sk, struct sk_buff *skb)
+{
+       struct device *dev = NULL;
+       int tmp;
+
+       tmp = ip_build_header(skb, sk->saddr, sk->daddr, &dev,
+                             IPPROTO_TCP, sk->opt, skb->truesize, 
+                             sk->ip_tos, sk->ip_ttl, 
+                             &sk->ip_route_cache);
+       skb->dev = dev;
+
+#ifndef CONFIG_NO_PATH_MTU_DISCOVERY
+       if (tmp > 0)
+       {
+               skb->ip_hdr->frag_off |= htons(IP_DF);
+       }
+#endif
+
+       return tmp;
+}
+
+
+static void v4_addr2sockaddr(struct sock *sk, struct sockaddr * uaddr)
+{
+       struct sockaddr_in *sin = (struct sockaddr_in *) uaddr;
+       
+       sin->sin_family         = AF_INET;
+       sin->sin_addr.s_addr    = sk->daddr;
+       sin->sin_port           = sk->dummy_th.dest;
+
+}
+
+struct tcp_func ipv4_specific = {
+       tcp_v4_build_header,
+       ip_queue_xmit,
+       tcp_v4_send_check,
+       tcp_v4_rebuild_header,
+       tcp_v4_conn_request,
+       tcp_v4_syn_recv_sock,
+       tcp_v4_init_sequence,
+       tcp_v4_get_sock,
+       ip_setsockopt,
+       ip_getsockopt,
+       v4_addr2sockaddr,
+       sizeof(struct sockaddr_in)
+};
+
+static int tcp_v4_init_sock(struct sock *sk)
+{
+       struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
+
+       skb_queue_head_init(&sk->out_of_order_queue);
+       tcp_init_xmit_timers(sk);
+
+       tp->srtt  = 0;
+       tp->rto  = TCP_TIMEOUT_INIT;            /*TCP_WRITE_TIME*/
+       tp->mdev = TCP_TIMEOUT_INIT;
+
+       tp->ato = 0;
+       tp->iat = (HZ/5) << 3;
+
+       tp->rcv_wnd = 8192;
+
+       /*
+        * See draft-stevens-tcpca-spec-01 for discussion of the
+        * initialization of these values.
+        */
+       sk->cong_window = 1;
+       sk->ssthresh = 0x7fffffff;
+
+       sk->priority = 1;
+       sk->state = TCP_CLOSE;
+
+       /* this is how many unacked bytes we will accept for this socket.  */
+       sk->max_unacked = 2048; /* needs to be at most 2 full packets. */
+       sk->max_ack_backlog = SOMAXCONN;
+       
+       sk->mtu = 576;
+       sk->mss = 536;
+
+       sk->dummy_th.doff = sizeof(sk->dummy_th)/4;
+       
+
+       /*
+        *      Speed up by setting some standard state for the dummy_th
+        *      if TCP uses it (maybe move to tcp_init later)
+        */
+       
+       sk->dummy_th.ack=1;     
+       sk->dummy_th.doff=sizeof(struct tcphdr)>>2;
+
+       sk->tp_pinfo.af_tcp.af_specific = &ipv4_specific;
+
+       return 0;
+}
+
+static int tcp_v4_destroy_sock(struct sock *sk)
+{
+       struct sk_buff *skb;
+
+       tcp_clear_xmit_timers(sk);
+
+       if (sk->keepopen)
+       {
+               tcp_dec_slow_timer(TCP_SLT_KEEPALIVE);
+       }
+       
+       /*
+        *      Cleanup up the write buffer. 
+        */
+        
+       while((skb = skb_dequeue(&sk->write_queue)) != NULL) {
+               IS_SKB(skb);
+               skb->free = 1;
+               kfree_skb(skb, FREE_WRITE);
+       }
+
+       /*
+        *  Cleans up our, hopefuly empty, out_of_order_queue
+        */
+
+       while((skb = skb_dequeue(&sk->out_of_order_queue)) != NULL) {
+               IS_SKB(skb);
+               kfree_skb(skb, FREE_READ);
+       }
+
+       return 0;
+}
+
+struct proto tcp_prot = {
+       tcp_close,
+       tcp_v4_connect,
+       tcp_accept,
+       NULL,
+       tcp_write_wakeup,
+       tcp_read_wakeup,
+       tcp_select,
+       tcp_ioctl,
+       tcp_v4_init_sock,
+       tcp_v4_destroy_sock,
+       tcp_shutdown,
+       tcp_setsockopt,
+       tcp_getsockopt,
+       tcp_v4_sendmsg,
+       tcp_recvmsg,
+       NULL,           /* No special bind()    */
+       tcp_v4_backlog_rcv,
+       128,
+       0,
+       "TCP",
+       0, 0,
+       NULL
+};
+
+/*
+ * Local variables:
+ *  compile-command: "gcc -D__KERNEL__ -I/usr/src/linux/include -Wall -Wstrict-prototypes -O2 -fomit-frame-pointer -fno-strength-reduce -pipe -m486 -DCPU=486 -c -o tcp_ipv4.o tcp_ipv4.c"
+ * c-file-style: "Linux"
+ * End:
+ */
index 78a806952d0a205f87d830110038bc1570130cc6..50ad958950f5a757d170c980ede3bc46716be3eb 100644 (file)
  *             Matthew Dillon, <dillon@apollo.west.oic.com>
  *             Arnt Gulbrandsen, <agulbra@nvg.unit.no>
  *             Jorge Cwik, <jorge@laser.satlink.net>
- *
- * Fixes:      Eric Schenk     : avoid multiple retransmissions in one
- *                             : round trip timeout.
  */
 
-#include <linux/config.h>
-#include <net/tcp.h>
-#include <linux/ip_fw.h>
-#include <linux/firewall.h>
-#include <linux/interrupt.h>
-
-
 /*
- * RFC 1122 says:
+ * Changes:    Pedro Roque     :       Retransmit queue handled by TCP.
+ *                             :       Fragmentation on mtu decrease
+ *                             :       Segment collapse on retransmit
+ *                             :       AF independence
  *
- * "the suggested [SWS] avoidance algorithm for the receiver is to keep
- *  RECV.NEXT + RCV.WIN fixed until:
- *  RCV.BUFF - RCV.USER - RCV.WINDOW >= min(1/2 RCV.BUFF, MSS)"
+ *             Linus Torvalds  :       send_delayed_ack
  *
- * Experiments against BSD and Solaris machines show that following
- * these rules results in the BSD and Solaris machines making very
- * bad guesses about how much data they can have in flight.
- *
- * Instead we follow the BSD lead and offer a window that gives
- * the size of the current free space, truncated to a multiple
- * of 1024 bytes. If the window is smaller than
- *     min(sk->mss, MAX_WINDOW/2)
- * then we advertise the window as having size 0, unless this
- * would shrink the window we offered last time.
- * This results in as much as double the throughput as the original
- * implementation.
- *
- * We do BSD style SWS avoidance -- note that RFC1122 only says we
- * must do silly window avoidance, it does not require that we use
- * the suggested algorithm.
- *
- * The "rcvbuf" and "rmem_alloc" values are shifted by 1, because
- * they also contain buffer handling overhead etc, so the window
- * we actually use is essentially based on only half those values.
  */
-int tcp_new_window(struct sock * sk)
-{
-       unsigned long window;
-       unsigned long minwin, maxwin;
-
-       /* Get minimum and maximum window values.. */
-       minwin = sk->mss;
-       if (!minwin)
-               minwin = sk->mtu;
-       maxwin = sk->window_clamp;
-       if (!maxwin)
-               maxwin = MAX_WINDOW;
-       if (minwin > maxwin/2)
-               minwin = maxwin/2;
-
-       /* Get current rcvbuf size.. */
-       window = sk->rcvbuf/2;
-       if (window < minwin) {
-               sk->rcvbuf = minwin*2;
-               window = minwin;
-       }
-
-       /* Check rcvbuf against used and minimum window */
-       window -= sk->rmem_alloc/2;
-       if ((long)(window - minwin) < 0)                /* SWS avoidance */
-               window = 0;
 
-       if (window > 1023)
-               window &= ~1023;
-       if (window > maxwin)
-               window = maxwin;
-       return window;
-}
+#include <net/tcp.h>
 
 /*
  *     Get rid of any delayed acks, we sent one already..
  */
 static __inline__ void clear_delayed_acks(struct sock * sk)
 {
-       sk->ack_timed = 0;
+       sk->delayed_acks = 0;
        sk->ack_backlog = 0;
        sk->bytes_rcv = 0;
-       del_timer(&sk->delack_timer);
+       tcp_clear_xmit_timer(sk, TIME_DACK);
+}
+
+static __inline__ void update_send_head(struct sock *sk)
+{
+       struct tcp_opt *tp = &sk->tp_pinfo.af_tcp;
+       
+       tp->send_head = tp->send_head->next;
+
+       if (tp->send_head == (struct sk_buff *) &sk->write_queue)
+       {
+               tp->send_head = NULL;
+       }
+
+}
+
+static __inline__ int tcp_snd_test(struct sock *sk, struct sk_buff *skb)
+{
+       struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
+       int nagle_check = 1;
+       int len;
+
+       /*
+        *      RFC 1122 - section 4.2.3.4
+        *
+        *      We must queue if
+        *
+        *      a) The right edge of this frame exceeds the window
+        *      b) There are packets in flight and we have a small segment
+        *         [SWS avoidance and Nagle algorithm]
+        *         (part of SWS is done on packetization)
+        *      c) We are retransmiting [Nagle]
+        *      d) We have too many packets 'in flight'
+        */
+               
+       len = skb->end_seq - skb->seq;
+
+       if (!sk->nonagle && len < (sk->mss >> 1) && sk->packets_out)
+       {
+               nagle_check = 0;
+       }
+
+       return (nagle_check && sk->packets_out < sk->cong_window &&
+               !after(skb->end_seq, tp->snd_una + tp->snd_wnd) &&
+               sk->retransmits == 0);
 }
 
 /*
@@ -108,10 +92,11 @@ static __inline__ void clear_delayed_acks(struct sock * sk)
  *     having checked it is sane seeming.
  */
  
-void tcp_send_skb(struct sock *sk, struct sk_buff *skb)
+int tcp_send_skb(struct sock *sk, struct sk_buff *skb)
 {
-       int size;
        struct tcphdr * th = skb->h.th;
+       struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
+       int size;
 
        /*
         *      length of packet (not counting length of pre-tcp headers) 
@@ -125,10 +110,10 @@ void tcp_send_skb(struct sock *sk, struct sk_buff *skb)
         
        if (size < sizeof(struct tcphdr) || size > skb->len) 
        {
-               printk(KERN_ERR "tcp_send_skb: bad skb (skb = %p, data = %p, th = %p, len = %lu)\n",
+               printk("tcp_send_skb: bad skb (skb = %p, data = %p, th = %p, len = %lu)\n",
                        skb, skb->data, th, skb->len);
                kfree_skb(skb, FREE_WRITE);
-               return;
+               return 0;
        }
 
        /*
@@ -138,165 +123,245 @@ void tcp_send_skb(struct sock *sk, struct sk_buff *skb)
         
        if (size == sizeof(struct tcphdr)) 
        {
-               /* If it's got a syn or fin it's notionally included in the size..*/
+               /* 
+                 * If it's got a syn or fin discard
+                 */
                if(!th->syn && !th->fin) 
                {
-                       printk(KERN_ERR "tcp_send_skb: attempt to queue a bogon.\n");
+                       printk("tcp_send_skb: attempt to queue a bogon.\n");
                        kfree_skb(skb,FREE_WRITE);
-                       return;
+                       return 0;
                }
        }
 
-       /*
-        * Jacobson recommends this in the appendix of his SIGCOMM'88 paper.
-        * The idea is to do a slow start again if we haven't been doing
-        * anything for a long time, in which case we have no reason to
-        * believe that our congestion window is still correct.
-        */
-       if (sk->send_head == 0 && (jiffies - sk->idletime) > sk->rto)
-               sk->cong_window = 1;
 
        /*
         *      Actual processing.
         */
-
+        
        tcp_statistics.TcpOutSegs++;  
        skb->seq = ntohl(th->seq);
        skb->end_seq = skb->seq + size - 4*th->doff;
 
-       /*
-        *      We must queue if
-        *
-        *      a) The right edge of this frame exceeds the window
-        *      b) We are retransmitting (Nagle's rule)
-        *      c) We have too many packets 'in flight'
-        */
-        
-       if (after(skb->end_seq, sk->window_seq) ||
-           (sk->retransmits && sk->ip_xmit_timeout == TIME_WRITE) ||
-            sk->packets_out >= sk->cong_window) 
+       
+       if (tp->send_head || !tcp_snd_test(sk, skb))
        {
-               /* checksum will be supplied by tcp_write_xmit.  So
-                * we shouldn't need to set it at all.  I'm being paranoid */
-               th->check = 0;
-               if (skb->next != NULL) 
+               /* 
+                * Remember where we must start sending
+                */
+
+               if (tp->send_head == NULL)
+                       tp->send_head = skb;
+
+               skb_queue_tail(&sk->write_queue, skb);
+
+               if (sk->packets_out == 0 && !tp->pending)
                {
-                       printk(KERN_ERR "tcp_send_partial: next != NULL\n");
-                       skb_unlink(skb);
+                       tp->pending = TIME_PROBE0;
+                       tcp_reset_xmit_timer(sk, TIME_PROBE0, tp->rto);
                }
-               skb_queue_tail(&sk->write_queue, skb);
-               
-               if (before(sk->window_seq, sk->write_queue.next->end_seq) &&
-                   sk->send_head == NULL && sk->ack_backlog == 0)
-                       tcp_reset_xmit_timer(sk, TIME_PROBE0, sk->rto);
+
        }
-       else 
+       else
        {
+               struct sk_buff * buff;
+
                /*
                 *      This is going straight out
                 */
-               clear_delayed_acks(sk);          
-               th->ack_seq = htonl(sk->acked_seq);
+
+               skb_queue_tail(&sk->write_queue, skb);
+
+               clear_delayed_acks(sk);
+                
+               th->ack_seq = htonl(tp->rcv_nxt);
                th->window = htons(tcp_select_window(sk));
 
-               tcp_send_check(th, sk->saddr, sk->daddr, size, skb);
+               tp->af_specific->send_check(sk, th, size, skb);
 
-               sk->sent_seq = sk->write_seq;
+               tp->snd_nxt = skb->end_seq;
+               
+               atomic_inc(&sk->packets_out);
 
-               /*
-                *      This is mad. The tcp retransmit queue is put together
-                *      by the ip layer. This causes half the problems with
-                *      unroutable FIN's and other things.
-                */
-                
-               sk->prot->queue_xmit(sk, skb->dev, skb, 0);
+               skb->when = jiffies;
                
-               /*
-                *      Set for next retransmit based on expected ACK time
-                *      of the first packet in the resend queue.
-                *      This is no longer a window behind.
-                */
+               buff = skb_clone(skb, GFP_ATOMIC);
+               atomic_add(buff->truesize, &sk->wmem_alloc);
+
+               tp->af_specific->queue_xmit(sk, skb->dev, buff, 1);
 
-               tcp_reset_xmit_timer(sk, TIME_WRITE, sk->rto);
+               if (!tcp_timer_is_set(sk, TIME_RETRANS))
+                       tcp_reset_xmit_timer(sk, TIME_RETRANS, tp->rto);
        }
+
+       return 0;
 }
 
 /*
- *     Locking problems lead us to a messy situation where we can have
- *     multiple partially complete buffers queued up. This is really bad
- *     as we don't want to be sending partial buffers. Fix this with
- *     a semaphore or similar to lock tcp_write per socket.
- *
- *     These routines are pretty self descriptive.
+ *     Function to create two new tcp segments.
+ *     Shrinks the given segment to the specified size and appends a new
+ *     segment with the rest of the packet to the list.
+ *     This won't be called frenquently, I hope... 
  */
-struct sk_buff * tcp_dequeue_partial(struct sock * sk)
+
+static int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len)
 {
-       struct sk_buff * skb;
-       unsigned long flags;
-
-       save_flags(flags);
-       cli();
-       skb = sk->partial;
-       if (skb) {
-               sk->partial = NULL;
-               del_timer(&sk->partial_timer);
+       struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
+       struct sk_buff *buff;
+       struct tcphdr *th, *nth;        
+       int nsize;
+       int tmp;
+
+       th = skb->h.th;
+
+       /* size of new segment */
+       nsize = skb->tail - ((unsigned char *) (th + 1)) - len;
+
+       if (nsize <= 0)
+       {
+               printk(KERN_DEBUG "tcp_fragment: bug size <= 0\n");
+               return -1;
        }
-       restore_flags(flags);
-       return skb;
-}
 
-/*
- *     Empty the partial queue
- */
-void tcp_send_partial(struct sock *sk)
-{
-       struct sk_buff *skb;
+       /*
+        *      Get a new skb... force flag on
+        */
+       buff = sock_wmalloc(sk, nsize + 128 + sk->prot->max_header + 15, 1, 
+                           GFP_ATOMIC);
 
-       if (sk == NULL)
-               return;
-       while ((skb = tcp_dequeue_partial(sk)) != NULL)
-               tcp_send_skb(sk, skb);
+       if (buff == NULL)
+               return -1;
+
+       buff->sk = sk;
+       buff->localroute = sk->localroute;
+               
+       /*
+        *      Put headers on the new packet
+        */
+
+       tmp = tp->af_specific->build_net_header(sk, buff);
+
+       if (tmp < 0)
+       {
+               sock_wfree(sk, buff);
+               return -1;
+       }
+               
+       /*
+        *      Move the TCP header over
+        */
+       
+       nth = (struct tcphdr *) skb_put(buff, sizeof(*th));
+
+       buff->h.th = nth;
+       
+       memcpy(nth, th, sizeof(*th));
+       
+       /*
+        *      Correct the new header
+        */
+       
+       buff->seq = skb->seq + len;
+       buff->end_seq = skb->end_seq;
+       nth->seq = htonl(buff->seq);
+       nth->check = 0;
+       nth->doff  = 5; 
+       
+       /* urg data is always an headache */
+       if (th->urg)
+       {
+               if (th->urg_ptr > len)
+               {
+                       th->urg = 0;
+                       nth->urg_ptr -= len;
+               }
+               else
+               {
+                       nth->urg = 0;
+               }
+       }
+
+       /*
+        *      Copy TCP options and data start to our new buffer
+        */
+       
+       buff->csum = csum_partial_copy(((u8 *)(th + 1)) + len,
+                                      skb_put(buff, nsize),
+                                      nsize, 0);
+       
+
+       skb->end_seq -= nsize;
+
+       skb_trim(skb, skb->len - nsize);
+
+       /* remember to checksum this packet afterwards */
+       th->check = 0;
+       skb->csum = csum_partial((u8*) (th + 1), skb->tail - ((u8 *) (th + 1)),
+                                0);
+
+       skb_append(skb, buff);
+
+       return 0;
 }
 
-/*
- *     Queue a partial frame
- */
-void tcp_enqueue_partial(struct sock * sk, struct sk_buff * skb)
+static void tcp_wrxmit_prob(struct sock *sk, struct sk_buff *skb)
 {
-       struct sk_buff * tmp;
-       unsigned long flags;
-
-       save_flags(flags);
-       cli();
-       tmp = sk->partial;
-       if (tmp)
-               del_timer(&sk->partial_timer);
-       sk->partial = skb;
-       init_timer(&sk->partial_timer);
        /*
-        *      Wait up to 1 second for the buffer to fill.
+        *      This is acked data. We can discard it. This 
+        *      cannot currently occur.
         */
-       sk->partial_timer.expires = jiffies+HZ/10;
-       sk->partial_timer.function = (void (*)(unsigned long)) tcp_send_partial;
-       sk->partial_timer.data = (unsigned long) sk;
-       add_timer(&sk->partial_timer);
-       restore_flags(flags);
-       if (tmp)
-               tcp_send_skb(sk, tmp);
+
+       sk->retransmits = 0;
+
+       printk(KERN_DEBUG "tcp_write_xmit: bug skb in write queue\n");
+
+       update_send_head(sk);
+
+       skb_unlink(skb);        
+       skb->sk = NULL;
+       skb->free = 1;
+       kfree_skb(skb, FREE_WRITE);
+
+       if (!sk->dead)
+               sk->write_space(sk);
+}
+
+static int tcp_wrxmit_frag(struct sock *sk, struct sk_buff *skb, int size)
+{
+       struct tcp_opt *tp = &sk->tp_pinfo.af_tcp;
+       
+       printk(KERN_DEBUG "tcp_write_xmit: frag needed size=%d mss=%d\n", 
+              size, sk->mss);
+                               
+       if (tcp_fragment(sk, skb, sk->mss))
+       {
+               /* !tcp_frament Failed! */
+               tp->send_head = skb;
+               atomic_dec(&sk->packets_out);
+               return -1;
+       }
+       else
+       {
+               /* 
+                * If tcp_fragment succeded then
+                * the send head is the resulting
+                * fragment
+                */
+               tp->send_head = skb->next;
+       }
+       return 0;
 }
 
 /*
- *     This routine takes stuff off of the write queue,
- *     and puts it in the xmit queue. This happens as incoming acks
- *     open up the remote window for us.
+ *     This routine writes packets to the network.
+ *     It advances the send_head.
+ *     This happens as incoming acks open up the remote window for us.
  */
  
 void tcp_write_xmit(struct sock *sk)
 {
        struct sk_buff *skb;
+       struct tcp_opt *tp = &sk->tp_pinfo.af_tcp;
 
        /*
         *      The bytes will have to remain here. In time closedown will
@@ -308,64 +373,43 @@ void tcp_write_xmit(struct sock *sk)
 
        /*
         *      Anything on the transmit queue that fits the window can
-        *      be added providing we are not
+        *      be added providing we are:
         *
-        *      a) retransmitting (Nagle's rule)
-        *      b) exceeding our congestion window.
+        *      a) following SWS avoidance [and Nagle algorithm]
+        *      b) not exceeding our congestion window.
+        *      c) not retransmiting [Nagle]
         */
-        
-       while((skb = skb_peek(&sk->write_queue)) != NULL &&
-               !after(skb->end_seq, sk->window_seq) &&
-               (sk->retransmits == 0 ||
-                sk->ip_xmit_timeout != TIME_WRITE ||
-                !after(skb->end_seq, sk->rcv_ack_seq))
-               && sk->packets_out < sk->cong_window) 
+
+       start_bh_atomic();
+
+       while((skb = tp->send_head) && tcp_snd_test(sk, skb))
        {
                IS_SKB(skb);
-               skb_unlink(skb);
-               
+                               
                /*
-                *      See if we really need to send the whole packet. 
+                *      See if we really need to send the packet. 
                 */
                 
-               if (before(skb->end_seq, sk->rcv_ack_seq +1)) {
-                       /*
-                        *      This is acked data. We can discard it.
-                        *      This implies the packet was sent out
-                        *      of the write queue by a zero window probe.
-                        */
-                        
-                       sk->retransmits = 0;
-                       kfree_skb(skb, FREE_WRITE);
-                       if (!sk->dead) 
-                               sk->write_space(sk);
-               } else {
+               if (!after(skb->end_seq, tp->snd_una)) 
+               {
+                       tcp_wrxmit_prob(sk, skb);
+               } 
+               else
+               {
                        struct tcphdr *th;
-                       struct iphdr *iph;
+                       struct sk_buff *buff;
                        int size;
 
-                       iph = skb->ip_hdr;
-                       th = (struct tcphdr *)(((char *)iph) +(iph->ihl << 2));
+                       /* 
+                        * Advance the send_head
+                        * This one is going out.
+                        */
+
+                       update_send_head(sk);
+
+                       atomic_inc(&sk->packets_out);
 
-                        /* See if we need to shrink the leading packet on
-                         * the retransmit queue. Strictly speaking, we
-                         * should never need to do this, but some buggy TCP
-                         * implementations get confused if you send them
-                         * a packet that contains both old and new data. (Feh!)
-                         * Soooo, we have this uglyness here.
-                         */
-                       if (after(sk->rcv_ack_seq,skb->seq+th->syn+th->fin))
-                               tcp_shrink_skb(sk,skb,sk->rcv_ack_seq);
 
-                       size = skb->len - (((unsigned char *) th) - skb->data);
-#ifndef CONFIG_NO_PATH_MTU_DISCOVERY
-                       if (size > sk->mtu - sizeof(struct iphdr))
-                       {
-                               iph->frag_off &= ~htons(IP_DF);
-                               ip_send_check(iph);
-                       }
-#endif
-                       
 /*
  * put in the ack seq and window at this point rather than earlier,
  * in order to keep them monotonic.  We really want to avoid taking
@@ -373,79 +417,140 @@ void tcp_write_xmit(struct sock *sk)
  * Ack and window will in general have changed since this packet was put
  * on the write queue.
  */
-                       th->ack_seq = htonl(sk->acked_seq);
-                       th->window = htons(tcp_select_window(sk));
 
-                       tcp_send_check(th, sk->saddr, sk->daddr, size, skb);
+                       th = skb->h.th;
+                       size = skb->len - (((unsigned char *) th) - skb->data);
 
-                       sk->sent_seq = skb->end_seq;
+                       if (size - (th->doff << 2) > sk->mss)
+                       {
+                               if (tcp_wrxmit_frag(sk, skb, size))
+                                       break;
+                       }
                        
-                       /*
-                        *      IP manages our queue for some crazy reason
-                        */
-                        
-                       sk->prot->queue_xmit(sk, skb->dev, skb, skb->free);
+                       th->ack_seq = htonl(tp->rcv_nxt);
+                       th->window = htons(tcp_select_window(sk));
+
+                       tp->af_specific->send_check(sk, th, size, skb);
 
+                       if (before(skb->end_seq, tp->snd_nxt)) 
+                               printk(KERN_DEBUG "tcp_write_xmit:"
+                                      " sending already sent seq\n");
+                       else
+                               tp->snd_nxt = skb->end_seq;
+                       
                        clear_delayed_acks(sk);
+                       
+                       skb->when = jiffies;
 
-                       tcp_reset_xmit_timer(sk, TIME_WRITE, sk->rto);
+                       buff = skb_clone(skb, GFP_ATOMIC);
+                       atomic_add(buff->truesize, &sk->wmem_alloc);
+
+                       tp->af_specific->queue_xmit(sk, skb->dev, buff, 1);
+                       
+                       if (!tcp_timer_is_set(sk, TIME_RETRANS))
+                       {
+                               tcp_reset_xmit_timer(sk, TIME_RETRANS, tp->rto);
+                       }
                }
        }
+
+       end_bh_atomic();
+}
+
+static int tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *skb)
+{
+       struct tcphdr *th1, *th2;
+       int size1, size2, avail;
+       struct sk_buff *buff = skb->next;
+
+       th1 = skb->h.th;
+
+       if (th1->urg)
+               return -1;
+
+       avail = skb->end - skb->tail;
+
+       /*
+        *  size of tcp payload
+        */
+
+       size1 = skb->tail - (u8 *) (th1 + 1);
+       
+       th2 = buff->h.th;
+
+       size2 = buff->tail - (u8 *) (th2 + 1); 
+
+       if (size2 > avail || size1 + size2 > sk->mss )
+               return -1;
+
+       /*
+        *  ok. we will be able to collapse the packet
+        */
+
+       skb_unlink(buff);
+
+       memcpy(skb_put(skb, size2), ((char *) th2) + (th2->doff << 2), size2);
+       
+       /*
+        * update sizes on original skb. both TCP and IP
+        */
+       skb->end_seq += size2;
+
+       if (th2->urg)
+       {
+               th1->urg = 1;
+               th1->urg_ptr = th2->urg_ptr + size1;
+       }
+
+       /*
+        * ... and off you go.
+        */
+
+       buff->free = 1;
+       kfree_skb(buff, FREE_WRITE);
+       atomic_dec(&sk->packets_out);
+
+       /* 
+        *      Header checksum will be set by the retransmit procedure
+        *      after calling rebuild header
+        */
+
+       th1->check = 0;
+       skb->csum = csum_partial((u8*) (th1+1), size1 + size2, 0);
+
+       return 0;
 }
 
 
 /*
  *     A socket has timed out on its send queue and wants to do a
- *     little retransmitting. Currently this means TCP.
+ *     little retransmitting.
+ *     retransmit_head can be different from the head of the write_queue
+ *     if we are doing fast retransmit.
  */
 
 void tcp_do_retransmit(struct sock *sk, int all)
 {
        struct sk_buff * skb;
-       struct proto *prot;
-       struct device *dev;
-       struct rtable *rt;
+       int ct=0;
+       struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
 
-       prot = sk->prot;
-       if (!all) {
-               /*
-                * If we are just retransmitting one packet reset
-                * to the start of the queue.
-                */
-               sk->send_next = sk->send_head;
-               sk->packets_out = 0;
-       }
-       skb = sk->send_next;
+       start_bh_atomic();
+
+       if (tp->retrans_head == NULL)
+               tp->retrans_head = skb_peek(&sk->write_queue);
 
-       while (skb != NULL)
+       if (tp->retrans_head == tp->send_head)
+               tp->retrans_head = NULL;
+       
+       while ((skb = tp->retrans_head) != NULL)
        {
                struct tcphdr *th;
-               struct iphdr *iph;
-               int size;
+               u32 tcp_size;
 
-               dev = skb->dev;
                IS_SKB(skb);
-               skb->when = jiffies;
-
-               /* dl1bke 960201 - @%$$! Hope this cures strange race conditions    */
-               /*                 with AX.25 mode VC. (esp. DAMA)                  */
-               /*                 if the buffer is locked we should not retransmit */
-               /*                 anyway, so we don't need all the fuss to prepare */
-               /*                 the buffer in this case.                         */
-               /*                 (the skb_pull() changes skb->data while we may   */
-               /*                 actually try to send the data. Ouch. A side      */
-               /*                 effect is that we'll send some unnecessary data, */
-               /*                 but the alternative is disastrous...     */
                
-               if (skb_device_locked(skb))
-                       break;
-
-               /*
-                *      Discard the surplus MAC header
-                */
-                
-               skb_pull(skb,((unsigned char *)skb->ip_hdr)-skb->data);
-
                /*
                 * In general it's OK just to use the old packet.  However we
                 * need to use the current ack and window fields.  Urg and
@@ -455,142 +560,75 @@ void tcp_do_retransmit(struct sock *sk, int all)
                 * changing the packet, we have to issue a new IP identifier.
                 */
 
-               iph = (struct iphdr *)skb->data;
-               th = (struct tcphdr *)(((char *)iph) + (iph->ihl << 2));
-               size = ntohs(iph->tot_len) - (iph->ihl<<2);
-               
-               /*
-                *      Note: We ought to check for window limits here but
-                *      currently this is done (less efficiently) elsewhere.
-                */
+               th = skb->h.th;
 
-               /*
-                *      Put a MAC header back on (may cause ARPing)
-                */
-                
-               {
-                       /* ANK: UGLY, but the bug, that was here, should be fixed.
-                        */
-                       struct options *  opt = (struct options*)skb->proto_priv;
-                       rt = ip_check_route(&sk->ip_route_cache, opt->srr?opt->faddr:iph->daddr, skb->localroute);
-               }
-
-               iph->id = htons(ip_id_count++);
-#ifndef CONFIG_NO_PATH_MTU_DISCOVERY
-               if (rt && ntohs(iph->tot_len) > rt->rt_mtu)
-                       iph->frag_off &= ~htons(IP_DF);
-#endif
-               ip_send_check(iph);
-                       
-               if (rt==NULL)   /* Deep poo */
+               tcp_size = skb->tail - ((unsigned char *) (th + 1));
+
+               if (tcp_size > sk->mss)
                {
-                       if(skb->sk)
+                       if (tcp_fragment(sk, skb, sk->mss))
                        {
-                               skb->sk->err_soft=ENETUNREACH;
-                               skb->sk->error_report(skb->sk);
+                               printk(KERN_DEBUG "tcp_fragment failed\n");
+                               return;
                        }
-                       /* Can't transmit this packet, no reason
-                        * to transmit the later ones, even if
-                        * the congestion window allows.
-                        */
-                       break;
+                       atomic_inc(&sk->packets_out);
                }
-               else
+
+               if (!th->syn &&
+                   tcp_size < (sk->mss >> 1) &&
+                   skb->next != tp->send_head &&
+                   skb->next != (struct sk_buff *)&sk->write_queue)
                {
-                       dev=rt->rt_dev;
-                       skb->raddr=rt->rt_gateway;
-                       skb->dev=dev;
-                       skb->arp=1;
-#ifdef CONFIG_FIREWALL
-                       if (call_out_firewall(PF_INET, skb->dev, iph, NULL) < FW_ACCEPT) {
-                               /* The firewall wants us to dump the packet.
-                               * We have to check this here, because
-                               * the drop in ip_queue_xmit only catches the
-                               * first time we send it. We must drop on
-                               * every resend as well.
-                               */
-                               break;
-                       }
-#endif 
-                       if (rt->rt_hh)
-                       {
-                               memcpy(skb_push(skb,dev->hard_header_len),rt->rt_hh->hh_data,dev->hard_header_len);
-                               if (!rt->rt_hh->hh_uptodate)
-                               {
-                                       skb->arp = 0;
-#if RT_CACHE_DEBUG >= 2
-                                       printk("tcp_do_retransmit: hh miss %08x via %08x\n", iph->daddr, rt->rt_gateway);
-#endif
-                               }
-                       }
-                       else if (dev->hard_header)
-                       {
-                               if(dev->hard_header(skb, dev, ETH_P_IP, NULL, NULL, skb->len)<0)
-                                       skb->arp=0;
-                       }
-               
+                       tcp_retrans_try_collapse(sk, skb);
+               }                       
+
+               if (tp->af_specific->rebuild_header(sk, skb) == 0) 
+               {
+                       struct sk_buff *buff;
+                       int size;
+
+                       if (sk->debug)
+                               printk("retransmit sending\n");
+
                        /*
-                        *      This is not the right way to handle this. We have to
-                        *      issue an up to date window and ack report with this 
-                        *      retransmit to keep the odd buggy tcp that relies on 
-                        *      the fact BSD does this happy. 
-                        *      We don't however need to recalculate the entire 
-                        *      checksum, so someone wanting a small problem to play
-                        *      with might like to implement RFC1141/RFC1624 and speed
-                        *      this up by avoiding a full checksum.
+                        *      update ack and window
                         */
-                
-                       th->ack_seq = htonl(sk->acked_seq);
-                       clear_delayed_acks(sk);
+                       th->ack_seq = htonl(tp->rcv_nxt);
                        th->window = ntohs(tcp_select_window(sk));
-                       tcp_send_check(th, sk->saddr, sk->daddr, size, skb);
-               
-                       /*
-                        *      If the interface is (still) up and running, kick it.
-                        */
-       
-                       if (dev->flags & IFF_UP)
-                       {
-                               /*
-                                *      If the packet is still being sent by the device/protocol
-                                *      below then don't retransmit. This is both needed, and good -
-                                *      especially with connected mode AX.25 where it stops resends
-                                *      occurring of an as yet unsent anyway frame!
-                                *      We still add up the counts as the round trip time wants
-                                *      adjusting.
-                                */
-                               if (sk && !skb_device_locked(skb))
-                               {
-                                       /* Remove it from any existing driver queue first! */
-                                       skb_unlink(skb);
-                                       /* Now queue it */
-                                       ip_statistics.IpOutRequests++;
-                                       dev_queue_xmit(skb, dev, sk->priority);
-                                       sk->packets_out++;
-                               }
-                       }
+
+                       size = skb->tail - (unsigned char *) th;
+                       tp->af_specific->send_check(sk, th, size, skb);
+
+                       skb->when = jiffies;
+                       buff = skb_clone(skb, GFP_ATOMIC);
+                       atomic_add(buff->truesize, &sk->wmem_alloc);
+
+                       clear_delayed_acks(sk);
+
+                       tp->af_specific->queue_xmit(sk, skb->dev, buff, 1);
+               }
+               else
+               {
+                       printk(KERN_DEBUG "tcp_do_rebuild_header failed\n");
+                       break;
                }
 
                /*
                 *      Count retransmissions
                 */
                 
-               sk->prot->retransmits++;
+               ct++;
+               sk->prot->retransmits ++;
                tcp_statistics.TcpRetransSegs++;
 
                /*
                 * Record the high sequence number to help avoid doing
                 * to much fast retransmission.
                 */
+
                if (sk->retransmits)
-                       sk->high_seq = sk->sent_seq;
+                      tp->high_seq = tp->snd_nxt;
                
-               /*
-                * Advance the send_next pointer so we don't keep
-                * retransmitting the same stuff every time we get an ACK.
-                */
-               sk->send_next = skb->link3;
-
                /*
                 *      Only one retransmit requested.
                 */
@@ -602,87 +640,22 @@ void tcp_do_retransmit(struct sock *sk, int all)
                 *      This should cut it off before we send too many packets.
                 */
 
-               if (sk->packets_out >= sk->cong_window)
+               if (ct >= sk->cong_window)
                        break;
 
-               skb = skb->link3;
-       }
-}
-
-/*
- *     This routine will send an RST to the other tcp. 
- */
-void tcp_send_reset(unsigned long saddr, unsigned long daddr, struct tcphdr *th,
-         struct proto *prot, struct options *opt, struct device *dev, int tos, int ttl)
-{
-       struct sk_buff *buff;
-       struct tcphdr *t1;
-       int tmp;
-       struct device *ndev=NULL;
-
-       /*
-        *      Cannot reset a reset (Think about it).
-        */
-        
-       if(th->rst)
-               return;
-  
-       /*
-        * We need to grab some memory, and put together an RST,
-        * and then put it into the queue to be sent.
-        */
-
-       buff = alloc_skb(MAX_RESET_SIZE, GFP_ATOMIC);
-       if (buff == NULL) 
-               return;
-
-       buff->sk = NULL;
-       buff->dev = dev;
-       buff->localroute = 0;
-       buff->csum = 0;
-
-       /*
-        *      Put in the IP header and routing stuff. 
-        */
-
-       tmp = prot->build_header(buff, saddr, daddr, &ndev, IPPROTO_TCP, opt,
-                          sizeof(struct tcphdr),tos,ttl,NULL);
-       if (tmp < 0) 
-       {
-               buff->free = 1;
-               sock_wfree(NULL, buff);
-               return;
-       }
-
-       t1 =(struct tcphdr *)skb_put(buff,sizeof(struct tcphdr));
-       memset(t1, 0, sizeof(*t1));
-
-       /*
-        *      Swap the send and the receive. 
-        */
-
-       t1->dest = th->source;
-       t1->source = th->dest;
-       t1->doff = sizeof(*t1)/4;
-       t1->rst = 1;
-  
-       if(th->ack)
-       {
-               t1->seq = th->ack_seq;
-       }
-       else
-       {
-               t1->ack = 1;
-               if(!th->syn)
-                       t1->ack_seq = th->seq;
-               else
-                       t1->ack_seq = htonl(ntohl(th->seq)+1);
+               /*
+                *      Advance the pointer
+                */
+               
+               tp->retrans_head = skb->next;
+               if ((tp->retrans_head == tp->send_head) ||
+                   (tp->retrans_head == (struct sk_buff *) &sk->write_queue))
+               {
+                       tp->retrans_head = NULL;
+               }
        }
 
-       tcp_send_check(t1, saddr, daddr, sizeof(*t1), buff);
-       prot->queue_xmit(NULL, ndev, buff, 1);
-       tcp_statistics.TcpOutSegs++;
+       end_bh_atomic();
 }
 
 /*
@@ -691,19 +664,19 @@ void tcp_send_reset(unsigned long saddr, unsigned long daddr, struct tcphdr *th,
 
 void tcp_send_fin(struct sock *sk)
 {
-       struct proto *prot =(struct proto *)sk->prot;
        struct tcphdr *th =(struct tcphdr *)&sk->dummy_th;
+       struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);    
        struct tcphdr *t1;
        struct sk_buff *buff;
-       struct device *dev=NULL;
        int tmp;
+       
                
-       buff = sock_wmalloc(sk, MAX_RESET_SIZE,, GFP_KERNEL);
+       buff = sock_wmalloc(sk, MAX_RESET_SIZE, 1, GFP_KERNEL);
 
        if (buff == NULL)
        {
                /* This is a disaster if it occurs */
-               printk(KERN_CRIT "tcp_send_fin: Impossible malloc failure");
+               printk("tcp_send_fin: Impossible malloc failure");
                return;
        }
 
@@ -719,9 +692,8 @@ void tcp_send_fin(struct sock *sk)
         *      Put in the IP header and routing stuff. 
         */
 
-       tmp = prot->build_header(buff,sk->saddr, sk->daddr, &dev,
-                          IPPROTO_TCP, sk->opt,
-                          sizeof(struct tcphdr),sk->ip_tos,sk->ip_ttl,&sk->ip_route_cache);
+       tmp = tp->af_specific->build_net_header(sk, buff);
+
        if (tmp < 0) 
        {
                int t;
@@ -747,126 +719,115 @@ void tcp_send_fin(struct sock *sk)
         */
 
        t1 =(struct tcphdr *)skb_put(buff,sizeof(struct tcphdr));
-       buff->dev = dev;
+       buff->h.th =  t1;
+
        memcpy(t1, th, sizeof(*t1));
        buff->seq = sk->write_seq;
        sk->write_seq++;
        buff->end_seq = sk->write_seq;
        t1->seq = htonl(buff->seq);
-       t1->ack_seq = htonl(sk->acked_seq);
+       t1->ack_seq = htonl(tp->rcv_nxt);
        t1->window = htons(tcp_select_window(sk));
        t1->fin = 1;
-       tcp_send_check(t1, sk->saddr, sk->daddr, sizeof(*t1), buff);
+
+       tp->af_specific->send_check(sk, t1, sizeof(*t1), buff);
 
        /*
-        * If there is data in the write queue, the fin must be appended to
-        * the write queue.
+        * The fin can only be transmited after the data.
         */
        
-       if (skb_peek(&sk->write_queue) != NULL) 
-       {
-               buff->free = 0;
-               if (buff->next != NULL) 
-               {
-                       printk(KERN_ERR "tcp_send_fin: next != NULL\n");
-                       skb_unlink(buff);
-               }
-               skb_queue_tail(&sk->write_queue, buff);
-       } 
-       else 
-       {
-               sk->sent_seq = sk->write_seq;
-               sk->prot->queue_xmit(sk, dev, buff, 0);
-               tcp_reset_xmit_timer(sk, TIME_WRITE, sk->rto);
+       skb_queue_tail(&sk->write_queue, buff);
+
+       if (tp->send_head == NULL)
+       {
+               struct sk_buff *skb1;
+
+               atomic_inc(&sk->packets_out);
+               tp->snd_nxt = sk->write_seq;
+               buff->when = jiffies;
+
+               skb1 = skb_clone(buff, GFP_KERNEL);
+               atomic_add(skb1->truesize, &sk->wmem_alloc);
+
+               tp->af_specific->queue_xmit(sk, skb1->dev, skb1, 1);
+
+                if (!tcp_timer_is_set(sk, TIME_RETRANS))
+                       tcp_reset_xmit_timer(sk, TIME_RETRANS, tp->rto);
        }
 }
 
-
-void tcp_send_synack(struct sock * newsk, struct sock * sk, struct sk_buff * skb)
+int tcp_send_synack(struct sock *sk)
 {
-       struct tcphdr *t1;
-       unsigned char *ptr;
+       struct tcp_opt * tp = &(sk->tp_pinfo.af_tcp);
+       struct sk_buff * skb;   
        struct sk_buff * buff;
-       struct device *ndev=NULL;
+       struct tcphdr *th;
+       unsigned char *ptr;
        int tmp;
+       
+       skb = sock_wmalloc(sk, MAX_SYN_SIZE, 1, GFP_ATOMIC);
 
-       buff = sock_wmalloc(newsk, MAX_SYN_SIZE, 1, GFP_ATOMIC);
-       if (buff == NULL) 
+       if (skb == NULL) 
        {
-               sk->err = ENOMEM;
-               destroy_sock(newsk);
-               kfree_skb(skb, FREE_READ);
-               tcp_statistics.TcpAttemptFails++;
-               return;
+               return -ENOMEM;
        }
-  
-       buff->sk = newsk;
-       buff->localroute = newsk->localroute;
-
-       /*
-        *      Put in the IP header and routing stuff. 
-        */
 
-       tmp = sk->prot->build_header(buff, newsk->saddr, newsk->daddr, &ndev,
-                              IPPROTO_TCP, newsk->opt, MAX_SYN_SIZE,sk->ip_tos,sk->ip_ttl,&newsk->ip_route_cache);
+       skb->sk = sk;
+       skb->localroute = sk->localroute;
 
-       /*
-        *      Something went wrong. 
-        */
-
-       if (tmp < 0) 
+       tmp = tp->af_specific->build_net_header(sk, skb);
+       
+       if (tmp < 0)
        {
-               sk->err = tmp;
-               buff->free = 1;
-               kfree_skb(buff,FREE_WRITE);
-               destroy_sock(newsk);
-               skb->sk = sk;
-               kfree_skb(skb, FREE_READ);
-               tcp_statistics.TcpAttemptFails++;
-               return;
+               skb->free = 1;
+               kfree_skb(skb, FREE_WRITE);
+               return tmp;
        }
 
-       t1 =(struct tcphdr *)skb_put(buff,sizeof(struct tcphdr));
-  
-       memcpy(t1, skb->h.th, sizeof(*t1));
-       buff->seq = newsk->write_seq++;
-       buff->end_seq = newsk->write_seq;
-       /*
-        *      Swap the send and the receive. 
-        */
-       t1->dest = skb->h.th->source;
-       t1->source = newsk->dummy_th.source;
-       t1->seq = ntohl(buff->seq);
-       newsk->sent_seq = newsk->write_seq;
-       t1->window = ntohs(tcp_select_window(newsk));
-       t1->syn = 1;
-       t1->ack = 1;
-       t1->urg = 0;
-       t1->rst = 0;
-       t1->psh = 0;
-       t1->ack_seq = htonl(newsk->acked_seq);
-       t1->doff = sizeof(*t1)/4+1;
-       ptr = skb_put(buff,4);
-       ptr[0] = 2;
-       ptr[1] = 4;
-       ptr[2] = ((newsk->mtu) >> 8) & 0xff;
-       ptr[3] =(newsk->mtu) & 0xff;
-       buff->csum = csum_partial(ptr, 4, 0);
-       tcp_send_check(t1, newsk->saddr, newsk->daddr, sizeof(*t1)+4, buff);
-       newsk->prot->queue_xmit(newsk, ndev, buff, 0);
-       tcp_reset_xmit_timer(newsk, TIME_WRITE , TCP_TIMEOUT_INIT);
-       skb->sk = newsk;
+       th =(struct tcphdr *) skb_put(skb, sizeof(struct tcphdr));
+       skb->h.th = th;
+       memset(th, 0, sizeof(struct tcphdr));
 
-       /*
-        *      Charge the sock_buff to newsk. 
-        */
-        
-       atomic_sub(skb->truesize, &sk->rmem_alloc);
-       atomic_add(skb->truesize, &newsk->rmem_alloc);
+       th->syn = 1;
+       th->ack = 1;
+
+       th->source = sk->dummy_th.source;
+       th->dest = sk->dummy_th.dest;
+              
+       skb->seq = tp->snd_una;
+       skb->end_seq = skb->seq + 1 /* th->syn */ ;
+       th->seq = ntohl(skb->seq);
+
+       th->window = ntohs(tp->rcv_wnd);
+
+       th->ack_seq = htonl(tp->rcv_nxt);
+       th->doff = sizeof(*th)/4 + 1;
+
+       ptr = skb_put(skb, TCPOLEN_MSS);
+       ptr[0] = TCPOPT_MSS;
+       ptr[1] = TCPOLEN_MSS;
+       ptr[2] = ((sk->mss) >> 8) & 0xff;
+       ptr[3] = (sk->mss) & 0xff;
+       skb->csum = csum_partial(ptr, TCPOLEN_MSS, 0);
+
+       tp->af_specific->send_check(sk, th, sizeof(*th)+4, skb);
+
+       skb_queue_tail(&sk->write_queue, skb);
+
+       atomic_inc(&sk->packets_out);
        
-       skb_queue_tail(&sk->receive_queue,skb);
-       sk->ack_backlog++;
+       skb->when = jiffies;
+       buff = skb_clone(skb, GFP_ATOMIC);
+
+       atomic_add(skb->truesize, &sk->wmem_alloc);
+
+       tp->af_specific->queue_xmit(sk, skb->dev, buff, 1);
+
+       tcp_reset_xmit_timer(sk, TIME_RETRANS, TCP_TIMEOUT_INIT);
+
        tcp_statistics.TcpOutSegs++;
+
+       return 0;
 }
 
 /*
@@ -876,31 +837,31 @@ void tcp_send_synack(struct sock * newsk, struct sock * sk, struct sk_buff * skb
  *      - delay time <= 0.5 HZ
  *      - must send at least every 2 full sized packets
  *      - we don't have a window update to send
- *
- *     additional thoughts:
- *     - we should not delay sending an ACK if we have ato > 0.5 HZ.
- *       My thinking about this is that in this case we will just be
- *       systematically skewing the RTT calculation. (The rule about
- *       sending every two full sized packets will never need to be
- *       invoked, the delayed ack will be sent before the ATO timeout
- *       every time. Of course, the relies on our having a good estimate
- *       for packet interarrival times.)
  */
-void tcp_send_delayed_ack(struct sock * sk, int max_timeout, unsigned long timeout)
+
+void tcp_send_delayed_ack(struct sock * sk, int max_timeout)
 {
+       struct tcp_opt *tp = &sk->tp_pinfo.af_tcp;
+       unsigned long timeout, now;
+
        /* Calculate new timeout */
-       if (timeout > max_timeout)
-               timeout = max_timeout;
-       if (sk->bytes_rcv >= sk->max_unacked)
-               timeout = 0;
-       timeout += jiffies;
-
-       /* Use new timeout only if there wasn't an older one earlier  */
-       if (!del_timer(&sk->delack_timer) || timeout < sk->delack_timer.expires)
-               sk->delack_timer.expires = timeout;
-
-       sk->ack_backlog++;
-       add_timer(&sk->delack_timer);
+       now = jiffies;
+       timeout = tp->ato;
+
+       if (timeout > max_timeout || sk->bytes_rcv > (sk->mss << 2))
+       {
+               timeout = now;
+       }
+       else
+               timeout += now;
+
+       /* Use new timeout only if there wasn't a older one earlier  */
+       if (!del_timer(&tp->delack_timer) || timeout < tp->delack_timer.expires)
+       {
+               tp->delack_timer.expires = timeout;
+       }
+
+       add_timer(&tp->delack_timer);
 }
 
 
@@ -912,29 +873,15 @@ void tcp_send_delayed_ack(struct sock * sk, int max_timeout, unsigned long timeo
 void tcp_send_ack(struct sock *sk)
 {
        struct sk_buff *buff;
-       struct tcphdr *t1;
-       struct device *dev = NULL;
+       struct tcp_opt *tp=&(sk->tp_pinfo.af_tcp);
+       struct tcphdr *th;
        int tmp;
 
+       
        if(sk->zapped)
-               return;         /* We have been reset, we may not send again */
-               
-       /*
-        *      If we have nothing queued for transmit and the transmit timer
-        *      is on we are just doing an ACK timeout and need to switch
-        *      to a keepalive.
-        */
-
-       clear_delayed_acks(sk);
-
-       if (sk->send_head == NULL
-           && skb_queue_empty(&sk->write_queue)
-           && sk->ip_xmit_timeout == TIME_WRITE)
        {
-               if (sk->keepopen)
-                       tcp_reset_xmit_timer(sk,TIME_KEEPOPEN,TCP_TIMEOUT_LEN);
-               else
-                       del_timer(&sk->retransmit_timer);
+               /* We have been reset, we may not send again */
+               return;         
        }
 
        /*
@@ -951,11 +898,13 @@ void tcp_send_ack(struct sock *sk)
                 *      bandwidth on slow links to send a spare ack than
                 *      resend packets. 
                 */
-
-               tcp_send_delayed_ack(sk, HZ/2, HZ/2);
+                
+               tcp_send_delayed_ack(sk, HZ/2);
                return;
        }
 
+       clear_delayed_acks(sk);
+
        /*
         *      Assemble a suitable TCP frame
         */
@@ -968,35 +917,39 @@ void tcp_send_ack(struct sock *sk)
         *      Put in the IP header and routing stuff. 
         */
         
-       tmp = sk->prot->build_header(buff, sk->saddr, sk->daddr, &dev,
-                               IPPROTO_TCP, sk->opt, MAX_ACK_SIZE,sk->ip_tos,sk->ip_ttl,&sk->ip_route_cache);
+       tmp = tp->af_specific->build_net_header(sk, buff);
+
        if (tmp < 0) 
        {
                buff->free = 1;
                sock_wfree(sk, buff);
                return;
        }
-#if 0  /* why does this result in problems? */
-#ifndef CONFIG_NO_PATH_MTU_DISCOVERY
-       buff->ip_hdr->frag_off |= htons(IP_DF);
-#endif
-#endif
 
-       t1 =(struct tcphdr *)skb_put(buff,sizeof(struct tcphdr));
+       th =(struct tcphdr *)skb_put(buff,sizeof(struct tcphdr));
+
+       memcpy(th, &sk->dummy_th, sizeof(struct tcphdr));
+
+       /*
+        *      Swap the send and the receive. 
+        */
+        
+       th->window      = ntohs(tcp_select_window(sk));
+       th->seq         = ntohl(tp->snd_nxt);
+       th->ack_seq     = ntohl(tp->rcv_nxt);
 
        /*
         *      Fill in the packet and send it
         */
-        
-       memcpy(t1, &sk->dummy_th, sizeof(*t1));
-       t1->seq     = htonl(sk->sent_seq);
-       t1->ack_seq = htonl(sk->acked_seq);
-       t1->window  = htons(tcp_select_window(sk));
 
-       tcp_send_check(t1, sk->saddr, sk->daddr, sizeof(*t1), buff);
+       tp->af_specific->send_check(sk, th, sizeof(struct tcphdr), buff);
+
        if (sk->debug)
-                printk(KERN_ERR "\rtcp_ack: seq %x ack %x\n", sk->sent_seq, sk->acked_seq);
-       sk->prot->queue_xmit(sk, dev, buff, 1);
+                printk("\rtcp_send_ack: seq %x ack %x\n", 
+                       tp->snd_nxt, tp->rcv_nxt);
+
+       tp->af_specific->queue_xmit(sk, buff->dev, buff, 1);
+
        tcp_statistics.TcpOutSegs++;
 }
 
@@ -1007,9 +960,9 @@ void tcp_send_ack(struct sock *sk)
 
 void tcp_write_wakeup(struct sock *sk)
 {
-       struct sk_buff *buff,*skb;
+       struct sk_buff *buff, *skb;
        struct tcphdr *t1;
-       struct device *dev=NULL;
+       struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
        int tmp;
 
        if (sk->zapped)
@@ -1030,115 +983,56 @@ void tcp_write_wakeup(struct sock *sk)
        {
                return;
        }
-       if ( before(sk->sent_seq, sk->window_seq) && 
-           (skb=skb_peek(&sk->write_queue)))
+
+       if (before(tp->snd_nxt, tp->snd_una + tp->snd_wnd) && 
+           (skb=tp->send_head))
        {
                /*
                 * We are probing the opening of a window
                 * but the window size is != 0
                 * must have been a result SWS avoidance ( sender )
                 */
-           
-               struct iphdr *iph;
-               struct tcphdr *th;
-               struct tcphdr *nth;
-               unsigned long win_size;
-#if 0
-               unsigned long ow_size;
-#endif
-       
-               /*
-                *      How many bytes can we send ?
-                */
-                
-               win_size = sk->window_seq - sk->sent_seq;
 
-               /*
-                *      Recover the buffer pointers
-                */
-                
-               iph = (struct iphdr *)skb->ip_hdr;
-               th = (struct tcphdr *)(((char *)iph) +(iph->ihl << 2));
+               struct tcphdr *th;
+               unsigned long win_size;
 
-               /*
-                *      Grab the data for a temporary frame
-                */
-                
-               buff = sock_wmalloc(sk, win_size + th->doff * 4 + 
-                                    (iph->ihl << 2) +
-                                    sk->prot->max_header + 15, 
-                                    1, GFP_ATOMIC);
-               if ( buff == NULL )
-                       return;
-
-               /* 
-                *      If we strip the packet on the write queue we must
-                *      be ready to retransmit this one 
-                */
-           
-               buff->free = /*0*/1;
-
-               buff->sk = sk;
-               buff->localroute = sk->localroute;
-               
-               /*
-                *      Put headers on the new packet
-                */
+               win_size = tp->snd_wnd - (tp->snd_nxt - tp->snd_una);
 
-               tmp = sk->prot->build_header(buff, sk->saddr, sk->daddr, &dev,
-                                        IPPROTO_TCP, sk->opt, buff->truesize,
-                                        sk->ip_tos,sk->ip_ttl,&sk->ip_route_cache);
-               if (tmp < 0) 
-               {
-                       sock_wfree(sk, buff);
-                       return;
+               if (win_size < skb->end_seq - skb->seq)
+               {
+                       if (tcp_fragment(sk, skb, win_size))
+                       {
+                               printk(KERN_DEBUG "tcp_write_wakeup: "
+                                      "fragment failed\n");
+                               return;
+                       }
                }
+
+                               
+               th = skb->h.th;
                
-               /*
-                *      Move the TCP header over
-                */
+               tp->af_specific->send_check(sk, th, th->doff * 4 + win_size, 
+                                           skb);
 
-               buff->dev = dev;
+               buff = skb_clone(skb, GFP_ATOMIC);
 
-               nth = (struct tcphdr *) skb_put(buff,sizeof(*th));
+               atomic_add(buff->truesize, &sk->wmem_alloc);
+               atomic_inc(&sk->packets_out);
 
-               memcpy(nth, th, sizeof(*th));
-               
-               /*
-                *      Correct the new header
-                */
-                
-               nth->ack = 1; 
-               nth->ack_seq = htonl(sk->acked_seq);
-               nth->window = htons(tcp_select_window(sk));
-               nth->check = 0;
+               clear_delayed_acks(sk);
 
-               /*
-                *      Copy TCP options and data start to our new buffer
-                */
-                
-               buff->csum = csum_partial_copy((void *)(th + 1), skb_put(buff,win_size),
-                               win_size + th->doff*4 - sizeof(*th), 0);
-               
-               /*
-                *      Remember our right edge sequence number.
-                */
-                
-               buff->end_seq = sk->sent_seq + win_size;
-               sk->sent_seq = buff->end_seq;           /* Hack */
-               if(th->urg && ntohs(th->urg_ptr) < win_size)
-                       nth->urg = 0;
+               if (!tcp_timer_is_set(sk, TIME_RETRANS))
+                       tcp_reset_xmit_timer(sk, TIME_RETRANS, tp->rto);
 
-               /*
-                *      Checksum the split buffer
-                */
-                
-               tcp_send_check(nth, sk->saddr, sk->daddr, 
-                          nth->doff * 4 + win_size , buff);
+               skb->when = jiffies;
+
+               update_send_head(sk);
+
+               tp->snd_nxt = skb->end_seq;
        }
        else
        {       
-               buff = sock_wmalloc(sk,MAX_ACK_SIZE,1, GFP_ATOMIC);
+               buff = sock_wmalloc(sk,MAX_ACK_SIZE, 1, GFP_ATOMIC);
                if (buff == NULL) 
                        return;
 
@@ -1151,15 +1045,15 @@ void tcp_write_wakeup(struct sock *sk)
                 *      Put in the IP header and routing stuff. 
                 */
                 
-               tmp = sk->prot->build_header(buff, sk->saddr, sk->daddr, &dev,
-                               IPPROTO_TCP, sk->opt, MAX_ACK_SIZE,sk->ip_tos,sk->ip_ttl,&sk->ip_route_cache);
+               tmp = tp->af_specific->build_net_header(sk, buff);
+
                if (tmp < 0) 
                {
                        sock_wfree(sk, buff);
                        return;
                }
 
-               t1 = (struct tcphdr *)skb_put(buff,sizeof(struct tcphdr));
+               t1 = (struct tcphdr *) skb_put(buff, sizeof(struct tcphdr));
                memcpy(t1,(void *) &sk->dummy_th, sizeof(*t1));
 
                /*
@@ -1167,89 +1061,43 @@ void tcp_write_wakeup(struct sock *sk)
                 *      This should cause the other end to send an ack.
                 */
         
-               t1->seq = htonl(sk->sent_seq-1);
+               t1->seq = htonl(tp->snd_nxt-1);
 /*             t1->fin = 0;    -- We are sending a 'previous' sequence, and 0 bytes of data - thus no FIN bit */
-               t1->ack_seq = htonl(sk->acked_seq);
+               t1->ack_seq = htonl(tp->rcv_nxt);
                t1->window = htons(tcp_select_window(sk));
-               tcp_send_check(t1, sk->saddr, sk->daddr, sizeof(*t1), buff);
-
+               
+               tp->af_specific->send_check(sk, t1, sizeof(*t1), buff);
        }               
 
        /*
         *      Send it.
         */
-       
-       sk->prot->queue_xmit(sk, dev, buff, 1);
+
+       tp->af_specific->queue_xmit(sk, buff->dev, buff, 1);
        tcp_statistics.TcpOutSegs++;
 }
 
 /*
  *     A window probe timeout has occurred.
+ *     If window is not closed send a partial packet
+ *     else a zero probe.
  */
 
 void tcp_send_probe0(struct sock *sk)
 {
+       struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
+
        if (sk->zapped)
                return;         /* After a valid reset we can send no more */
 
-       tcp_write_wakeup(sk);
-
-       sk->backoff++;
-       sk->rto = min(sk->rto << 1, 120*HZ);
-       sk->retransmits++;
-       sk->prot->retransmits ++;
-       tcp_reset_xmit_timer (sk, TIME_PROBE0, sk->rto);
-}
 
-/*
- * Remove the portion of a packet that has already been sent.
- * Needed to deal with buggy TCP implementations that can't deal
- * with seeing a packet that contains some data that has already
- * been received.
- */
-void tcp_shrink_skb(struct sock *sk, struct sk_buff *skb, u32 ack)
-{
-       struct iphdr *iph;
-       struct tcphdr *th;
-       unsigned char *old, *new;
-       unsigned long len;
-       int diff;
+       tcp_write_wakeup(sk);
 
-       /*
-        *      Recover the buffer pointers
-        */
-        
-       iph = (struct iphdr *)skb->ip_hdr;
-       th = (struct tcphdr *)(((char *)iph) +(iph->ihl << 2));
-
-       /* how much data are we droping from the tcp frame */
-       diff = ack - skb->seq;
-       /* how much data are we keeping in the tcp frame */
-       len = (skb->end_seq - (th->fin + th->syn)) - ack;
-
-       /* pointers to new start of remaining data, and old start */
-       new = (unsigned char *)th + th->doff*4;
-       old = new+diff;
-
-       /* Update our starting seq number */
-       skb->seq = ack;
-       th->seq = htonl(ack);
-       iph->tot_len = htons(ntohs(iph->tot_len)-diff);
-
-       /* Get the partial checksum for the IP options */
-       if (th->doff*4 - sizeof(*th) > 0)
-               skb->csum = csum_partial((void *)(th+1),
-                               th->doff*4-sizeof(*th),0);
-       else
-               skb->csum = 0;
+       tp->pending = TIME_PROBE0;
 
-       /* Copy the good data down and get it's checksum */
-       skb->csum = csum_partial_copy((void *)old,(void *)new,len,skb->csum);
+       tp->backoff++;
+       tp->probes_out++;
 
-       /* shorten the skb */
-       skb_trim(skb,skb->len-diff);
-        
-       /* Checksum the shrunk buffer */
-       tcp_send_check(th, sk->saddr, sk->daddr, 
-                  th->doff * 4 + len , skb);
+       tcp_reset_xmit_timer (sk, TIME_PROBE0, 
+                             min(tp->rto << tp->backoff, 120*HZ));
 }
index 35de9fe8a125c4bd6839d446bda77c6aa38aea59..eb287a709b37995e50b3ebed05a8153d84fb289a 100644 (file)
  *             Matthew Dillon, <dillon@apollo.west.oic.com>
  *             Arnt Gulbrandsen, <agulbra@nvg.unit.no>
  *             Jorge Cwik, <jorge@laser.satlink.net>
- *
- * Fixes:
- *
- *             Eric Schenk     : Fix retransmission timeout counting.
  */
 
 #include <net/tcp.h>
 
-void tcp_delack_timer(unsigned long data)
+static void tcp_sltimer_handler(unsigned long);
+static void tcp_syn_recv_timer(unsigned long);
+static void tcp_keepalive(unsigned long data);
+
+struct timer_list      tcp_slow_timer = {
+       NULL, NULL,
+       0, 0,
+       tcp_sltimer_handler,
+};
+
+
+struct tcp_sl_timer tcp_slt_array[TCP_SLT_MAX] = {
+       {0, TCP_SYNACK_PERIOD, 0, tcp_syn_recv_timer},          /* SYNACK       */
+       {0, TCP_KEEPALIVE_PERIOD, 0, tcp_keepalive}             /* KEEPALIVE    */
+};
+
+/*
+ * Using different timers for retransmit, delayed acks and probes
+ * We may wish use just one timer maintaining a list of expire jiffies 
+ * to optimize.
+ */
+
+void tcp_init_xmit_timers(struct sock *sk)
 {
-       tcp_send_ack((struct sock *) data);
+       init_timer(&sk->tp_pinfo.af_tcp.retransmit_timer);
+       sk->tp_pinfo.af_tcp.retransmit_timer.function=&tcp_retransmit_timer;
+       sk->tp_pinfo.af_tcp.retransmit_timer.data = (unsigned long) sk;
+       
+       init_timer(&sk->tp_pinfo.af_tcp.delack_timer);
+       sk->tp_pinfo.af_tcp.delack_timer.function=&tcp_delack_timer;
+       sk->tp_pinfo.af_tcp.delack_timer.data = (unsigned long) sk;
+
+       init_timer(&sk->tp_pinfo.af_tcp.probe_timer);
+       sk->tp_pinfo.af_tcp.probe_timer.function=&tcp_probe_timer;
+       sk->tp_pinfo.af_tcp.probe_timer.data = (unsigned long) sk;
 }
 
 /*
  *     Reset the retransmission timer
  */
  
-void tcp_reset_xmit_timer(struct sock *sk, int why, unsigned long when)
+void tcp_reset_xmit_timer(struct sock *sk, int what, unsigned long when)
 {
-       del_timer(&sk->retransmit_timer);
-       sk->ip_xmit_timeout = why;
-       if (why == TIME_WRITE) {
-               /* In this case we want to timeout on the first packet
-                * in the resend queue. If the resend queue is empty,
-                * then the packet we are sending hasn't made it there yet,
-                * so we timeout from the current time.
-                */
-               if (sk->send_head) {
-                       sk->retransmit_timer.expires =
-                               sk->send_head->when + when;
-               } else {
-                       /* This should never happen!
-                        */
-                       printk(KERN_ERR "Error: send_head NULL in xmit_timer\n");
-                       sk->ip_xmit_timeout = 0;
-                       return;
-               }
-       } else {
-               sk->retransmit_timer.expires = jiffies+when;
-       }
+       struct tcp_opt *tp = &sk->tp_pinfo.af_tcp;
 
-       if (sk->retransmit_timer.expires < jiffies) {
-               /* We can get here if we reset the timer on an event
-                * that could not fire because the interrupts were disabled.
-                * make sure it happens soon.
-                */
-               sk->retransmit_timer.expires = jiffies+2;
+       if((long)when <= 0)
+       {               
+               printk("xmit_timer <= 0 - timer:%d when:%lx\n", what, when);
+               when=HZ/50;
        }
-       add_timer(&sk->retransmit_timer);
-}
 
-/*
- *     POLICY:
- *
- *     This is the normal code called for timeouts.  It does the retransmission
- *     and then does backoff.  tcp_do_retransmit is separated out because
- *     tcp_ack needs to send stuff from the retransmit queue without
- *     initiating a backoff.
- */
+       switch (what) {
+       case TIME_RETRANS:
+               /*
+                * When seting the transmit timer the probe timer 
+                * should not be set.
+                * The delayed ack timer can be set if we are changing the
+                * retransmit timer when removing acked frames.
+                */
+               del_timer(&tp->probe_timer);
+               del_timer(&tp->retransmit_timer);
+               tp->retransmit_timer.expires=jiffies+when;
+               add_timer(&tp->retransmit_timer);
+               break;
 
+       case TIME_DACK:
+               del_timer(&tp->delack_timer);
+               tp->delack_timer.expires=jiffies+when;
+               add_timer(&tp->delack_timer);
+               break;
 
-static void tcp_retransmit_time(struct sock *sk, int all)
-{
-       /*
-        * record how many times we've timed out.
-        * This determines when we should quite trying.
-        * This needs to be counted here, because we should not be
-        * counting one per packet we send, but rather one per round
-        * trip timeout.
-        */
-       sk->retransmits++;
+       case TIME_PROBE0:
+               del_timer(&tp->probe_timer);
+               tp->probe_timer.expires=jiffies+when;
+               add_timer(&tp->probe_timer);
+               break;  
 
-       tcp_do_retransmit(sk, all);
+       case TIME_WRITE:
+               printk("bug: tcp_reset_xmit_timer TIME_WRITE\n");
+               break;
 
-       /*
-        * Increase the timeout each time we retransmit.  Note that
-        * we do not increase the rtt estimate.  rto is initialized
-        * from rtt, but increases here.  Jacobson (SIGCOMM 88) suggests
-        * that doubling rto each time is the least we can get away with.
-        * In KA9Q, Karn uses this for the first few times, and then
-        * goes to quadratic.  netBSD doubles, but only goes up to *64,
-        * and clamps at 1 to 64 sec afterwards.  Note that 120 sec is
-        * defined in the protocol as the maximum possible RTT.  I guess
-        * we'll have to use something other than TCP to talk to the
-        * University of Mars.
-        *
-        * PAWS allows us longer timeouts and large windows, so once
-        * implemented ftp to mars will work nicely. We will have to fix
-        * the 120 second clamps though!
-        */
+       default:
+               printk("bug: unknown timer value\n");
+       }
+}
 
-       sk->backoff++;
-       sk->rto = min(sk->rto << 1, 120*HZ);
+void tcp_clear_xmit_timer(struct sock *sk, int what)
+{
+       struct tcp_opt *tp = &sk->tp_pinfo.af_tcp;
 
-       /* be paranoid about the data structure... */
-       if (sk->send_head)
-               tcp_reset_xmit_timer(sk, TIME_WRITE, sk->rto);
-       else
-               printk(KERN_ERR "send_head NULL in tcp_retransmit_time\n");
+       switch (what) {
+       case TIME_RETRANS:
+               del_timer(&tp->retransmit_timer);
+               break;
+       case TIME_DACK:
+               del_timer(&tp->delack_timer);
+               break;
+       case TIME_PROBE0:
+               del_timer(&tp->probe_timer);
+               break;  
+       default:
+               printk("bug: unknown timer value\n");
+       }
 }
 
-/*
- *     POLICY:
- *             Congestion control.
- *
- *     A timer event has trigger a tcp retransmit timeout. The
- *     socket xmit queue is ready and set up to send. Because
- *     the ack receive code keeps the queue straight we do
- *     nothing clever here.
- */
-
-void tcp_retransmit(struct sock *sk, int all)
+int tcp_timer_is_set(struct sock *sk, int what)
 {
-       if (all) 
-       {
-               tcp_retransmit_time(sk, all);
-               return;
+       struct tcp_opt *tp = &sk->tp_pinfo.af_tcp;
+
+       switch (what) {
+       case TIME_RETRANS:
+               return tp->retransmit_timer.next != NULL;
+               break;
+       case TIME_DACK:
+               return tp->delack_timer.next != NULL;
+               break;
+       case TIME_PROBE0:
+               return tp->probe_timer.next != NULL;
+               break;  
+       default:
+               printk("bug: unknown timer value\n");
        }
+       return 0;
+}
 
-       sk->ssthresh = sk->cong_window >> 1; /* remember window where we lost */
-       /* sk->ssthresh in theory can be zero.  I guess that's OK */
-       sk->cong_count = 0;
-       sk->cong_window = 1;
+void tcp_clear_xmit_timers(struct sock *sk)
+{      
+       struct tcp_opt *tp = &sk->tp_pinfo.af_tcp;
 
-       /* Do the actual retransmit. */
-       tcp_retransmit_time(sk, all);
+       del_timer(&tp->retransmit_timer);
+       del_timer(&tp->delack_timer);
+       del_timer(&tp->probe_timer);
 }
 
 /*
@@ -175,8 +185,11 @@ static int tcp_write_timeout(struct sock *sk)
                        sk->err=sk->err_soft;
                else
                        sk->err=ETIMEDOUT;
+
+               printk(KERN_DEBUG "syn timeout\n");
+
                sk->error_report(sk);
-               del_timer(&sk->retransmit_timer);
+               tcp_clear_xmit_timers(sk);
                tcp_statistics.TcpAttemptFails++;       /* Is this right ??? - FIXME - */
                tcp_set_state(sk,TCP_CLOSE);
                /* Don't FIN, we got nothing back */
@@ -192,7 +205,9 @@ static int tcp_write_timeout(struct sock *sk)
                else
                        sk->err = ETIMEDOUT;
                sk->error_report(sk);
-               del_timer(&sk->retransmit_timer);
+
+               tcp_clear_xmit_timers(sk);
+
                /*
                 *      Time wait the socket 
                 */
@@ -213,19 +228,147 @@ static int tcp_write_timeout(struct sock *sk)
        return 1;
 }
 
-/*
- *     It could be we got here because we needed to send an ack,
- *     so we need to check for that and not just normal retransmit.
- */
-static void tcp_time_write_timeout(struct sock * sk)
-{
+
+void tcp_delack_timer(unsigned long data) {
+
+       struct sock *sk = (struct sock*)data;
+
+       if(sk->zapped)
+       {
+               return;
+       }
+       
+       if (sk->delayed_acks)
+       {
+               tcp_read_wakeup(sk);            
+       }
+}
+
+void tcp_probe_timer(unsigned long data) {
+
+       struct sock *sk = (struct sock*)data;
+       struct tcp_opt *tp = &sk->tp_pinfo.af_tcp;
+
+       if(sk->zapped) 
+       {               
+               return;
+       }
+       
+       if (sk->users) 
+       {
+               /* 
+                * Try again in second 
+                */
+
+               tcp_reset_xmit_timer(sk, TIME_PROBE0, HZ);
+               return;
+       }
+
        /*
-        *      Retransmission
+        *      *WARNING* RFC 1122 forbids this
+        *      FIXME: We ought not to do it, Solaris 2.5 actually has fixing
+        *      this behaviour in Solaris down as a bug fix. [AC]
         */
-       sk->prot->retransmit (sk, 0);
-       tcp_write_timeout(sk);
+       if (tp->probes_out > TCP_RETR2) 
+       {
+               if(sk->err_soft)
+                       sk->err = sk->err_soft;
+               else
+                       sk->err = ETIMEDOUT;
+               sk->error_report(sk);
+
+               /*
+                *      Time wait the socket 
+                */
+               if (sk->state == TCP_FIN_WAIT1 || sk->state == TCP_FIN_WAIT2 
+                   || sk->state == TCP_CLOSING ) 
+               {
+                       tcp_set_state(sk, TCP_TIME_WAIT);
+                       tcp_reset_msl_timer (sk, TIME_CLOSE, TCP_TIMEWAIT_LEN);
+               }
+               else
+               {
+                       /*
+                        *      Clean up time.
+                        */
+                       tcp_set_state(sk, TCP_CLOSE);
+               }
+       }
+       
+       tcp_send_probe0(sk);
 }
 
+static __inline__ int tcp_keepopen_proc(struct sock *sk)
+{
+       int res = 0;
+
+       if (sk->state == TCP_ESTABLISHED || sk->state == TCP_CLOSE_WAIT)
+       {
+               struct tcp_opt *tp = &sk->tp_pinfo.af_tcp;
+               __u32 elapsed = jiffies - tp->rcv_tstamp;
+
+               if (elapsed >= TCP_KEEPALIVE_TIME)
+               {
+                       if (tp->probes_out > TCP_KEEPALIVE_PROBES)
+                       {
+                               if(sk->err_soft)
+                                       sk->err = sk->err_soft;
+                               else
+                                       sk->err = ETIMEDOUT;
+
+                               tcp_set_state(sk, TCP_CLOSE);
+                       }
+                       else
+                       {
+                               tp->probes_out++;
+                               tp->pending = TIME_KEEPOPEN;
+                               tcp_write_wakeup(sk);
+                               res = 1;
+                       }
+               }
+       }
+       return res;
+}
+
+/*
+ *     Check all sockets for keepalive timer
+ *     Called every 75 seconds
+ *     This timer is started by af_inet init routine and is constantly
+ *     running.
+ *
+ *     It might be better to maintain a count of sockets that need it using
+ *     setsockopt/tcp_destroy_sk and only set the timer when needed.
+ */
+
+/*
+ *     don't send over 5 keepopens at a time to avoid burstiness 
+ *     on big servers [AC]
+ */
+#define MAX_KA_PROBES  5
+
+static void tcp_keepalive(unsigned long data)
+{
+       struct sock *sk;
+       int count = 0;
+       int i;
+       
+       for(i=0; i < SOCK_ARRAY_SIZE; i++)
+       {
+               sk = tcp_prot.sock_array[i];
+               while (sk)
+               {
+                       if (sk->keepopen)
+                       {
+                               count += tcp_keepopen_proc(sk);
+                       }
+
+                       if (count == MAX_KA_PROBES)
+                               return;
+                       
+                       sk = sk->next;      
+               }
+       }
+}
 
 /*
  *     The TCP retransmit timer. This lacks a few small details.
@@ -235,67 +378,174 @@ static void tcp_time_write_timeout(struct sock * sk)
  *     2.      On a 'major timeout' as defined by RFC1122 we shouldn't report
  *             ETIMEDOUT if we know an additional 'soft' error caused this.
  *             tcp_err should save a 'soft error' for us.
+ *     [Unless someone has broken it then it does, except for one 2.0 
+ *     broken case of a send when the route/device is directly unreachable,
+ *     and we error but should retry! - FIXME] [AC]
  */
 
 void tcp_retransmit_timer(unsigned long data)
 {
        struct sock *sk = (struct sock*)data;
-       int why = sk->ip_xmit_timeout;
+       struct tcp_opt *tp = &sk->tp_pinfo.af_tcp;
 
        /*
         *      We are reset. We will send no more retransmits.
         */
-        
+
        if(sk->zapped)
+       {
+               tcp_clear_xmit_timer(sk, TIME_RETRANS);
                return;
-               
-       /* 
-        *      Only process if socket is not in use
+       }
+
+       /*
+        * Clear delay ack timer
         */
 
-       if (sk->users) 
+       tcp_clear_xmit_timer(sk, TIME_DACK);
+
+       /*
+        *      Retransmission
+        */
+
+       tp->retrans_head = NULL;
+       
+
+       if (sk->retransmits == 0)
        {
-               /* Try again in 1 second */
-               sk->retransmit_timer.expires = jiffies+HZ;
-               add_timer(&sk->retransmit_timer);
-               return;
+               /* 
+                * remember window where we lost 
+                * "one half of the current window but at least 2 segments"
+                */
+               
+               sk->ssthresh = max(sk->cong_window >> 1, 2); 
+               sk->cong_count = 0;
+               sk->cong_window = 1;
        }
 
-       if (sk->ack_backlog && !sk->dead) 
-               sk->data_ready(sk,0);
+       atomic_inc(&sk->retransmits);
+
+       tcp_do_retransmit(sk, 0);
+
+       /*
+        * Increase the timeout each time we retransmit.  Note that
+        * we do not increase the rtt estimate.  rto is initialized
+        * from rtt, but increases here.  Jacobson (SIGCOMM 88) suggests
+        * that doubling rto each time is the least we can get away with.
+        * In KA9Q, Karn uses this for the first few times, and then
+        * goes to quadratic.  netBSD doubles, but only goes up to *64,
+        * and clamps at 1 to 64 sec afterwards.  Note that 120 sec is
+        * defined in the protocol as the maximum possible RTT.  I guess
+        * we'll have to use something other than TCP to talk to the
+        * University of Mars.
+        *
+        * PAWS allows us longer timeouts and large windows, so once
+        * implemented ftp to mars will work nicely. We will have to fix
+        * the 120 second clamps though!
+        */
+
+       tp->backoff++;
+       tp->rto = min(tp->rto << 1, 120*HZ);
+       tcp_reset_xmit_timer(sk, TIME_RETRANS, tp->rto);
+
+       tcp_write_timeout(sk);
+}
+
+/*
+ *     Slow timer for SYN-RECV sockets
+ */
+
+static void tcp_syn_recv_timer(unsigned long data)
+{
+       struct sock *sk;
+       unsigned long now = jiffies;
+       int i;
+
+       for(i=0; i < SOCK_ARRAY_SIZE; i++)
+       {
+               sk = tcp_prot.sock_array[i];
+               while (sk)
+               {
+                       struct tcp_opt *tp = &sk->tp_pinfo.af_tcp;
+                       
+                       if (sk->state == TCP_LISTEN && !sk->users &&
+                           tp->syn_wait_queue)
+                       {
+                               struct open_request *req;
+                               
+                               req = tp->syn_wait_queue;
+
+                               while (tp->syn_wait_queue &&
+                                      (((long)(req->expires - now)) <= 0))
+                               {
+                                       struct open_request *conn;
 
-       /* Now we need to figure out why the socket was on the timer. */
+                                       conn = req;
+                                       req = req->dl_next;
+
+                                       if (conn->sk && conn->sk->state > TCP_SYN_RECV)
+                                               continue;
+                                       
+                                       tcp_synq_unlink(tp, conn);
+                                       
+                                       if (conn->retrans >= TCP_RETR1)
+                                       {
+                                               printk(KERN_DEBUG "syn_recv: "
+                                                      "too many retransmits\n");
+                                               (*conn->class->destructor)(conn);
+                                               tcp_dec_slow_timer(TCP_SLT_SYNACK);
+                                               kfree(conn);
+                                       }
+                                       else
+                                       {
+                                               __u32 timeo;
+                                               
+                                               (*conn->class->rtx_syn_ack)(sk, conn);
+
+                                               conn->retrans++;
+                                               printk(KERN_DEBUG "syn_ack rtx %d\n", conn->retrans);
+                                               timeo = min((TCP_TIMEOUT_INIT 
+                                                            << conn->retrans),
+                                                           120*HZ);
+                                               conn->expires = now + timeo;
+                                               tcp_synq_queue(tp, conn);
+                                       }
+                               }
+                       }
+                       
+                       sk = sk->next;
+               }
+       }
+}
+
+void tcp_sltimer_handler(unsigned long data)
+{
+       struct tcp_sl_timer *slt = tcp_slt_array;
+       unsigned long next = ~0UL;
+       unsigned long now = jiffies;
+       int i;
 
-       switch (why) 
+       for (i=0; i < TCP_SLT_MAX; i++, slt++)
        {
-       /* Window probing */
-       case TIME_PROBE0:
-               tcp_send_probe0(sk);
-               tcp_write_timeout(sk);
-               break;
+               if (slt->count)
+               {
+                       long trigger;
 
-       /* Retransmitting */
-       case TIME_WRITE:
-               tcp_time_write_timeout(sk);
-               break;
+                       trigger = slt->period - ((long)(now - slt->last));
 
-       /* Sending Keepalives */
-       case TIME_KEEPOPEN:
-               /* 
-                * this reset_timer() call is a hack, this is not
-                * how KEEPOPEN is supposed to work.
-                */
-               tcp_reset_xmit_timer (sk, TIME_KEEPOPEN, TCP_TIMEOUT_LEN);
-               /* Send something to keep the connection open. */
-               if (sk->prot->write_wakeup)
-                         sk->prot->write_wakeup (sk);
-               sk->retransmits++;
-               sk->prot->retransmits++;
-               tcp_write_timeout(sk);
-               break;
+                       if (trigger <= 0)
+                       {
+                               (*slt->handler)((unsigned long) slt);
+                               slt->last = now;
+                               trigger = slt->period;
+                       }
+                       next = min(next, trigger);
+               }
+       }
 
-       default:
-               printk (KERN_ERR "rexmit_timer: timer expired - reason unknown\n");
-               break;
+       if (next != ~0UL)
+       {
+               tcp_slow_timer.expires = now + next;
+               add_timer(&tcp_slow_timer);
        }
 }
index 458a7c72b93af6704f434d4d8787e6d2b0fd5c7c..664d8116707eafb69ad969985523f051fd75bbc6 100644 (file)
@@ -50,7 +50,7 @@
 #include <net/sock.h>
 #include <net/arp.h>
 
-void delete_timer (struct sock *t)
+void net_delete_timer (struct sock *t)
 {
        unsigned long flags;
 
@@ -63,9 +63,9 @@ void delete_timer (struct sock *t)
        restore_flags (flags);
 }
 
-void reset_timer (struct sock *t, int timeout, unsigned long len)
+void net_reset_timer (struct sock *t, int timeout, unsigned long len)
 {
-       delete_timer (t);
+       net_delete_timer (t);
        t->timeout = timeout;
 #if 1
   /* FIXME: ??? */
@@ -116,7 +116,7 @@ void net_timer (unsigned long data)
                case TIME_DONE:
                        /* If the socket hasn't been closed off, re-try a bit later */
                        if (!sk->dead) {
-                               reset_timer(sk, TIME_DONE, TCP_DONE_TIME);
+                               net_reset_timer(sk, TIME_DONE, TCP_DONE_TIME);
                                break;
                        }
 
@@ -140,11 +140,11 @@ void net_timer (unsigned long data)
                case TIME_CLOSE:
                        /* We've waited long enough, close the socket. */
                        sk->state = TCP_CLOSE;
-                       delete_timer (sk);
+                       net_delete_timer (sk);
                        if (!sk->dead)
                                sk->state_change(sk);
                        sk->shutdown = SHUTDOWN_MASK;
-                       reset_timer (sk, TIME_DONE, TCP_DONE_TIME);
+                       net_reset_timer (sk, TIME_DONE, TCP_DONE_TIME);
                        break;
 
                default:
index 831fe5a5e5997583aaa973a8d0370c0141b652e3..4c074ede8c5a7ee712e9694a574cfb3a88ef926c 100644 (file)
@@ -150,8 +150,8 @@ void udp_cache_zap(void)
  * to find the appropriate port.
  */
 
-void udp_err(int type, int code, unsigned char *header, __u32 daddr,
-       __u32 saddr, struct inet_protocol *protocol)
+void udp_err(int type, int code, unsigned char *header, __u32 info,
+            __u32 daddr, __u32 saddr, struct inet_protocol *protocol)
 {
        struct udphdr *uh;
        struct sock *sk;
@@ -437,8 +437,8 @@ static int udp_sendto(struct sock *sk, const unsigned char *from, int len, int n
  *     Temporary
  */
  
-static int udp_sendmsg(struct sock *sk, struct msghdr *msg, int len, int noblock, 
-       int flags)
+int udp_sendmsg(struct sock *sk, struct msghdr *msg, int len, int noblock, 
+               int flags)
 {
        if(msg->msg_iovlen==1)
                return udp_sendto(sk,msg->msg_iov[0].iov_base,len, noblock, flags, msg->msg_name, msg->msg_namelen);
@@ -523,7 +523,7 @@ int udp_ioctl(struct sock *sk, int cmd, unsigned long arg)
 
 
 /*
- *     This should be easy, if there is something there we\
+ *     This should be easy, if there is something there we
  *     return it, otherwise we block.
  */
 
@@ -591,8 +591,9 @@ int udp_recvmsg(struct sock *sk, struct msghdr *msg, int len,
        return(copied);
 }
 
-int udp_connect(struct sock *sk, struct sockaddr_in *usin, int addr_len)
+int udp_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 {
+       struct sockaddr_in *usin = (struct sockaddr_in *) uaddr;
        struct rtable *rt;
        if (addr_len < sizeof(*usin)) 
                return(-EINVAL);
@@ -632,7 +633,7 @@ static void udp_close(struct sock *sk, unsigned long timeout)
        destroy_sock(sk);
 }
 
-static inline void udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb)
+static inline int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb)
 {
        /*
         *      Charge it to the socket, dropping if the queue is full.
@@ -647,9 +648,10 @@ static inline void udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb)
                ip_statistics.IpInDelivers--;
                skb->sk = NULL;
                kfree_skb(skb, FREE_WRITE);
-               return;
+               return 0;
        }
        udp_statistics.UdpInDatagrams++;
+       return 0;
 }
 
 
@@ -698,15 +700,6 @@ int udp_rcv(struct sk_buff *skb, struct device *dev, struct options *opt,
        unsigned short ulen;
        int addr_type;
 
-       /*
-        * If we're doing a "redo" (the socket was busy last time
-        * around), we can just queue the packet now..
-        */
-       if (redo) {
-               udp_queue_rcv_skb(skb->sk, skb);
-               return 0;
-       }
-
        /*
         * First time through the loop.. Do all the setup stuff
         * (including finding out the socket we go to etc)
@@ -843,26 +836,26 @@ int udp_rcv(struct sk_buff *skb, struct device *dev, struct options *opt,
 
 struct proto udp_prot = {
        udp_close,
-       ip_build_header,
        udp_connect,
        NULL,
-       ip_queue_xmit,
        NULL,
        NULL,
        NULL,
-       udp_rcv,
        datagram_select,
        udp_ioctl,
        NULL,
        NULL,
+       NULL,
        ip_setsockopt,
        ip_getsockopt,
        udp_sendmsg,
        udp_recvmsg,
-       NULL,           /* No special bind function */
+       NULL,                   /* No special bind function */
+       udp_queue_rcv_skb,     
        128,
        0,
        "UDP",
        0, 0,
-       {NULL,}
+       NULL
 };
+
diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile
new file mode 100644 (file)
index 0000000..e16ce2f
--- /dev/null
@@ -0,0 +1,18 @@
+#
+# Makefile for the Linux TCP/IP (INET6) layer.
+#
+# Note! Dependencies are done automagically by 'make dep', which also
+# removes any old dependencies. DON'T put your own dependencies here
+# unless it's something special (ie not a .c file).
+#
+
+
+O_TARGET := ipv6.o
+O_OBJS   := af_inet6.o ipv6_output.o ipv6_input.o addrconf.o sit.o \
+           ipv6_route.o ipv6_sockglue.o ndisc.o udp.o raw.o \
+           protocol.o icmp.o mcast.o reassembly.o tcp_ipv6.o \
+           exthdrs.o sysctl_net_ipv6.o datagram.o
+
+M_OBJS   := $(O_TARGET)
+
+include $(TOPDIR)/Rules.make
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
new file mode 100644 (file)
index 0000000..a19838f
--- /dev/null
@@ -0,0 +1,1311 @@
+/*
+ *     IPv6 Address [auto]configuration
+ *     Linux INET6 implementation
+ *
+ *     Authors:
+ *     Pedro Roque             <roque@di.fc.ul.pt>     
+ *
+ *
+ *     This program is free software; you can redistribute it and/or
+ *      modify it under the terms of the GNU General Public License
+ *      as published by the Free Software Foundation; either version
+ *      2 of the License, or (at your option) any later version.
+ */
+
+
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/socket.h>
+#include <linux/sockios.h>
+#include <linux/sched.h>
+#include <linux/net.h>
+#include <linux/in6.h>
+#include <linux/netdevice.h>
+#include <linux/if_arp.h>
+
+#include <linux/proc_fs.h>
+#include <net/sock.h>
+#include <net/snmp.h>
+
+#include <net/ipv6.h>
+#include <net/protocol.h>
+#include <net/ndisc.h>
+#include <net/ipv6_route.h>
+#include <net/addrconf.h>
+#include <net/sit.h>
+
+#include <asm/uaccess.h>
+
+#define HASH_SIZE              16
+/*
+ *     Configured unicast address list
+ */
+struct inet6_ifaddr            *inet6_addr_lst[HASH_SIZE];
+
+/*
+ *     Hash list of configured multicast addresses
+ */
+struct ipv6_mc_list            *inet6_mcast_lst[HASH_SIZE];
+
+/*
+ *     AF_INET6 device list
+ */
+struct inet6_dev               *inet6_dev_lst;
+int                            in6_ifnum = 0;
+
+atomic_t                       addr_list_lock = 0;
+
+void addrconf_verify(unsigned long);
+
+static struct timer_list addr_chk_timer = {
+       NULL, NULL,
+       0, 0, addrconf_verify
+};
+
+
+int DupAddrDetectTransmits = 1;
+
+/*
+ *     /proc/sys switch for autoconf (enabled by default)
+ */
+int addrconf_sys_autoconf  = 1;
+
+static void addrconf_dad_start(struct inet6_ifaddr *ifp);
+static void addrconf_rs_timer(unsigned long data);
+
+int ipv6_addr_type(struct in6_addr *addr)
+{
+       u32 st;
+
+       st = addr->s6_addr32[0];
+
+       /* 
+        * UCast Provider Based Address
+        * 0x4/3
+        */
+
+       if ((st & __constant_htonl(0xE0000000)) == 
+           __constant_htonl(0x40000000))
+       {
+               return IPV6_ADDR_UNICAST;
+       }
+
+       if ((st & __constant_htonl(0xFF000000)) == 
+           __constant_htonl(0xFF000000))
+       {
+               int type = IPV6_ADDR_MULTICAST;
+
+               switch((st >> 16) & 0x0f)
+               {
+                       case 0x01:
+                               type |= IPV6_ADDR_LOOPBACK;
+                               break;
+                       case 0x02:
+                               type |= IPV6_ADDR_LINKLOCAL;
+                               break;
+                       case 0x05:
+                               type |= IPV6_ADDR_SITELOCAL;
+                               break;
+               }
+               return type;
+       }
+       
+       if ((st & __constant_htonl(0xFFC00000)) == 
+           __constant_htonl(0xFE800000))
+       {
+               return (IPV6_ADDR_LINKLOCAL | IPV6_ADDR_UNICAST);
+       }
+
+       if ((st & __constant_htonl(0xFFC00000)) == 
+           __constant_htonl(0xFEC00000))
+       {
+               return (IPV6_ADDR_SITELOCAL | IPV6_ADDR_UNICAST);
+       }
+
+       if ((addr->s6_addr32[0] | addr->s6_addr32[1]) == 0)
+       {
+               if (addr->s6_addr32[2] == 0)
+               {
+                       if (addr->in6_u.u6_addr32[3] == 0)
+                       {
+                               return IPV6_ADDR_ANY;
+                       }
+
+                       if (addr->s6_addr32[3] == __constant_htonl(0x00000001))
+                       {
+                               return (IPV6_ADDR_LOOPBACK | 
+                                       IPV6_ADDR_UNICAST);
+                       }
+
+                       return (IPV6_ADDR_COMPATv4 | IPV6_ADDR_UNICAST);
+               }
+
+               if (addr->s6_addr32[2] == __constant_htonl(0x0000ffff))
+                       return IPV6_ADDR_MAPPED;
+       }
+
+       return IPV6_ADDR_RESERVED;
+}
+
+struct inet6_dev * ipv6_add_dev(struct device *dev)
+{
+       struct inet6_dev *dev6;
+
+       /*
+        *      called by netdev notifier from a syscall
+        */
+       dev6 = (struct inet6_dev *) kmalloc(sizeof(struct inet6_dev), 
+                                           GFP_ATOMIC);
+
+       if (dev6 == NULL)
+               return NULL;
+
+       memset(dev6, 0, sizeof(struct inet6_dev));
+       dev6->dev = dev;
+       dev6->if_index = ++in6_ifnum;
+
+       /*
+        *      insert at head.
+        */
+
+       dev6->next = inet6_dev_lst;
+       inet6_dev_lst = dev6;
+
+       return dev6;
+}
+
+struct inet6_dev * ipv6_dev_by_index(int index)
+{
+       struct inet6_dev *in6_dev;
+
+       for (in6_dev = inet6_dev_lst; in6_dev; in6_dev = in6_dev->next)
+       {
+               if (in6_dev->if_index == index)
+                       return in6_dev;
+       }
+
+       return NULL;
+}
+
+void addrconf_forwarding_on(void)
+{
+       struct inet6_dev *in6_dev;
+       struct in6_addr maddr;
+
+       for (in6_dev = inet6_dev_lst; in6_dev; in6_dev = in6_dev->next)
+       {
+               printk(KERN_DEBUG "dev %s\n", in6_dev->dev->name);
+
+               if (in6_dev->dev->type == ARPHRD_ETHER)
+               {
+                       printk(KERN_DEBUG "joining all-routers\n");
+                       in6_dev->router = 1;
+                       ipv6_addr_all_routers(&maddr);
+                       ipv6_dev_mc_inc(in6_dev->dev, &maddr);          
+               }
+       }
+
+       if (last_resort_rt && (last_resort_rt->rt_flags & RTI_ALLONLINK))
+       {
+               rt_release(last_resort_rt);
+               last_resort_rt = NULL;
+       }
+}
+
+struct inet6_dev * ipv6_get_idev(struct device *dev)
+{
+       struct inet6_dev *in6_dev;
+
+       for (in6_dev = inet6_dev_lst; in6_dev; in6_dev = in6_dev->next)
+       {
+               if (in6_dev->dev == dev)
+               {
+                       return in6_dev;
+               }
+       }
+       return NULL;
+}
+
+struct inet6_ifaddr * ipv6_add_addr(struct inet6_dev *idev, 
+                                   struct in6_addr *addr, int scope)
+{
+       struct inet6_ifaddr * ifaddr;
+       int hash;
+       unsigned long flags;
+
+       save_flags(flags);
+       cli();
+
+       ifaddr = (struct inet6_ifaddr *) kmalloc(sizeof(struct inet6_ifaddr), 
+                                                GFP_ATOMIC);
+
+       if (ifaddr == NULL)
+       {
+               printk(KERN_DEBUG "ipv6_add_addr: malloc failed\n");
+               restore_flags(flags);
+               return NULL;
+       }
+
+       memset(ifaddr, 0, sizeof(struct inet6_ifaddr));
+       memcpy(&ifaddr->addr, addr, sizeof(struct in6_addr));
+
+       ifaddr->scope = scope;
+       ifaddr->idev = idev;
+       
+
+       /* add to list */
+
+       hash = ipv6_addr_hash(addr);
+
+       ifaddr->lst_next = inet6_addr_lst[hash];
+       inet6_addr_lst[hash] = ifaddr;
+
+
+       /* add to inet6_dev unicast addr list */
+       ifaddr->if_next = idev->addr_list;
+       idev->addr_list = ifaddr;
+
+       restore_flags(flags);
+       return ifaddr;
+       
+}
+
+void ipv6_del_addr(struct inet6_ifaddr *ifp)
+{
+       struct inet6_ifaddr *iter, **back;
+       int hash;
+
+       if (addr_list_lock)
+       {
+               ifp->flags |= ADDR_INVALID;
+               return;
+       }
+
+       hash = ipv6_addr_hash(&ifp->addr);
+
+       iter = inet6_addr_lst[hash];
+       back = &inet6_addr_lst[hash];
+
+       for (; iter; iter = iter->lst_next)
+       {
+               if (iter == ifp)
+               {
+                       *back = ifp->lst_next;
+                       ifp->lst_next = NULL;
+                       break;
+               }
+               back = &(iter->lst_next);
+       }
+
+       iter = ifp->idev->addr_list;
+       back = &ifp->idev->addr_list;
+
+       for (; iter; iter = iter->if_next)
+       {
+               if (iter == ifp)
+               {
+                       *back = ifp->if_next;
+                       ifp->if_next = NULL;
+                       break;
+               }
+               back = &(iter->if_next);
+       }
+       
+       kfree(ifp);
+}
+
+/*
+ *     Choose an apropriate source address
+ *     should do:
+ *     i)      get an address with an apropriate scope
+ *     ii)     see if there is a specific route for the destination and use
+ *             an address of the attached interface 
+ *     iii)    don't use deprecated addresses
+ *
+ *     at the moment i believe only iii) is missing.
+ */
+struct inet6_ifaddr * ipv6_get_saddr(struct rt6_info *rt, struct in6_addr *daddr)
+{
+       int scope;
+       struct inet6_ifaddr * ifp = NULL;
+       struct inet6_dev    * i6dev;
+       struct inet6_ifaddr * match = NULL;
+       struct device *dev = NULL;
+       int i;
+
+       if (rt)
+       {
+               dev = rt->rt_dev;
+       }
+       
+       atomic_inc(&addr_list_lock);
+
+       scope = ipv6_addr_type(daddr);
+
+       scope &= IPV6_ADDR_SCOPE_MASK;
+
+       if (rt && (rt->rt_flags & RTI_ALLONLINK))
+       {
+               /*
+                *      route for the "all destinations on link" rule
+                *      when no routers are present
+                */
+               scope = IFA_LINK;
+       }
+
+       /*
+        *      known dev
+        *      search dev and walk through dev addresses
+        */
+
+       if (dev)
+       {
+               if (dev->flags & IFF_LOOPBACK)
+               {
+                       scope = IFA_HOST;
+               }
+
+               for (i6dev = inet6_dev_lst; i6dev; i6dev=i6dev->next)
+               {
+                       if (i6dev->dev == dev)
+                       {
+                               for (ifp=i6dev->addr_list; ifp; 
+                                    ifp=ifp->if_next)
+                               {
+                                       if (ifp->scope == scope)
+                                       {
+                                               if (!(ifp->flags & ADDR_STATUS))
+                                               {
+                                                       goto out;
+                                               }
+                                               if (!(ifp->flags & ADDR_INVALID))
+                                               {
+                                                       match = ifp;
+                                               }
+                                       }
+                               }
+                               break;
+                       }
+               }
+       }
+
+       if (scope == IFA_LINK)
+       {
+               goto out;
+       }
+
+       /*
+        *      dev == NULL or search failed for specified dev
+        */
+
+       for (i=0; i < HASH_SIZE; i++)
+       {
+               for (ifp=inet6_addr_lst[i]; ifp; ifp=ifp->lst_next)
+               {
+                       if (ifp->scope == scope)
+                       {
+                               if (!(ifp->flags & ADDR_STATUS))
+                               {
+                                       goto out;
+                               }
+                               if (!(ifp->flags & ADDR_INVALID))
+                               {
+                                       match = ifp;
+                               }
+                       }
+               }
+       }
+
+  out:
+       if (ifp == NULL && match)
+       {
+               ifp = match;
+       }
+       atomic_dec(&addr_list_lock);
+       return ifp;
+}
+
+struct inet6_ifaddr * ipv6_get_lladdr(struct device *dev)
+{
+       struct inet6_ifaddr *ifp;
+       struct inet6_dev *i6dev;
+
+       for (i6dev = inet6_dev_lst; i6dev; i6dev=i6dev->next)
+       {
+               if (i6dev->dev == dev)
+               {
+                       for (ifp=i6dev->addr_list; ifp; ifp=ifp->if_next)
+                       {
+                               if (ifp->scope == IFA_LINK)
+                                       return ifp;
+                       }
+                       break;
+               }
+       }
+       return NULL;
+}
+
+/*
+ *     Retrieve the ifaddr struct from an v6 address
+ *     Called from ipv6_rcv to check if the address belongs 
+ *     to the host.
+ */
+
+struct inet6_ifaddr * ipv6_chk_addr(struct in6_addr *addr)
+{
+       struct inet6_ifaddr * ifp;
+       u8 hash;
+
+       atomic_inc(&addr_list_lock);
+
+       hash = ipv6_addr_hash(addr);
+
+       for(ifp = inet6_addr_lst[hash]; ifp; ifp=ifp->lst_next)
+       {
+               if (ipv6_addr_cmp(&ifp->addr, addr) == 0)
+               {
+                       break;
+               }
+       }
+
+       atomic_dec(&addr_list_lock);
+       return ifp;     
+}
+
+static void sit_route_add(struct device *dev)
+{
+       struct in6_rtmsg rtmsg; 
+       int err;
+
+       rtmsg.rtmsg_type = RTMSG_NEWROUTE;
+
+       memset(&rtmsg.rtmsg_dst, 0, sizeof(struct in6_addr));
+       memset(&rtmsg.rtmsg_gateway, 0, sizeof(struct in6_addr));
+
+       if (dev->pa_dstaddr == 0)
+       {
+               /* prefix length - 96 bytes "::d.d.d.d" */
+               rtmsg.rtmsg_prefixlen = 96;
+               rtmsg.rtmsg_metric = 1;
+               rtmsg.rtmsg_flags = RTF_NEXTHOP|RTF_UP;
+       }
+       else
+       {
+               rtmsg.rtmsg_prefixlen = 128;
+               rtmsg.rtmsg_dst.s6_addr32[3] = dev->pa_dstaddr;
+               rtmsg.rtmsg_metric = 1;
+               rtmsg.rtmsg_flags = RTF_HOST|RTF_UP;
+       }
+
+       strcpy(rtmsg.rtmsg_device, dev->name);
+
+       err = ipv6_route_add(&rtmsg);
+
+       if (err)
+       {
+               printk(KERN_DEBUG "sit_route_add: error in route_add\n");
+       }
+}
+
+static void init_loopback(struct device *dev)
+{
+       struct in6_addr addr;
+       struct inet6_dev  *idev;
+       struct inet6_ifaddr * ifp;
+       struct in6_rtmsg rtmsg;
+       char devname[] = "lo";
+       int err;
+
+       /* ::1 */
+
+       memset(&addr, 0, sizeof(struct in6_addr));
+       addr.s6_addr[15] = 1;
+
+       idev = ipv6_add_dev(dev);
+
+       if (idev == NULL)
+       {
+               printk(KERN_DEBUG "init loopback: add_dev failed\n");
+               return;
+       }
+
+       ifp = ipv6_add_addr(idev, &addr, IFA_HOST);
+
+       if (ifp == NULL)
+       {
+               printk(KERN_DEBUG "init_loopback: add_addr failed\n");
+               return;
+       }
+
+       ifp->flags |= ADDR_PERMANENT;
+
+       memcpy(&rtmsg.rtmsg_dst, &addr, sizeof(struct in6_addr));
+       memset(&rtmsg.rtmsg_gateway, 0, sizeof(struct in6_addr));
+
+       rtmsg.rtmsg_prefixlen = 128;
+       rtmsg.rtmsg_metric = 1;
+       strcpy(rtmsg.rtmsg_device, devname);
+
+       rtmsg.rtmsg_flags = RTF_NEXTHOP|RTF_HOST|RTF_UP;
+
+       err = ipv6_route_add(&rtmsg);
+
+       if (err)
+       {
+               printk(KERN_DEBUG "init_loopback: error in route_add\n");
+       }
+
+       /* add route for ::127.0.0.1 */
+}
+
+static void addrconf_eth_config(struct device *dev)
+{
+       struct in6_addr addr;
+       struct in6_addr maddr;
+       struct inet6_ifaddr * ifp;
+       struct inet6_dev    * idev;
+
+       memset(&addr, 0, sizeof(struct in6_addr));
+
+       /* generate link local address*/
+       addr.s6_addr[0] = 0xFE;
+       addr.s6_addr[1] = 0x80;
+
+       memcpy(addr.s6_addr + (sizeof(struct in6_addr) - dev->addr_len), 
+              dev->dev_addr, dev->addr_len);
+
+       idev = ipv6_add_dev(dev);
+                       
+       if (idev == NULL)
+               return;
+       
+       ifp = ipv6_add_addr(idev, &addr, IFA_LINK);
+                       
+       if (ifp == NULL)
+               return;
+
+       ifp->flags |= (DAD_INCOMPLETE | ADDR_PERMANENT);
+       ifp->prefix_len = 10;
+
+       /* join to all nodes multicast group */
+       ipv6_addr_all_nodes(&maddr);
+       ipv6_dev_mc_inc(dev, &maddr);
+       
+       if (ipv6_forwarding)
+       {
+               idev->router = 1;
+               ipv6_addr_all_routers(&maddr);
+               ipv6_dev_mc_inc(dev, &maddr);           
+       }
+
+       /* join to solicited addr multicast group */
+       addrconf_addr_solict_mult(&addr, &maddr);
+       ipv6_dev_mc_inc(dev, &maddr);
+                       
+       /* start dad */
+       addrconf_dad_start(ifp);
+}
+
+void addrconf_prefix_rcv(struct device *dev, u8 *opt, int len)
+{
+       struct prefix_info *pinfo;
+       struct rt6_info *rt;
+       __u32 valid_lft;
+       __u32 prefered_lft;
+       int addr_type;
+       unsigned long rt_expires;
+
+       pinfo = (struct prefix_info *) opt;
+       
+       if (len < sizeof(struct prefix_info))
+       {
+               printk(KERN_DEBUG "addrconf: prefix option too short\n");
+               return;
+       }
+       
+       /*
+        *      Validation checks ([ADDRCONF], page 19)
+        */
+
+       addr_type = ipv6_addr_type(&pinfo->prefix);
+
+       if (addr_type & IPV6_ADDR_LINKLOCAL)
+       {
+               return;
+       }
+
+       valid_lft = ntohl(pinfo->valid);
+       prefered_lft = ntohl(pinfo->prefered);
+
+       if (prefered_lft > valid_lft)
+       {
+               printk(KERN_WARNING
+                      "addrconf: prefix option has invalid lifetime\n");
+               return;
+       }
+
+       /*
+        *      If we where using an "all destinations on link" route
+        *      delete it
+        */
+
+       if (last_resort_rt && (last_resort_rt->rt_flags & RTI_ALLONLINK))
+       {
+               rt_release(last_resort_rt);
+               last_resort_rt = NULL;
+       }
+
+       /*
+        *      Two things going on here:
+        *      1) Add routes for on-link prefixes
+        *      2) Configure prefixes with the auto flag set
+        */
+
+       rt_expires = jiffies + valid_lft * HZ;
+       if (rt_expires < jiffies)
+       {
+               rt_expires = ~0;
+       }
+
+       rt = fibv6_lookup(&pinfo->prefix, dev, RTI_DYNAMIC|RTI_GATEWAY);
+               
+       if (rt)
+       {
+               if (pinfo->onlink == 0 || valid_lft == 0)
+               {
+                       /*
+                        *      delete route
+                        */
+                       fib6_del_rt(rt);
+                       rt = NULL;
+               }
+               else
+               {
+                       rt->rt_expires = rt_expires;
+               }
+       }
+       else if (pinfo->onlink && valid_lft)
+       {
+               struct in6_rtmsg rtmsg;
+
+               printk(KERN_DEBUG "adding on link route\n");
+               ipv6_addr_copy(&rtmsg.rtmsg_dst, &pinfo->prefix);
+               memset(&rtmsg.rtmsg_gateway, 0, sizeof(struct in6_addr));
+
+               rtmsg.rtmsg_prefixlen = pinfo->prefix_len;
+               rtmsg.rtmsg_metric = 1;
+               memcpy(rtmsg.rtmsg_device, dev->name, strlen(dev->name) + 1);
+               rtmsg.rtmsg_flags = RTF_UP | RTF_ADDRCONF;
+               rtmsg.rtmsg_info = rt_expires;
+
+               ipv6_route_add(&rtmsg);
+       }
+
+       if (pinfo->autoconf && addrconf_sys_autoconf)
+       {
+               struct inet6_ifaddr * ifp;
+               struct in6_addr addr;
+               int plen;
+
+               plen = pinfo->prefix_len >> 3;
+
+               if (plen + dev->addr_len == sizeof(struct in6_addr))
+               {
+                       memcpy(&addr, &pinfo->prefix, plen);
+                       memcpy(addr.s6_addr + plen, dev->dev_addr,
+                              dev->addr_len);
+               }
+               else
+               {
+                       printk(KERN_DEBUG
+                              "addrconf: prefix_len invalid\n");
+                       return;
+               }
+
+               ifp = ipv6_chk_addr(&addr);
+
+               if (ifp == NULL && valid_lft)
+               {
+                       /* create */
+
+                       struct inet6_dev *in6_dev;
+
+                       in6_dev = ipv6_get_idev(dev);
+
+                       if (in6_dev == NULL)
+                       {
+                               printk(KERN_DEBUG
+                                      "addrconf: device not configured\n");
+                       }
+                       
+                       ifp = ipv6_add_addr(in6_dev, &addr,
+                                           addr_type & IPV6_ADDR_SCOPE_MASK);
+
+                       if (dev->flags & IFF_MULTICAST)
+                       {
+                               struct in6_addr maddr;
+
+                               /* join to solicited addr multicast group */
+                               addrconf_addr_solict_mult(&addr, &maddr);
+                               ipv6_dev_mc_inc(dev, &maddr);
+                       }
+
+                       ifp->flags |= DAD_INCOMPLETE;
+                       ifp->prefix_len = pinfo->prefix_len;
+
+                       addrconf_dad_start(ifp);
+                       
+               }
+
+               if (ifp && valid_lft == 0)
+               {
+                       ipv6_del_addr(ifp);
+                       ifp = NULL;
+               }
+
+               if (ifp)
+               {
+                       ifp->valid_lft = valid_lft;
+                       ifp->prefered_lft = prefered_lft;
+                       ifp->tstamp = jiffies;
+               }
+       }
+
+}
+
+/*
+ *     Set destination address.
+ *     Special case for SIT interfaces where we create a new "virtual"
+ *     device.
+ */
+int addrconf_set_dstaddr(void *arg)
+{
+       struct in6_ifreq ireq;
+       struct device *dev;
+       int err;
+
+       err = copy_from_user(&ireq, arg, sizeof(struct in6_ifreq));
+       
+       if (err)
+               return -EFAULT;
+
+       dev = dev_get(ireq.devname);
+
+       if (dev->type == ARPHRD_SIT)
+       {
+               struct device *dev;
+               
+               if (!(ipv6_addr_type(&ireq.addr) & IPV6_ADDR_COMPATv4))
+               {
+                       return -EADDRNOTAVAIL;
+               }
+               
+               dev = sit_add_tunnel(ireq.addr.s6_addr32[3]);
+               
+               if (dev == NULL)
+                       return -ENOMEM;
+
+               return 0;
+       }
+       
+       return -EINVAL;
+}
+
+/*
+ *     Manual configuration of address on an interface
+ */
+int addrconf_add_ifaddr(void *arg)
+{
+       struct inet6_dev *in6_dev;
+       struct in6_ifreq ireq;
+       struct inet6_ifaddr *ifp;
+       struct device *dev;
+       int addr_type;
+       int err;
+       
+       if (!suser())
+               return -EPERM;
+       
+       err = copy_from_user(&ireq, arg, sizeof(struct in6_ifreq));
+       if (err)
+               return -EFAULT;
+
+       dev = dev_get(ireq.devname);
+
+       if (dev == NULL)
+               return -EINVAL;
+
+       in6_dev = ipv6_get_idev(dev);
+
+       if (in6_dev == NULL)
+               return -EINVAL;
+
+       addr_type  = ipv6_addr_type(&ireq.addr);
+       addr_type &= IPV6_ADDR_SCOPE_MASK;
+       
+       ifp = ipv6_add_addr(in6_dev, &ireq.addr, addr_type);
+
+       if (ifp == NULL)
+               return -ENOMEM;
+
+       if (dev->flags & IFF_MULTICAST)
+       {
+               struct in6_addr maddr;
+
+               /* join to solicited addr multicast group */
+               addrconf_addr_solict_mult(&ireq.addr, &maddr);
+               ipv6_dev_mc_inc(dev, &maddr);
+       }
+
+
+       ifp->prefix_len = ireq.prefix_len;
+       ifp->flags |= ADDR_PERMANENT;
+
+       if (!(dev->flags & (IFF_NOARP|IFF_LOOPBACK)))
+       {
+               ifp->flags |= DAD_INCOMPLETE;
+               addrconf_dad_start(ifp);
+       }
+       return 0;
+}
+
+static void sit_add_v4_addrs(struct inet6_dev *idev)
+{
+       struct inet6_ifaddr * ifp;
+       struct in6_addr addr;
+       struct device *dev;
+       int flag;
+
+       memset(&addr, 0, sizeof(struct in6_addr));
+
+       if (idev->dev->pa_dstaddr)
+       {
+               addr.s6_addr32[0] = __constant_htonl(0xfe800000);
+               flag = IFA_LINK;
+       }
+       else
+       {
+               flag = IFA_GLOBAL | IPV6_ADDR_COMPATv4;
+       }
+
+        for (dev = dev_base; dev != NULL; dev = dev->next) 
+        {
+               if (dev->family == AF_INET && (dev->flags & IFF_UP))
+               {
+                       addr.s6_addr32[3] = dev->pa_addr;
+
+                       if (dev->flags & IFF_LOOPBACK)
+                       {
+                               if (idev->dev->pa_dstaddr)
+                                       continue;
+                               
+                               flag = IFA_HOST | IPV6_ADDR_COMPATv4;
+                       }
+
+                       ifp = ipv6_add_addr(idev, &addr, flag);
+                       
+                       if (ifp == NULL)
+                               continue;
+
+                       ifp->flags |= ADDR_PERMANENT;
+               }
+        }
+}
+
+int addrconf_notify(struct notifier_block *this, unsigned long event, 
+                   void * data)
+{
+       struct device *dev;
+       struct inet6_dev    * idev;
+
+       dev = (struct device *) data;
+
+       switch(event) {
+       case NETDEV_UP:
+               switch(dev->type) {
+               case ARPHRD_SIT:
+
+                       printk(KERN_DEBUG "sit device up: %s\n", dev->name);
+
+                       /* 
+                        * Configure the tunnel with one of our IPv4 
+                        * addresses... we should configure all of 
+                        * our v4 addrs in the tunnel
+                        */
+
+                       idev = ipv6_add_dev(dev);
+                       
+                       sit_add_v4_addrs(idev);
+
+                       /*
+                        *  we do an hack for now to configure the tunnel
+                        *  route.
+                        */
+
+                       sit_route_add(dev);
+                       break;
+
+               case ARPHRD_LOOPBACK:
+                       init_loopback(dev);
+                       break;
+
+               case ARPHRD_ETHER:
+
+                       printk(KERN_DEBUG "Configuring eth interface\n");
+                       addrconf_eth_config(dev);
+                       break;
+               }
+               rt6_sndmsg(RTMSG_NEWDEVICE, NULL, NULL, 0, 0, dev->name, 0);
+               break;
+
+       case NETDEV_DOWN:
+               /*
+                *      Remove all addresses from this interface
+                *      and take the interface out of the list.
+                */
+               rt6_sndmsg(RTMSG_NEWDEVICE, NULL, NULL, 0, 0, dev->name, 0);
+
+               break;
+       }
+       
+       return NOTIFY_OK;
+}
+
+static void addrconf_dad_completed(struct inet6_ifaddr *ifp)
+{
+       struct in6_rtmsg rtmsg;
+       struct device *dev;
+       int err;
+
+
+       if (ipv6_addr_type(&ifp->addr) & IPV6_ADDR_LINKLOCAL)
+       {
+               struct in6_addr all_routers;
+
+               /*
+                *      1) configure a link route for this interface
+                *      2) send a (delayed) router solicitation
+                */
+
+               memcpy(&rtmsg.rtmsg_dst, &ifp->addr, sizeof(struct in6_addr));
+               memset(&rtmsg.rtmsg_gateway, 0, sizeof(struct in6_addr));
+
+               dev = ifp->idev->dev;
+
+               rtmsg.rtmsg_prefixlen = ifp->prefix_len;
+               rtmsg.rtmsg_metric = 1;
+               memcpy(rtmsg.rtmsg_device, dev->name, strlen(dev->name) + 1);
+
+               rtmsg.rtmsg_flags = RTF_UP;
+
+               err = ipv6_route_add(&rtmsg);
+               
+               if (err)
+               {
+                       printk(KERN_DEBUG "dad_complete: error in route_add\n");
+               }
+
+               if (ipv6_forwarding == 0)
+               {
+                       ipv6_addr_set(&all_routers,
+                                     __constant_htonl(0xff020000U), 0, 0,
+                                     __constant_htonl(0x2U));
+
+                       /*
+                        *      If a host as already performed a random delay
+                        *      [...] as part of DAD [...] there is no need
+                        *      to delay again before sending the first RS
+                        */
+                       ndisc_send_rs(ifp->idev->dev, &ifp->addr,
+                                     &all_routers);
+
+                       ifp->probes = 1;
+                       ifp->timer.function = addrconf_rs_timer;
+                       ifp->timer.expires = (jiffies + 
+                                             RTR_SOLICITATION_INTERVAL);
+                       ifp->idev->if_flags |= IF_RS_SENT;
+                       add_timer(&ifp->timer);
+               }
+       }
+
+}
+
+static void addrconf_dad_timer(unsigned long data)
+{
+       struct inet6_ifaddr *ifp;
+       struct in6_addr unspec;
+       struct in6_addr mcaddr;
+
+       ifp = (struct inet6_ifaddr *) data;
+
+       if (ifp->probes-- == 0)
+       {
+               /*
+                * DAD was successful
+                */
+
+               ifp->flags &= ~DAD_INCOMPLETE;
+               addrconf_dad_completed(ifp);
+               return;
+       }
+
+       /* send a neighbour solicitation for our addr */
+       memset(&unspec, 0, sizeof(unspec));
+       addrconf_addr_solict_mult(&ifp->addr, &mcaddr);
+
+       ndisc_send_ns(ifp->idev->dev, NULL, &ifp->addr, &mcaddr, &unspec);
+
+       ifp->timer.expires = jiffies + RETRANS_TIMER;
+       add_timer(&ifp->timer);
+}
+
+static void addrconf_rs_timer(unsigned long data)
+{
+       struct inet6_ifaddr *ifp;
+
+       ifp = (struct inet6_ifaddr *) data;
+
+       if (ipv6_forwarding)
+               return;
+
+       if (ifp->idev->if_flags & IF_RA_RCVD)
+       {
+               /*
+                *      Announcement received after solicitation
+                *      was sent
+                */
+               return;
+       }
+
+       if (ifp->probes++ <= MAX_RTR_SOLICITATIONS)
+       {
+               struct in6_addr all_routers;
+
+               ipv6_addr_set(&all_routers,
+                             __constant_htonl(0xff020000U), 0, 0,
+                             __constant_htonl(0x2U));
+
+               ndisc_send_rs(ifp->idev->dev, &ifp->addr,
+                             &all_routers);
+       
+               
+               ifp->timer.function = addrconf_rs_timer;
+               ifp->timer.expires = jiffies + RTR_SOLICITATION_INTERVAL;
+               add_timer(&ifp->timer);
+       }
+       else
+       {
+               printk(KERN_DEBUG "%s: no IPv6 routers present\n",
+                      ifp->idev->dev->name);
+
+               if (!default_rt_list && !last_resort_rt)
+               {
+                       struct rt6_info *rt;
+
+                       /*
+                        *      create a last resort route with all
+                        *      destinations on link
+                        */
+                       rt = kmalloc(sizeof(struct rt6_info), GFP_ATOMIC);
+
+                       if (rt)
+                       {
+                               memset(rt, 0, sizeof(struct rt6_info));
+                               rt->rt_dev = ifp->idev->dev;
+                               rt->rt_ref = 1;
+                               rt->rt_flags = (RTI_ALLONLINK | RTF_UP);
+                               last_resort_rt = rt;
+                       }
+               }
+       }
+}
+
+static void addrconf_dad_start(struct inet6_ifaddr *ifp)
+{
+       static int rand_seed = 1;
+       int rand_num;
+
+       if (rand_seed)
+       {
+               rand_seed = 0;
+               nd_rand_seed = ifp->addr.s6_addr32[3];
+       }
+
+       init_timer(&ifp->timer);
+       ifp->probes = DupAddrDetectTransmits;
+
+       rand_num = ipv6_random() % MAX_RTR_SOLICITATION_DELAY;
+
+       ifp->timer.function = addrconf_dad_timer;
+       ifp->timer.data = (unsigned long) ifp;
+       ifp->timer.expires = jiffies + rand_num;
+
+       add_timer(&ifp->timer);
+}
+
+static int iface_proc_info(char *buffer, char **start, off_t offset,
+                          int length, int dummy)
+{
+       struct inet6_ifaddr *ifp;
+       int i;
+       int len = 0;
+
+       for (i=0; i < HASH_SIZE; i++)
+               for (ifp=inet6_addr_lst[i]; ifp; ifp=ifp->lst_next)
+               {
+                       int j;
+
+                       for (j=0; j<16; j++)
+                       {
+                               sprintf(buffer + len, "%02x",
+                                       ifp->addr.s6_addr[j]);
+                               len += 2;
+                       }
+
+                       len += sprintf(buffer + len,
+                                      " %02x %02x %02x %02x %8s\n",
+                                      ifp->idev->if_index,
+                                      ifp->prefix_len,
+                                      ifp->scope,
+                                      ifp->flags,
+                                      ifp->idev->dev->name);
+               }
+
+       *start = buffer + offset;
+
+       len -= offset;
+
+       if (len > length)
+               len = length;
+       return len;
+}
+
+struct proc_dir_entry iface_proc_entry =
+{
+        0, 8, "if_inet6",
+        S_IFREG | S_IRUGO, 1, 0, 0,
+        0, NULL,
+        &iface_proc_info
+};
+
+
+/*
+ *     Periodic address status verification
+ */
+
+void addrconf_verify(unsigned long foo)
+{
+       struct inet6_ifaddr *ifp;
+       unsigned long now = jiffies;
+       int i;
+
+       for (i=0; i < HASH_SIZE; i++)
+       {
+               for (ifp=inet6_addr_lst[i]; ifp;)
+               {
+                       if (!(ifp->flags & ADDR_PERMANENT))
+                       {
+                               struct inet6_ifaddr *bp;
+                               unsigned long age;
+
+                               age = (now - ifp->tstamp) / HZ;
+
+                               if (age > ifp->prefered_lft)
+                               {
+                                       ifp->flags |= ADDR_DEPRECATED;
+                               }
+
+                               bp = ifp;
+                               ifp=ifp->lst_next;
+                               
+                               if (age > bp->valid_lft)
+                               {
+                                       ipv6_del_addr(bp);
+                               }
+                               continue;
+                       }
+                       ifp=ifp->lst_next;
+               }
+       }
+
+       addr_chk_timer.expires = jiffies + ADDR_CHECK_FREQUENCY;
+       add_timer(&addr_chk_timer);     
+}
+
+void addrconf_init()
+{
+       struct device *dev;
+
+       /* init addr hash list */         
+       memset(inet6_addr_lst, 0, 16 * sizeof(struct inet6_ifaddr *));
+
+       memset(inet6_mcast_lst,   0, 16 * sizeof(struct ipv6_mc_list *));
+
+       inet6_dev_lst = NULL;
+
+       /* 
+        *      Init loopback device
+        */
+
+       dev = dev_get("lo");
+
+       if (dev && (dev->flags & IFF_UP))
+               init_loopback(dev);
+
+       /*
+        *      and maybe:
+        *      search availiable AF_INET devs and try to configure them
+        */
+
+       dev = dev_get("eth0");
+
+       if (dev && (dev->flags & IFF_UP))
+               addrconf_eth_config(dev);
+       
+       proc_register_dynamic(&proc_net, &iface_proc_entry);
+       
+       addr_chk_timer.expires = jiffies + ADDR_CHECK_FREQUENCY;
+       add_timer(&addr_chk_timer);
+}
+
+void addrconf_cleanup(void)
+{
+       struct inet6_dev *idev, *bidev;
+       struct inet6_ifaddr *ifa, *bifa;
+       int i;
+
+       del_timer(&addr_chk_timer);
+
+       /*
+        *      clean dev list.
+        */
+
+       for (idev = inet6_dev_lst; idev; )
+       {
+               bidev = idev;
+               idev = idev->next;
+               kfree(bidev);
+       }
+
+       /*
+        *      clean addr_list
+        */
+
+       for (i=0; i<16; i++)
+       {
+               for (ifa=inet6_addr_lst[i]; ifa; )
+               {
+                       bifa = ifa;
+                       ifa = ifa->lst_next;
+                       kfree(bifa);
+               }
+       }
+
+       proc_unregister(&proc_net, iface_proc_entry.low_ino);
+}
+
+/*
+ * Local variables:
+ * c-file-style: "Linux"
+ * End:
+ */
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
new file mode 100644 (file)
index 0000000..964a672
--- /dev/null
@@ -0,0 +1,865 @@
+/*
+ *     AF_INET6 socket family
+ *     Linux INET6 implementation 
+ *
+ *     Authors:
+ *     Pedro Roque             <roque@di.fc.ul.pt>     
+ *
+ *     Adapted from linux/net/ipv4/af_inet.c
+ *
+ *     $Id: af_inet6.c,v 1.13 1996/10/31 19:47:17 roque Exp $
+ *
+ *     This program is free software; you can redistribute it and/or
+ *      modify it under the terms of the GNU General Public License
+ *      as published by the Free Software Foundation; either version
+ *      2 of the License, or (at your option) any later version.
+ */
+
+
+#include <linux/module.h>
+#include <linux/config.h>
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/socket.h>
+#include <linux/in.h>
+#include <linux/kernel.h>
+#include <linux/major.h>
+#include <linux/sched.h>
+#include <linux/timer.h>
+#include <linux/string.h>
+#include <linux/sockios.h>
+#include <linux/net.h>
+#include <linux/fcntl.h>
+#include <linux/mm.h>
+#include <linux/interrupt.h>
+#include <linux/proc_fs.h>
+#include <linux/stat.h>
+
+#include <asm/uaccess.h>
+#include <asm/system.h>
+
+#include <linux/inet.h>
+#include <linux/netdevice.h>
+#include <net/ip.h>
+#include <net/ipv6.h>
+#include <net/protocol.h>
+#include <net/arp.h>
+#include <net/rarp.h>
+#include <net/route.h>
+#include <net/tcp.h>
+#include <net/udp.h>
+#include <linux/skbuff.h>
+#include <net/sock.h>
+#include <net/raw.h>
+#include <net/icmp.h>
+#include <linux/icmpv6.h>
+#include <net/inet_common.h>
+#include <net/transp_v6.h>
+#include <net/ndisc.h>
+#include <net/ipv6_route.h>
+#include <net/sit.h>
+#include <linux/ip_fw.h>
+#include <net/addrconf.h>
+
+/*
+ *     Default callbacks for user INET sockets. These just wake up
+ *     the user owning the socket.
+ */
+
+static void def_callback1(struct sock *sk)
+{
+       if(!sk->dead)
+               wake_up_interruptible(sk->sleep);
+}
+
+static void def_callback2(struct sock *sk,int len)
+{
+       if(!sk->dead)
+       {
+               wake_up_interruptible(sk->sleep);
+               sock_wake_async(sk->socket, 1);
+       }
+}
+
+static void def_callback3(struct sock *sk)
+{
+       long wmem;
+       
+       wmem = (long) sk->wmem_alloc;
+
+       if (wmem < 0) {
+               printk(KERN_DEBUG "bug wmem_alloc < 0\n");
+               sk->wmem_alloc = 0;
+       }
+               
+       if(!sk->dead && sk->wmem_alloc*2 <= sk->sndbuf)
+       {
+               wake_up_interruptible(sk->sleep);
+               sock_wake_async(sk->socket, 2);
+       }
+}
+
+struct sock * rawv6_sock_array[SOCK_ARRAY_SIZE];
+
+static int inet6_create(struct socket *sock, int protocol)
+{
+       struct sock *sk;
+       struct proto *prot;
+       int err;
+
+       sk = (struct sock *) kmalloc(sizeof(*sk), GFP_KERNEL);
+       if (sk == NULL) 
+               return(-ENOBUFS);
+
+       /* Efficient way to set most fields to zero */
+       memset(sk,0,sizeof(*sk));       
+
+       /*
+        *      Note for tcp that also wiped the dummy_th block for us.
+        */
+
+       switch(sock->type) 
+       {
+               case SOCK_STREAM:
+               case SOCK_SEQPACKET:
+                       if (protocol && protocol != IPPROTO_TCP) 
+                       {
+                               kfree_s((void *)sk, sizeof(*sk));
+                               return(-EPROTONOSUPPORT);
+                       }
+                       protocol = IPPROTO_TCP;
+                       sk->no_check = TCP_NO_CHECK;
+                       prot = &tcpv6_prot;
+                       break;
+
+               case SOCK_DGRAM:
+                       if (protocol && protocol != IPPROTO_UDP) 
+                       {
+                               kfree_s((void *)sk, sizeof(*sk));
+                               return(-EPROTONOSUPPORT);
+                       }
+                       protocol = IPPROTO_UDP;
+                       sk->no_check = UDP_NO_CHECK;
+                       prot=&udpv6_prot;
+                       break;
+      
+               case SOCK_RAW:
+                       if (!suser()) 
+                       {
+                               kfree_s((void *)sk, sizeof(*sk));
+                               return(-EPERM);
+                       }
+                       if (!protocol) 
+                       {
+                               kfree_s((void *)sk, sizeof(*sk));
+                               return(-EPROTONOSUPPORT);
+                       }
+                       prot = &rawv6_prot;
+                       sk->reuse = 1;
+                       sk->num = protocol;
+                       break;
+               default:
+                       kfree_s((void *)sk, sizeof(*sk));
+                       return(-ESOCKTNOSUPPORT);
+       }
+
+       sk->socket = sock;
+
+       sk->family = AF_INET6;
+       sk->type = sock->type;
+       sk->protocol = protocol;
+       sk->allocation = GFP_KERNEL;
+       sk->sndbuf = SK_WMEM_MAX;
+       sk->rcvbuf = SK_RMEM_MAX;
+       sk->priority = 1;
+
+       sk->prot = prot;
+       sk->backlog_rcv = prot->backlog_rcv;
+
+       sk->sleep = sock->wait;
+       sock->data =(void *) sk;
+
+       sk->state = TCP_CLOSE;
+
+       skb_queue_head_init(&sk->write_queue);
+       skb_queue_head_init(&sk->receive_queue);
+       skb_queue_head_init(&sk->back_log);
+
+       sk->timer.data = (unsigned long)sk;
+       sk->timer.function = &net_timer;
+       init_timer(&sk->timer);
+
+       sk->state_change = def_callback1;
+       sk->data_ready   = def_callback2;
+       sk->write_space  = def_callback3;
+       sk->error_report = def_callback1;
+
+       sk->net_pinfo.af_inet6.hop_limit  = ipv6_hop_limit;
+       sk->net_pinfo.af_inet6.mcast_hops = IPV6_DEFAULT_MCASTHOPS;
+       sk->net_pinfo.af_inet6.mc_loop    = 1;
+
+       /*
+        *      init the ipv4 part of the socket since
+        *      we can have sockets using v6 API for ipv4
+        */
+
+       sk->ip_ttl=64;
+
+#ifdef CONFIG_IP_MULTICAST
+       sk->ip_mc_loop=1;
+       sk->ip_mc_ttl=1;
+       *sk->ip_mc_name=0;
+       sk->ip_mc_list=NULL;
+#endif
+
+
+       if (sk->type==SOCK_RAW && protocol==IPPROTO_RAW)
+               sk->ip_hdrincl=1;
+
+       if (sk->num) 
+       {
+               /*
+                * It assumes that any protocol which allows
+                * the user to assign a number at socket
+                * creation time automatically
+                * shares.
+                */
+
+               inet_put_sock(sk->num, sk);
+               sk->dummy_th.source = ntohs(sk->num);
+       }
+
+       if (sk->prot->init) 
+       {
+               err = sk->prot->init(sk);
+               if (err != 0) 
+               {
+                       destroy_sock(sk);
+                       return(err);
+               }
+       }
+       MOD_INC_USE_COUNT;
+       return(0);
+}
+
+static int inet6_dup(struct socket *newsock, struct socket *oldsock)
+{
+       return(inet6_create(newsock, 
+                           ((struct sock *)(oldsock->data))->protocol));
+}
+
+
+/*
+ *     bind for INET6 API      
+ */
+
+static int inet6_bind(struct socket *sock, struct sockaddr *uaddr,
+                     int addr_len)
+{
+       struct sockaddr_in6 *addr=(struct sockaddr_in6 *)uaddr;
+       struct sock *sk=(struct sock *)sock->data, *sk2;
+       __u32 v4addr = 0;
+       unsigned short snum = 0;
+       int addr_type = 0;
+
+       /*
+        *      If the socket has its own bind function then use it.
+        */
+        
+       if(sk->prot->bind)
+               return sk->prot->bind(sk, uaddr, addr_len);
+               
+       /* check this error. */
+       if (sk->state != TCP_CLOSE)
+               return(-EIO);
+
+       if(addr_len < sizeof(struct sockaddr_in6))
+               return -EINVAL;
+               
+       if(sock->type != SOCK_RAW)
+       {
+               if (sk->num != 0) 
+                       return(-EINVAL);
+
+               snum = ntohs(addr->sin6_port);
+               
+               if (snum == 0) 
+                       snum = get_new_socknum(sk->prot, 0);
+
+               if (snum < PROT_SOCK && !suser()) 
+                       return(-EACCES);
+       }
+       
+       addr_type = ipv6_addr_type(&addr->sin6_addr);
+
+       if ((addr_type & IPV6_ADDR_MULTICAST) && sock->type == SOCK_STREAM)
+       {
+               return(-EINVAL);
+       }
+
+       /*
+        *      check if the address belongs to the host
+        */
+
+       if (addr_type == IPV6_ADDR_MAPPED)
+       {
+               v4addr = addr->sin6_addr.s6_addr32[3];
+
+               if (ip_chk_addr(v4addr) != IS_MYADDR)
+                       return(-EADDRNOTAVAIL);
+       }
+       else
+       {
+               if (addr_type != IPV6_ADDR_ANY)
+               {
+                       /* 
+                        *      ipv4 addr of the socket is invalid.
+                        *      only the unpecified and mapped address  
+                        *      have a v4 equivalent.
+                        */
+
+                       v4addr = LOOPBACK4_IPV6;
+
+                       if (!(addr_type & IPV6_ADDR_MULTICAST))
+                       {
+                               if (ipv6_chk_addr(&addr->sin6_addr) == NULL)
+                                       return(-EADDRNOTAVAIL);
+                       }
+               }
+       }
+
+       sk->rcv_saddr = v4addr;
+       sk->saddr = v4addr;
+               
+       memcpy(&sk->net_pinfo.af_inet6.rcv_saddr, &addr->sin6_addr, 
+              sizeof(struct in6_addr));
+               
+       if (!(addr_type & IPV6_ADDR_MULTICAST))
+               memcpy(&sk->net_pinfo.af_inet6.saddr, &addr->sin6_addr, 
+                      sizeof(struct in6_addr));
+
+       if(sock->type != SOCK_RAW)
+       {
+               /* Make sure we are allowed to bind here. */
+               cli();
+               for(sk2 = sk->prot->sock_array[snum & (SOCK_ARRAY_SIZE -1)];
+                                       sk2 != NULL; sk2 = sk2->next) 
+               {
+                       /*
+                        *      Hash collision or real match ?
+                        */
+                        
+                       if (sk2->num != snum) 
+                               continue;
+                               
+                       /*
+                        *      Either bind on the port is wildcard means
+                        *      they will overlap and thus be in error.
+                        *      We use the sk2 v4 address to test the 
+                        *      other socket since addr_any in av4 implies
+                        *      addr_any in v6
+                        */                     
+                        
+                       if (addr_type == IPV6_ADDR_ANY || (!sk2->rcv_saddr))
+                       {
+                               /*
+                                *      Allow only if both are setting reuse.
+                                */
+                               if(sk2->reuse && sk->reuse && sk2->state!=TCP_LISTEN)
+                                       continue;
+                               sti();
+                               return(-EADDRINUSE);
+                       }
+
+                       /*
+                        *      Two binds match ?
+                        */
+
+                       if (ipv6_addr_cmp(&sk->net_pinfo.af_inet6.rcv_saddr,
+                                         &sk2->net_pinfo.af_inet6.rcv_saddr))
+
+                               continue;
+                       /*
+                        *      Reusable port ?
+                        */
+
+                       if (!sk->reuse)
+                       {
+                               sti();
+                               return(-EADDRINUSE);
+                       }
+                       
+                       /*
+                        *      Reuse ?
+                        */
+                        
+                       if (!sk2->reuse || sk2->state==TCP_LISTEN) 
+                       {
+                               sti();
+                               return(-EADDRINUSE);
+                       }
+               }
+               sti();
+
+               inet_remove_sock(sk);
+               
+               /*
+               if(sock->type==SOCK_DGRAM)
+                       udp_cache_zap();
+               if(sock->type==SOCK_STREAM)
+                       tcp_cache_zap();
+                       */
+               inet_put_sock(snum, sk);
+               sk->dummy_th.source = ntohs(sk->num);
+               sk->dummy_th.dest = 0;
+               sk->daddr = 0;
+       }
+
+       return(0);
+}
+
+static int inet6_release(struct socket *sock, struct socket *peer)
+{
+       MOD_DEC_USE_COUNT;
+       return inet_release(sock, peer);
+}
+
+static int inet6_socketpair(struct socket *sock1, struct socket *sock2)
+{
+       return(-EOPNOTSUPP);
+}
+
+/*
+ *     This does both peername and sockname.
+ */
+static int inet6_getname(struct socket *sock, struct sockaddr *uaddr,
+                int *uaddr_len, int peer)
+{
+       struct sockaddr_in6 *sin=(struct sockaddr_in6 *)uaddr;
+       struct sock *sk;
+  
+       sin->sin6_family = AF_INET6;
+       sk = (struct sock *) sock->data;
+       if (peer) 
+       {
+               if (!tcp_connected(sk->state))
+                       return(-ENOTCONN);
+               sin->sin6_port = sk->dummy_th.dest;
+               memcpy(&sin->sin6_addr, &sk->net_pinfo.af_inet6.daddr,
+                      sizeof(struct in6_addr));
+       } 
+       else 
+       {
+               if (ipv6_addr_type(&sk->net_pinfo.af_inet6.rcv_saddr) ==
+                   IPV6_ADDR_ANY)
+                       memcpy(&sin->sin6_addr, 
+                              &sk->net_pinfo.af_inet6.saddr,
+                              sizeof(struct in6_addr));
+
+               else
+                       memcpy(&sin->sin6_addr, 
+                              &sk->net_pinfo.af_inet6.rcv_saddr,
+                              sizeof(struct in6_addr));
+
+               sin->sin6_port = sk->dummy_th.source;
+
+       }
+       
+       *uaddr_len = sizeof(*sin);      
+       return(0);
+}
+
+static int inet6_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
+{
+       struct sock *sk=(struct sock *)sock->data;
+       int err;
+       int pid;
+
+       switch(cmd) 
+       {
+       case FIOSETOWN:
+       case SIOCSPGRP:
+               err = get_user(pid, (int *) arg);
+               if(err)
+                       return err;
+
+               /* see inet_fcntl */
+               if (current->pid != pid && current->pgrp != -pid && !suser())
+                       return -EPERM;
+               sk->proc = pid;
+               return(0);
+       case FIOGETOWN:
+       case SIOCGPGRP:
+               err = put_user(sk->proc,(int *)arg);
+               if(err)
+                       return err;
+               return(0);
+       case SIOCGSTAMP:
+               if(sk->stamp.tv_sec==0)
+                       return -ENOENT;
+               err = copy_to_user((void *)arg, &sk->stamp,
+                                  sizeof(struct timeval));
+               if (err)
+                       return -EFAULT;
+               return 0;
+
+       case SIOCADDRT:
+       case SIOCDELRT:
+         
+               return(ipv6_route_ioctl(cmd,(void *)arg));
+
+       case SIOCGIFCONF:
+       case SIOCGIFFLAGS:
+       case SIOCSIFFLAGS:
+       case SIOCADDMULTI:
+       case SIOCDELMULTI:
+/*
+
+  this ioctls deal with addresses
+  must process the addr info before
+  calling dev_ioctl to perform dev specific functions
+
+       case SIOCGIFADDR:
+       case SIOCSIFADDR:
+
+
+       case SIOCGIFDSTADDR:
+
+       case SIOCGIFBRDADDR:
+       case SIOCSIFBRDADDR:
+       case SIOCGIFNETMASK:
+       case SIOCSIFNETMASK:
+       */
+
+       case SIOCGIFMETRIC:
+       case SIOCSIFMETRIC:
+       case SIOCGIFMEM:
+       case SIOCSIFMEM:
+       case SIOCGIFMTU:
+       case SIOCSIFMTU:
+       case SIOCSIFLINK:
+       case SIOCGIFHWADDR:
+       case SIOCSIFHWADDR:
+       case SIOCSIFMAP:
+       case SIOCGIFMAP:
+       case SIOCSIFSLAVE:
+       case SIOCGIFSLAVE:
+
+               return(dev_ioctl(cmd,(void *) arg));            
+               
+               return -EINVAL;
+
+       case SIOCSIFADDR:
+               return addrconf_add_ifaddr((void *) arg);
+       case SIOCSIFDSTADDR:
+               return addrconf_set_dstaddr((void *) arg);
+       default:
+               if ((cmd >= SIOCDEVPRIVATE) &&
+                   (cmd <= (SIOCDEVPRIVATE + 15)))
+                       return(dev_ioctl(cmd,(void *) arg));
+               
+               if (sk->prot->ioctl==NULL) 
+                       return(-EINVAL);
+               return(sk->prot->ioctl(sk, cmd, arg));
+       }
+       /*NOTREACHED*/
+       return(0);
+}
+
+/*
+ * This routine must find a socket given a TCP or UDP header.
+ * Everything is assumed to be in net order.
+ *
+ * We give priority to more closely bound ports: if some socket
+ * is bound to a particular foreign address, it will get the packet
+ * rather than somebody listening to any address..
+ */
+
+struct sock *inet6_get_sock(struct proto *prot, 
+                           struct in6_addr *loc_addr, 
+                           struct in6_addr *rmt_addr,                     
+                           unsigned short loc_port,
+                           unsigned short rmt_port)
+{
+       struct sock *s;
+       struct sock *result = NULL;
+       int badness = -1;
+       unsigned short hnum;
+       struct ipv6_pinfo *np;
+       hnum = ntohs(loc_port);
+
+       /*
+        * SOCK_ARRAY_SIZE must be a power of two.  This will work better
+        * than a prime unless 3 or more sockets end up using the same
+        * array entry.  This should not be a problem because most
+        * well known sockets don't overlap that much, and for
+        * the other ones, we can just be careful about picking our
+        * socket number when we choose an arbitrary one.
+        */
+
+       for(s = prot->sock_array[hnum & (SOCK_ARRAY_SIZE - 1)];
+                       s != NULL; s = s->next) 
+       {
+               int score = 0;
+               
+               if ((s->num != hnum) || s->family != AF_INET6)
+                       continue;
+
+               if(s->dead && (s->state == TCP_CLOSE))
+               {
+                       printk(KERN_DEBUG "dead or closed socket\n");
+                       continue;
+               }
+
+               np = &s->net_pinfo.af_inet6;
+
+               /* remote port matches? */
+
+               if (s->dummy_th.dest) {
+                       if (s->dummy_th.dest != rmt_port)
+                       {
+                               continue;
+                       }
+                       score++;
+               }
+
+               /* local address matches? */
+
+               if (!ipv6_addr_any(&np->rcv_saddr))
+               {
+                       if (ipv6_addr_cmp(&np->rcv_saddr, loc_addr))
+                       {
+                               continue;
+                       }
+                       score++;
+               }
+
+               /* remote address matches? */
+               if (!ipv6_addr_any(&np->daddr))
+               {
+                       if (ipv6_addr_cmp(&np->daddr, rmt_addr))
+                       {
+                               continue;
+                       }
+                       score++;
+               }
+
+               /* perfect match? */
+               if (score == 3)
+                       return s;
+               /* no, check if this is the best so far.. */
+               if (score <= badness)
+                       continue;
+               result = s;
+               badness = score;
+       }
+       return result;
+}
+
+static int __inline__ inet6_mc_check(struct sock *sk, struct in6_addr *addr)
+{
+       struct ipv6_mc_socklist *mc;
+               
+       for (mc = sk->net_pinfo.af_inet6.ipv6_mc_list; mc; mc=mc->next)
+       {
+               if (ipv6_addr_cmp(&mc->addr, addr) == 0)
+                       return 1;
+       }
+
+       return 0;
+}
+
+/*
+ *     Deliver a datagram to raw sockets.
+ *
+ *     Walk the chain starting at @sk and return the first raw socket
+ *     that accepts a packet for protocol @num from @rmt_addr to
+ *     @loc_addr (including multicast group membership).
+ */
+struct sock *inet6_get_sock_raw(struct sock *sk, unsigned short num,
+                               struct in6_addr *loc_addr, 
+                               struct in6_addr *rmt_addr)
+{
+       struct sock *s;
+       int addr_type = ipv6_addr_type(loc_addr);
+
+       for (s = sk; s; s = s->next)
+       {
+               struct ipv6_pinfo *np;
+
+               if (s->num != num)
+                       continue;
+
+               if (s->dead && s->state == TCP_CLOSE)
+                       continue;
+
+               np = &s->net_pinfo.af_inet6;
+
+               /* connected to a different remote address? */
+               if (!ipv6_addr_any(&np->daddr) &&
+                   ipv6_addr_cmp(&np->daddr, rmt_addr))
+                       continue;
+
+               /* unbound local address accepts anything */
+               if (ipv6_addr_any(&np->rcv_saddr))
+                       return s;
+
+               /* exact local address match */
+               if (!ipv6_addr_cmp(&np->rcv_saddr, loc_addr))
+                       return s;
+
+               /* multicast destination the socket is a member of */
+               if ((addr_type & IPV6_ADDR_MULTICAST) &&
+                   inet6_mc_check(s, loc_addr))
+                       return s;
+       }
+       return NULL;
+}
+
+/*
+ *     inet6_get_sock_mcast for UDP sockets.
+ *
+ *     Walk the chain starting at @sk and return the first socket bound
+ *     to local port @num that accepts a packet from @rmt_addr/@rmt_port
+ *     to the (multicast) address @loc_addr.
+ */
+
+struct sock *inet6_get_sock_mcast(struct sock *sk, 
+                                 unsigned short num, unsigned short rmt_port,
+                                 struct in6_addr *loc_addr, 
+                                 struct in6_addr *rmt_addr)
+{
+       struct sock *s;
+
+       for (s = sk; s; s = s->next)
+       {
+               struct ipv6_pinfo *np;
+
+               if (s->num != num)
+                       continue;
+
+               if (s->dead && s->state == TCP_CLOSE)
+                       continue;
+
+               np = &s->net_pinfo.af_inet6;
+
+               /* connected to a different remote port? */
+               if (s->dummy_th.dest && s->dummy_th.dest != rmt_port)
+                       continue;
+
+               /* connected to a different remote address? */
+               if (!ipv6_addr_any(&np->daddr) &&
+                   ipv6_addr_cmp(&np->daddr, rmt_addr))
+                       continue;
+
+               /* exact local-address bind wins immediately */
+               if (!ipv6_addr_any(&np->rcv_saddr) &&
+                   !ipv6_addr_cmp(&np->rcv_saddr, loc_addr))
+                       return s;
+
+               /* otherwise the socket must be a member of the group */
+               if (inet6_mc_check(s, loc_addr))
+                       return s;
+       }
+       return NULL;
+}
+       
+
+/*
+ *     AF_INET6 proto_ops table: IPv6 specific entry points plus the
+ *     generic inet_* handlers that work unchanged for both families.
+ *     Positional initializer -- field order must match struct proto_ops.
+ */
+static struct proto_ops inet6_proto_ops = {
+       AF_INET6,
+
+       inet6_create,                   /* v6 specific  */
+       inet6_dup,                      /* v6 specific  */
+       inet6_release,                  /* v6 specific  */
+       inet6_bind,                     /* v6 specific  */
+       inet_connect,                   /* ok           */
+       inet6_socketpair,               /* a do nothing */
+       inet_accept,                    /* ok           */
+       inet6_getname,                  /* v6 specific  */
+       inet_select,                    /* ok           */
+       inet6_ioctl,                    /* must change  */
+       inet_listen,                    /* ok           */
+       inet_shutdown,                  /* ok           */
+       inet_setsockopt,                /* ok           */
+       inet_getsockopt,                /* ok           */
+       inet_fcntl,                     /* ok           */
+       inet_sendmsg,                   /* ok           */
+       inet_recvmsg                    /* ok           */
+};
+
+/*
+ *     IPv6 initialization: register the AF_INET6 socket switch entry,
+ *     reset the v6 protocol bookkeeping and bring up the sub-modules
+ *     (core, ICMPv6, neighbour discovery, address configuration, the
+ *     SIT tunnel and the v6 transports).
+ *     Built either as the module entry point or as the boot-time
+ *     protocol init hook, hence the dual signature below.
+ */
+#ifdef MODULE
+int init_module(void)
+#else
+void inet6_proto_init(struct net_proto *pro)
+#endif
+{
+       int i;
+
+       printk(KERN_INFO "IPv6 v0.1\n");
+
+       sock_register(inet6_proto_ops.family, &inet6_proto_ops);
+       
+       /* raw v6 gets its own, initially empty, socket array */
+       for(i = 0; i < SOCK_ARRAY_SIZE; i++) 
+       {
+               rawv6_sock_array[i] = NULL;
+       }
+
+       /*
+        *      ipngwg API draft makes clear that the correct semantics
+        *      for TCP and UDP is to consider one TCP and UDP instance
+        *      in a host available by both INET and INET6 APIs and
+        *      able to communicate via both network protocols.
+        *      (Hence TCP/UDP below share the v4 socket arrays.)
+        */
+       
+       tcpv6_prot.inuse = 0;
+       tcpv6_prot.highestinuse = 0;       
+       tcpv6_prot.sock_array = tcp_sock_array;
+
+       udpv6_prot.inuse = 0;
+       udpv6_prot.highestinuse = 0;
+       udpv6_prot.sock_array = udp_sock_array;
+
+       rawv6_prot.inuse = 0;
+       rawv6_prot.highestinuse = 0;
+       rawv6_prot.sock_array = rawv6_sock_array;
+       
+       ipv6_init();
+
+       icmpv6_init(&inet6_proto_ops);
+       ndisc_init(&inet6_proto_ops);
+
+        addrconf_init();
+        sit_init();
+
+       /* init v6 transport protocols */
+
+       udpv6_init();
+       /* add /proc entries here */
+
+       tcpv6_init();
+
+#ifdef MODULE
+       return 0;
+#endif
+}
+
+#ifdef MODULE
+/*
+ *     Module unload: tear down the SIT tunnel and the IPv6 core in
+ *     reverse order of init_module(), then drop the AF_INET6 entry
+ *     from the socket switch.
+ */
+void cleanup_module(void)
+{
+       sit_cleanup();
+       ipv6_cleanup();
+       sock_unregister(AF_INET6);
+}
+#endif
+
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
new file mode 100644 (file)
index 0000000..92a6e5d
--- /dev/null
@@ -0,0 +1,202 @@
+/*
+ *     common UDP/RAW code
+ *     Linux INET6 implementation 
+ *
+ *     Authors:
+ *     Pedro Roque             <roque@di.fc.ul.pt>     
+ *
+ *     $Id: datagram.c,v 1.3 1996/10/11 16:03:05 roque Exp $
+ *
+ *     This program is free software; you can redistribute it and/or
+ *      modify it under the terms of the GNU General Public License
+ *      as published by the Free Software Foundation; either version
+ *      2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/socket.h>
+#include <linux/sockios.h>
+#include <linux/in6.h>
+#include <linux/ipv6.h>
+
+#include <net/ipv6.h>
+#include <net/ndisc.h>
+#include <net/ipv6_route.h>
+#include <net/addrconf.h>
+#include <net/transp_v6.h>
+
+
+/*
+ *     Fill the user's control buffer (msg->msg_control) with ancillary
+ *     data for a received datagram: packet info (if IPV6_RXINFO was
+ *     enabled on the socket) and any received source route.
+ *     msg->msg_controllen is rewritten to the number of bytes used.
+ *     Returns 0, or -ENODEV if the receiving device is unknown.
+ */
+int datagram_recv_ctl(struct sock *sk, struct msghdr *msg, struct sk_buff *skb)
+{
+       struct ipv6_pinfo *np = &sk->net_pinfo.af_inet6;
+       struct ipv6_options *opt = (struct ipv6_options *) skb->proto_priv;
+       struct cmsghdr *cmsg = msg->msg_control;
+       int len = msg->msg_controllen;
+
+       msg->msg_controllen = 0;
+       
+       if (np->rxinfo && (len >= sizeof(struct cmsghdr) +
+                          sizeof(struct in6_pktinfo)))
+       {
+               struct in6_pktinfo *src_info;
+               struct inet6_dev *in6_dev;
+
+               cmsg->cmsg_len = (sizeof(struct cmsghdr) + 
+                                 sizeof(struct in6_pktinfo));
+               cmsg->cmsg_level = SOL_IPV6;
+               cmsg->cmsg_type = IPV6_RXINFO;
+
+               src_info = (struct in6_pktinfo *) cmsg->cmsg_data;
+               in6_dev = ipv6_get_idev(skb->dev);
+
+               if (in6_dev == NULL)
+               {
+                       printk(KERN_DEBUG "recv_ctl: unknown device\n");
+                       return -ENODEV;
+               }
+
+               src_info->ipi6_ifindex = in6_dev->if_index;
+               /* report the destination the packet actually arrived on */
+               ipv6_addr_copy(&src_info->ipi6_addr,
+                              &skb->ipv6_hdr->daddr);
+
+               /* advance to the next cmsg slot in the user buffer */
+               len -= cmsg->cmsg_len;
+               msg->msg_controllen += cmsg->cmsg_len;
+               cmsg = (struct cmsghdr *)((u8*) cmsg + cmsg->cmsg_len);
+       }
+
+       if (opt->srcrt)
+       {
+               int hdrlen = sizeof(struct rt0_hdr) + (opt->srcrt->hdrlen << 3);
+
+               if (len >= sizeof(struct cmsghdr) + hdrlen)
+               {
+                       struct rt0_hdr *rt0;
+
+                       cmsg->cmsg_len = sizeof(struct cmsghdr) + hdrlen;
+                       cmsg->cmsg_level = SOL_IPV6;
+                       /*
+                        *      NOTE(review): this reuses IPV6_RXINFO as the
+                        *      cmsg type for the source route as well --
+                        *      looks like a copy/paste of the block above;
+                        *      presumably a route-specific type was
+                        *      intended.  TODO confirm against userspace.
+                        */
+                       cmsg->cmsg_type = IPV6_RXINFO;
+               
+                       rt0 = (struct rt0_hdr *) cmsg->cmsg_data;
+                       memcpy(rt0, opt->srcrt, hdrlen);
+
+                       len -= cmsg->cmsg_len;
+                       msg->msg_controllen += cmsg->cmsg_len;
+                       cmsg = (struct cmsghdr *)((u8*) cmsg + cmsg->cmsg_len);
+               }
+       }
+       return 0;
+}
+                        
+
+/*
+ *     Parse the ancillary data (IPV6_TXINFO, SCM_SRCRT) supplied with
+ *     an outgoing datagram.  On success fills in *src_dev, *src_addr
+ *     and opt; the parsed data stays in the caller's control buffer,
+ *     so it must remain valid while opt is used.
+ *     Returns 0 on success, -EINVAL on malformed input.
+ */
+int datagram_send_ctl(struct msghdr *msg, struct device **src_dev,
+                     struct in6_addr **src_addr, struct ipv6_options *opt)
+{
+       struct inet6_dev *in6_dev = NULL;
+       struct in6_pktinfo *src_info;
+       struct cmsghdr *cmsg;
+       struct ipv6_rt_hdr *rthdr;
+       int len;
+       int err = -EINVAL;
+
+       for (cmsg = msg->msg_control; cmsg; cmsg = cmsg_nxthdr(msg, cmsg))
+       {
+               if (cmsg->cmsg_level != SOL_IPV6)
+               {
+                       printk(KERN_DEBUG "cmsg_level %d\n", cmsg->cmsg_level);
+                       continue;
+               }
+
+               switch (cmsg->cmsg_type) {
+
+               case IPV6_TXINFO:
+                       if (cmsg->cmsg_len < (sizeof(struct cmsghdr) +
+                                             sizeof(struct in6_pktinfo)))
+                       {
+                               goto exit_f;
+                       }
+
+                       src_info = (struct in6_pktinfo *) cmsg->cmsg_data;
+                       
+                       if (src_info->ipi6_ifindex)
+                       {
+                               in6_dev = ipv6_dev_by_index(src_info->ipi6_ifindex);
+                               if (in6_dev == NULL)
+                               {
+                                       goto exit_f;
+                               }
+
+                               *src_dev = in6_dev->dev;
+                       }
+                       
+                       /*
+                        *      NOTE(review): err is only cleared when a
+                        *      source address is present; an interface-only
+                        *      IPV6_TXINFO still returns -EINVAL.  Verify
+                        *      this is the intended contract.
+                        */
+                       if (!ipv6_addr_any(&src_info->ipi6_addr))
+                       {
+                               struct inet6_ifaddr *ifp;
+
+                               ifp = ipv6_chk_addr(&src_info->ipi6_addr);
+
+                               if ( ifp == NULL)
+                               {
+                                       goto exit_f;
+                               }
+
+                               /* link-local source must sit on the chosen device */
+                               if (in6_dev && ifp->scope == IFA_LINK &&
+                                   in6_dev != ifp->idev)
+                               {
+                                       goto exit_f;
+                               }
+
+                               *src_addr = &src_info->ipi6_addr;
+                               err = 0;
+                       }
+
+                       break;
+                       
+               case SCM_SRCRT:
+
+                       len = cmsg->cmsg_len;
+
+                       len -= sizeof(struct cmsghdr);
+
+                       /* validate option length */
+                       if (len < sizeof(struct ipv6_rt_hdr))
+                       {
+                               goto exit_f;
+                       }
+
+                       rthdr = (struct ipv6_rt_hdr *) cmsg->cmsg_data;
+
+                       /*
+                        *      TYPE 0
+                        */
+                       if (rthdr->type)
+                       {
+                               goto exit_f;
+                       }
+
+                       /*
+                        *      The header's claimed length must fit inside
+                        *      the user supplied data, otherwise we would
+                        *      later copy kernel memory beyond the cmsg
+                        *      buffer onto the wire.  (The old test used
+                        *      '<', which rejected trailing slack but
+                        *      accepted oversized claims.)
+                        */
+                       if (((rthdr->hdrlen + 1) << 3) > len)
+                       {                               
+                               goto exit_f;
+                       }
+
+                       /* segments left must also match */
+                       if ((rthdr->hdrlen >> 1) != rthdr->segments_left)
+                       {
+                               goto exit_f;
+                       }
+                       
+                       opt->opt_nflen += ((rthdr->hdrlen + 1) << 3);
+                       opt->srcrt = rthdr;
+                       err = 0;
+
+                       break;
+               default:
+                       printk(KERN_DEBUG "invalid cmsg type: %d\n",
+                              cmsg->cmsg_type);
+                       break;
+               }
+       }
+
+  exit_f:
+       return err;
+}
diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c
new file mode 100644 (file)
index 0000000..6c5c8ab
--- /dev/null
@@ -0,0 +1,173 @@
+/*
+ *     Extension Header handling for IPv6
+ *     Linux INET6 implementation
+ *
+ *     Authors:
+ *     Pedro Roque             <roque@di.fc.ul.pt>
+ *
+ *     $Id: exthdrs.c,v 1.7 1996/09/12 18:44:18 roque Exp $
+ *
+ *     This program is free software; you can redistribute it and/or
+ *      modify it under the terms of the GNU General Public License
+ *      as published by the Free Software Foundation; either version
+ *      2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/socket.h>
+#include <linux/sockios.h>
+#include <linux/sched.h>
+#include <linux/net.h>
+#include <linux/netdevice.h>
+#include <linux/in6.h>
+#include <linux/icmpv6.h>
+
+#include <net/sock.h>
+#include <net/snmp.h>
+
+#include <net/ipv6.h>
+#include <net/protocol.h>
+#include <net/transp_v6.h>
+#include <net/rawv6.h>
+#include <net/ndisc.h>
+#include <net/ipv6_route.h>
+#include <net/addrconf.h>
+
+/*
+ *     inbound
+ */
+
+/*
+ *     Process a received type-0 routing header (RFC 1883, page 17).
+ *     If no segments are left the header is recorded in the skb's
+ *     options and the next header value is returned for continued
+ *     parsing; otherwise the next segment address is swapped with the
+ *     packet's destination and the packet is forwarded.  Malformed
+ *     headers are answered with an ICMPv6 parameter problem.
+ *     Returns the next header type, or 0 when the skb was consumed.
+ */
+int ipv6_routing_header(struct sk_buff **skb_ptr, struct device *dev,
+                       __u8 *nhptr, struct ipv6_options *opt)
+{
+       struct sk_buff *skb = *skb_ptr;
+       struct in6_addr *addr;
+       struct in6_addr daddr;
+       int addr_type = 0;
+       int strict = 0;
+       __u32 bit_map;
+       int pos;
+       int n, i;
+
+       struct ipv6_rt_hdr *hdr = (struct ipv6_rt_hdr *) skb->h.raw;
+       struct rt0_hdr *rthdr;
+
+       if (hdr->segments_left == 0)
+       {
+               struct ipv6_options *opt;
+
+               /* route fully traversed: keep it for the upper layer */
+               opt = (struct ipv6_options *) skb->proto_priv;
+               opt->srcrt = hdr;
+
+               skb->h.raw += (hdr->hdrlen + 1) << 3;
+               return hdr->nexthdr;            
+       }
+
+       if (hdr->type != IPV6_SRCRT_TYPE_0 || hdr->hdrlen & 0x01 ||
+           hdr->hdrlen > 46)
+       {
+                /* 
+                *      Discard 
+                */
+               
+               /* pos = offset of the offending field for the ICMP report */
+               pos = (__u8 *) hdr - (__u8 *) skb->ipv6_hdr + 2;
+
+               if (hdr->type)
+                       pos += 2;
+               else
+                       pos += 1;
+
+               icmpv6_send(skb, ICMPV6_PARAMETER_PROB, 0, pos, dev);
+               kfree_skb(skb, FREE_READ);
+               return 0;       
+       }
+
+       /*
+        *      This is the routing header forwarding algorithm from
+        *      RFC 1883, page 17.
+        */
+
+       /* n = number of addresses carried (hdrlen is in 8-octet units) */
+       n = hdr->hdrlen >> 1;
+
+       if (hdr->segments_left > n)
+       {
+               pos = (__u8 *) hdr - (__u8 *) skb->ipv6_hdr + 2;
+
+               pos += 3;
+
+               icmpv6_send(skb, ICMPV6_PARAMETER_PROB, 0, pos, dev);
+               kfree_skb(skb, FREE_READ);
+               return 0;
+       }
+
+       /* i = 1-based index of the next segment to visit */
+       i = n - --hdr->segments_left;
+
+       rthdr = (struct rt0_hdr *) hdr;
+       addr = rthdr->addr;
+       addr += i - 1;
+
+       addr_type = ipv6_addr_type(addr);
+
+       if (addr_type == IPV6_ADDR_MULTICAST)
+       {
+               kfree_skb(skb, FREE_READ);
+               return 0;
+       }
+
+       /* swap the next segment address with the packet destination */
+       ipv6_addr_copy(&daddr, addr);
+       ipv6_addr_copy(addr, &skb->ipv6_hdr->daddr);
+       ipv6_addr_copy(&skb->ipv6_hdr->daddr, &daddr);
+
+       /*
+        *      Check Strict Source Route
+        */
+
+       bit_map = ntohl(rthdr->bitmap);
+
+       /*
+        *      NOTE(review): comparing a masked bit against
+        *      IPV6_SRCRT_STRICT with '==' only works if that constant
+        *      equals the tested bit value -- confirm against the header
+        *      definition.
+        */
+       if ((bit_map & (1 << i)) == IPV6_SRCRT_STRICT)
+       {
+               strict = 1;
+       }
+
+       ipv6_forward(skb, dev, (strict ? IP6_FW_STRICT : 0) | IP6_FW_SRCRT);
+
+       return 0;
+}
+
+
+/*
+ *     outbound
+ */
+
+/*
+ *     Append the user supplied type-0 routing header (opt->srcrt) to an
+ *     outgoing packet.  The address list is rotated: the caller's
+ *     addr[1..hops-1] become addr[0..hops-2] and the final destination
+ *     @addr is stored as the last segment (presumably the caller uses
+ *     the original addr[0] as the packet's first-hop destination --
+ *     TODO confirm).  The header must have been validated beforehand
+ *     (see datagram_send_ctl).
+ *     Returns NEXTHDR_ROUTING for the preceding nexthdr field.
+ */
+int ipv6opt_bld_rthdr(struct sk_buff *skb, struct ipv6_options *opt,
+                     struct in6_addr *addr, int proto)               
+{
+       struct rt0_hdr *phdr, *ihdr;
+       int hops;
+
+       ihdr = (struct rt0_hdr *) opt->srcrt;
+       
+       /* reserve room for the full header in the packet */
+       phdr = (struct rt0_hdr *) skb_put(skb, (ihdr->rt_hdr.hdrlen + 1) << 3);
+       memcpy(phdr, ihdr, sizeof(struct ipv6_rt_hdr));
+
+       /* hdrlen counts 8-octet units; two units per address */
+       hops = ihdr->rt_hdr.hdrlen >> 1;
+       
+       if (hops > 1)
+       {
+               memcpy(phdr->addr, ihdr->addr + 1,
+                      (hops - 1) * sizeof(struct in6_addr));
+       }
+
+       ipv6_addr_copy(phdr->addr + (hops - 1), addr);
+       
+       phdr->rt_hdr.nexthdr = proto;
+
+       return NEXTHDR_ROUTING;
+}
+
+/*
+ * Local variables:
+ * c-file-style: "Linux"
+ * End:
+ */
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
new file mode 100644 (file)
index 0000000..1d6da70
--- /dev/null
@@ -0,0 +1,553 @@
+/*
+ *     Internet Control Message Protocol (ICMPv6)
+ *     Linux INET6 implementation
+ *
+ *     Authors:
+ *     Pedro Roque             <roque@di.fc.ul.pt>
+ *
+ *     Based on net/ipv4/icmp.c
+ *
+ *     RFC 1885
+ *
+ *     This program is free software; you can redistribute it and/or
+ *      modify it under the terms of the GNU General Public License
+ *      as published by the Free Software Foundation; either version
+ *      2 of the License, or (at your option) any later version.
+ */
+
+#define __NO_VERSION__
+#include <linux/module.h>
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/socket.h>
+#include <linux/in.h>
+#include <linux/kernel.h>
+#include <linux/major.h>
+#include <linux/sched.h>
+#include <linux/timer.h>
+#include <linux/string.h>
+#include <linux/sockios.h>
+#include <linux/net.h>
+#include <linux/fcntl.h>
+#include <linux/mm.h>
+#include <linux/interrupt.h>
+#include <linux/proc_fs.h>
+#include <linux/stat.h>
+#include <linux/skbuff.h>
+
+#include <linux/inet.h>
+#include <linux/netdevice.h>
+#include <linux/icmpv6.h>
+
+#include <net/ip.h>
+#include <net/sock.h>
+
+#include <net/ipv6.h>
+#include <net/protocol.h>
+#include <net/route.h>
+#include <net/ndisc.h>
+#include <net/raw.h>
+#include <net/inet_common.h>
+#include <net/transp_v6.h>
+#include <net/ipv6_route.h>
+#include <net/addrconf.h>
+#include <net/rawv6.h>
+
+#include <asm/uaccess.h>
+#include <asm/system.h>
+
+/*
+ *     ICMP socket for flow control.
+ */
+
+static struct socket icmpv6_socket;
+
+int icmpv6_rcv(struct sk_buff *skb, struct device *dev,
+              struct in6_addr *saddr, struct in6_addr *daddr,
+              struct ipv6_options *opt, unsigned short len,
+              int redo, struct inet6_protocol *protocol);
+
+/*
+ *     Descriptor hooking ICMPv6 into the inet6 protocol switch
+ *     (registered by icmpv6_init via inet6_add_protocol).
+ */
+static struct inet6_protocol icmpv6_protocol = 
+{
+       icmpv6_rcv,             /* handler              */
+       NULL,                   /* error control        */
+       NULL,                   /* next                 */
+       IPPROTO_ICMPV6,         /* protocol ID          */
+       0,                      /* copy                 */
+       NULL,                   /* data                 */
+       "ICMPv6"                /* name                 */
+};
+
+
+
+/*
+ *     State handed to the icmpv6_getfrag() callback while an outgoing
+ *     ICMPv6 message is being built.
+ */
+struct icmpv6_msg {
+       struct icmpv6hdr        icmph;  /* header template              */
+       __u8                    *data;  /* payload to copy              */
+       struct in6_addr         *daddr; /* for the pseudo header        */
+       int                     len;    /* total ICMPv6 length          */
+       __u32                   csum;   /* running checksum             */
+};
+
+
+
+/*
+ *     getfrag callback: copy (part of) the ICMPv6 message into the
+ *     output buffer while accumulating the checksum; the final
+ *     fragment-0 pass stamps the checksum into the header.
+ *     NOTE(review): an earlier comment said this must not be static
+ *     because ndisc.c needs it, yet it is declared static here --
+ *     presumably ndisc.c carries its own copy; confirm.
+ */
+
+static void icmpv6_getfrag(const void *data, struct in6_addr *saddr, 
+                          char *buff, unsigned int offset, unsigned int len)
+{
+       struct icmpv6_msg *msg = (struct icmpv6_msg *) data;
+       struct icmpv6hdr *icmph;
+       __u32 csum;
+
+       /* 
+        *      in theory offset must be 0 since we never send more 
+        *      than 576 bytes on an error or more than the path mtu
+        *      on an echo reply. (those are the rules on RFC 1883)
+        */
+
+       if (offset)
+       {
+               /* continuation fragment: payload only, keep summing */
+               csum = csum_partial_copy((void *) msg->data +
+                                        offset - sizeof(struct icmpv6hdr), 
+                                        buff, len, msg->csum);
+               msg->csum = csum;
+               return;
+       }
+
+       /* first fragment: header, then payload */
+       csum = csum_partial_copy((void *) &msg->icmph, buff,
+                                sizeof(struct icmpv6hdr), msg->csum);
+
+       csum = csum_partial_copy((void *) msg->data, 
+                                buff + sizeof(struct icmpv6hdr),
+                                len - sizeof(struct icmpv6hdr), csum);
+
+       icmph = (struct icmpv6hdr *) buff;
+
+       /* fold in the pseudo header and write the final checksum */
+       icmph->checksum = csum_ipv6_magic(saddr, msg->daddr, msg->len,
+                                         IPPROTO_ICMPV6, csum);
+}
+
+/*
+ *     Helper for the "simple" if statement below: true when a parameter
+ *     problem report was caused by an unrecognised IPv6 option whose
+ *     Option Type has its highest-order two bits set to 10.
+ */
+static __inline__ int opt_unrec(struct sk_buff *skb, __u32 offset)
+{
+       __u8 optval = *((__u8 *) skb->ipv6_hdr + offset);
+
+       return (optval & 0xC0) == 0x80;
+}
+
+/*
+ *     Send an ICMP message in response to a packet in error.
+ *
+ *     Builds an ICMPv6 error of @type/@code (with @info in the pointer/
+ *     mtu field) quoting the offending packet @skb, and transmits it
+ *     from the ICMPv6 control socket.  Enforces the suppression rules
+ *     of RFC 1885 2.4(e) that can be checked at this point.
+ */
+
+void icmpv6_send(struct sk_buff *skb, int type, int code, __u32 info, 
+                struct device *dev)
+{
+       struct ipv6hdr *hdr = skb->ipv6_hdr;
+       struct sock *sk = (struct sock *) icmpv6_socket.data;
+       struct in6_addr *saddr = NULL;
+       struct device *src_dev = NULL;
+       struct icmpv6_msg msg;
+       int addr_type = 0;
+       int optlen;
+       int len;
+
+       /*
+        *      sanity check pointer in case of parameter problem
+        */
+
+       if (type == ICMPV6_PARAMETER_PROB && 
+           (info > (skb->tail - ((unsigned char *) hdr))))
+       {
+               printk(KERN_DEBUG "icmpv6_send: bug! pointer > skb\n");
+               return;
+       }
+
+       /*
+        *      Make sure we respect the rules 
+        *      i.e. RFC 1885 2.4(e)
+        *      Rule (e.1) is enforced by not using icmpv6_send
+        *      in any code that processes icmp errors.
+        */
+       
+       addr_type = ipv6_addr_type(&hdr->daddr);
+
+       /* reply from the address the offending packet was sent to,
+          if it is one of ours */
+       if (ipv6_chk_addr(&hdr->daddr))
+       {
+               saddr = &hdr->daddr;
+       }
+
+       /*
+        *      Dest addr check
+        */
+
+       if ((addr_type & IPV6_ADDR_MULTICAST || skb->pkt_type != PACKET_HOST))
+       {
+               /* errors for multicast/non-local packets are suppressed,
+                  except the two cases RFC 1885 explicitly allows */
+               if (type != ICMPV6_PKT_TOOBIG &&
+                   !(type == ICMPV6_PARAMETER_PROB && 
+                     code == ICMPV6_UNK_OPTION && 
+                     (opt_unrec(skb, info))))
+               {
+                       return;
+               }
+
+               saddr = NULL;
+       }
+
+       addr_type = ipv6_addr_type(&hdr->saddr);
+
+       /*
+        *      Source addr check
+        */
+
+       if (addr_type & IPV6_ADDR_LINKLOCAL)
+       {
+               src_dev = skb->dev;
+       }
+
+       /*
+        *      Must not send if we know that source is Anycast also.
+        *      for now we don't know that.
+        */
+       if ((addr_type == IPV6_ADDR_ANY) || (addr_type & IPV6_ADDR_MULTICAST))
+       {
+               printk(KERN_DEBUG "icmpv6_send: addr_any/mcast source\n");
+               return;
+       }
+
+       /*
+        *      ok. kick it. checksum will be provided by the 
+        *      getfrag_t callback.
+        */
+
+       msg.icmph.type = type;
+       msg.icmph.code = code;
+       msg.icmph.checksum = 0;
+       msg.icmph.icmp6_pointer = htonl(info);
+
+       msg.data = (__u8 *) skb->ipv6_hdr;
+       msg.csum = 0;
+       msg.daddr = &hdr->saddr;
+        /*
+       if (skb->opt)
+               optlen = skb->opt->optlen;
+       else
+       */
+
+       optlen = 0;
+
+       /* quote as much of the offending packet as fits in 576 bytes */
+       len = min(skb->tail - ((unsigned char *) hdr), 
+                 576 - sizeof(struct ipv6hdr) - sizeof(struct icmpv6hdr)
+                 - optlen);
+
+       if (len < 0)
+       {
+               printk(KERN_DEBUG "icmp: len problem\n");
+               return;
+       }
+
+       len += sizeof(struct icmpv6hdr);
+
+       msg.len = len;
+
+
+       ipv6_build_xmit(sk, icmpv6_getfrag, &msg, &hdr->saddr, len,
+                       saddr, src_dev, NULL, IPPROTO_ICMPV6, 1);
+}
+
+/*
+ *     Answer an ICMPV6_ECHO_REQUEST: build an ECHO_REPLY carrying the
+ *     request's identifier, sequence number and payload back to the
+ *     sender.  The request skb itself is freed by the caller.
+ */
+static void icmpv6_echo_reply(struct sk_buff *skb)
+{
+       struct sock *sk = (struct sock *) icmpv6_socket.data;
+       struct ipv6hdr *hdr = skb->ipv6_hdr;
+       struct icmpv6hdr *icmph = (struct icmpv6hdr *) skb->h.raw;
+       struct in6_addr *saddr;
+       struct icmpv6_msg msg;
+       unsigned char *data;
+       int len;
+
+       /* echo payload starts right after the ICMPv6 header */
+       data = (char *) (icmph + 1);
+
+       saddr = &hdr->daddr;
+
+       /* cannot source from a multicast address; let routing pick one */
+       if (ipv6_addr_type(saddr) & IPV6_ADDR_MULTICAST)
+               saddr = NULL;
+
+       len = skb->tail - data;
+       len += sizeof(struct icmpv6hdr);
+
+       msg.icmph.type = ICMPV6_ECHO_REPLY;
+       msg.icmph.code = 0;
+       msg.icmph.checksum = 0;
+       /* identifier and sequence are echoed back unchanged */
+       msg.icmph.icmp6_identifier = icmph->icmp6_identifier;
+       msg.icmph.icmp6_sequence = icmph->icmp6_sequence;
+
+       msg.data = data;
+       msg.csum = 0;
+       msg.len = len;
+       msg.daddr = &hdr->saddr;
+       
+       ipv6_build_xmit(sk, icmpv6_getfrag, &msg, &hdr->saddr, len, saddr,
+                       skb->dev, NULL, IPPROTO_ICMPV6, 1);
+}
+
+static __inline__ int ipv6_ext_hdr(u8 nexthdr)
+{
+       /* 
+        *      find out if nexthdr is an extension header or a protocol
+        */
+       switch (nexthdr) {
+       case NEXTHDR_HOP:
+       case NEXTHDR_ROUTING:
+       case NEXTHDR_FRAGMENT:
+       case NEXTHDR_ESP:
+       case NEXTHDR_AUTH:
+       case NEXTHDR_NONE:
+       case NEXTHDR_DEST:
+               return 1;
+       default:
+               return 0;
+       }
+}
+
+/*
+ *     Deliver an ICMPv6 error to the transport protocol that sent the
+ *     offending packet (carried in buff/len), falling back to any raw
+ *     sockets bound to that protocol.
+ */
+static void icmpv6_notify(int type, int code, unsigned char *buff, int len,
+                         struct in6_addr *saddr, struct in6_addr *daddr, 
+                         struct inet6_protocol *protocol)
+{
+       struct ipv6hdr *hdr = (struct ipv6hdr *) buff;
+       struct inet6_protocol *ipprot;
+       struct sock *sk;
+       char * pbuff;
+       __u32 info = 0;
+       int hash;
+       u8 nexthdr;
+
+       /* now skip over extension headers */
+
+       nexthdr = hdr->nexthdr;
+
+       pbuff = (char *) (hdr + 1);
+       len -= sizeof(struct ipv6hdr);
+
+       while (ipv6_ext_hdr(nexthdr)) 
+       {
+               int hdrlen;
+
+               if (nexthdr == NEXTHDR_NONE)
+                       return;
+
+               nexthdr = *pbuff;
+
+               /*
+                *      The extension header length field counts 8-octet
+                *      units excluding the first 8 octets (RFC 1883), so
+                *      the header occupies (*(pbuff+1) + 1) << 3 bytes.
+                *      The original code advanced by the raw field value,
+                *      landing in the middle of the header chain.
+                */
+               hdrlen = (*(pbuff + 1) + 1) << 3;
+
+               if (hdrlen > len)
+                       return;
+               
+               pbuff += hdrlen;
+               len -= hdrlen;
+       }
+
+       hash = nexthdr & (MAX_INET_PROTOS -1);
+
+       for (ipprot = (struct inet6_protocol *) inet6_protos[hash]; 
+            ipprot != NULL; 
+            ipprot=(struct inet6_protocol *)ipprot->next)
+       {
+               if (ipprot->protocol != nexthdr)
+                       continue;
+
+               if (ipprot->err_handler) 
+               {
+                       ipprot->err_handler(type, code, pbuff, info,
+                                           saddr, daddr, ipprot);
+               }
+               return;
+       }
+
+       /* delivery to upper layer protocols failed. try raw sockets */
+
+       sk = rawv6_prot.sock_array[hash];
+
+       if (sk == NULL)
+       {
+               return;
+       }
+
+       /* notify every matching raw socket, not just the first */
+       while ((sk = inet6_get_sock_raw(sk, nexthdr, daddr, saddr)))
+       {
+               rawv6_err(sk, type, code, pbuff, saddr, daddr);
+               sk = sk->next;
+       }
+
+       return;
+}
+  
+/*
+ *     Handle icmp messages
+ *
+ *     Verifies the checksum, then dispatches on the message type:
+ *     echoes are answered, errors are propagated to the transports via
+ *     icmpv6_notify(), neighbour discovery types go to ndisc_rcv().
+ *     The skb is always consumed (freed) before returning.
+ */
+
+int icmpv6_rcv(struct sk_buff *skb, struct device *dev,
+              struct in6_addr *saddr, struct in6_addr *daddr,
+              struct ipv6_options *opt, unsigned short len,
+              int redo, struct inet6_protocol *protocol)
+{
+       struct ipv6hdr *orig_hdr;
+       struct icmpv6hdr *hdr = (struct icmpv6hdr *) skb->h.raw;
+       int ulen;
+
+       /* perform checksum */
+
+       switch (skb->ip_summed) {       
+       case CHECKSUM_NONE:
+               skb->csum = csum_partial((char *)hdr, len, 0);
+               /* fall through -- verify the sum we just computed */
+       case CHECKSUM_HW:
+               if (csum_ipv6_magic(saddr, daddr, len, IPPROTO_ICMPV6, 
+                                   skb->csum))
+               {
+                       printk(KERN_DEBUG "icmpv6 checksum failed\n");
+                       goto discard_it;
+               }
+               break;
+       default:
+               /* CHECKSUM_UNNECESSARY */
+               /* a label needs a statement; the original had none */
+               break;
+       }
+
+       /*
+        *      length of original packet carried in skb
+        */
+       ulen = skb->tail - (unsigned char *) (hdr + 1);
+       
+       switch (hdr->type) {
+
+       case ICMPV6_ECHO_REQUEST:
+               icmpv6_echo_reply(skb);
+               break;
+
+       case ICMPV6_ECHO_REPLY:
+               /* we couldn't care less */
+               break;
+
+       case ICMPV6_PKT_TOOBIG:
+               orig_hdr = (struct ipv6hdr *) (hdr + 1);
+               if (ulen >= sizeof(struct ipv6hdr))
+               {
+                       rt6_handle_pmtu(&orig_hdr->daddr,
+                                       ntohl(hdr->icmp6_mtu));
+               }
+
+               /*
+                * Drop through to notify
+                */
+
+       case ICMPV6_DEST_UNREACH:
+       case ICMPV6_TIME_EXCEEDED:
+       case ICMPV6_PARAMETER_PROB:
+
+               icmpv6_notify(hdr->type, hdr->code, (char *) (hdr + 1), ulen,
+                             saddr, daddr, protocol);
+               break;
+
+       case NDISC_ROUTER_SOLICITATION:
+       case NDISC_ROUTER_ADVERTISEMENT:
+       case NDISC_NEIGHBOUR_SOLICITATION:
+       case NDISC_NEIGHBOUR_ADVERTISEMENT:
+       case NDISC_REDIRECT:
+               ndisc_rcv(skb, dev, saddr, daddr, opt, len);            
+               break;
+
+       case ICMPV6_MEMBERSHIP_QUERY:
+       case ICMPV6_MEMBERSHIP_REPORT:
+       case ICMPV6_MEMBERSHIP_REDUCTION:
+               /* forward the packet to the igmp module */
+               break;
+
+       default:
+               printk(KERN_DEBUG "icmpv6: msg of unkown type\n");
+               
+               /* informational (high bit set): silently discard */
+               if (hdr->type & 0x80)
+               {
+                       goto discard_it;
+               }
+
+               /* 
+                * error of unknown type.
+                * must pass to upper level 
+                */
+
+               icmpv6_notify(hdr->type, hdr->code, (char *) (hdr + 1), ulen,
+                             saddr, daddr, protocol);  
+       }
+
+  discard_it:
+
+       /* every path ends here: the skb is always consumed */
+       kfree_skb(skb, FREE_READ);
+       return 0;
+}
+
+/*
+ *     Create the kernel-internal raw socket used for sending ICMPv6
+ *     messages and register the ICMPv6 protocol handler.
+ */
+void icmpv6_init(struct proto_ops *ops)
+{
+       struct sock *sk;
+       int err;
+
+       icmpv6_socket.type=SOCK_RAW;
+       icmpv6_socket.ops=ops;
+
+       if((err=ops->create(&icmpv6_socket, IPPROTO_ICMPV6))<0)
+               printk(KERN_DEBUG 
+                      "Failed to create the ICMP control socket.\n");
+
+       /* presumably undoes the use count taken by ops->create so a
+          kernel-internal socket does not pin the module -- confirm */
+       MOD_DEC_USE_COUNT;
+
+       sk = icmpv6_socket.data;
+       sk->allocation = GFP_ATOMIC;    /* may send from interrupt path */
+       sk->num = 256;                  /* Don't receive any data */
+
+       inet6_add_protocol(&icmpv6_protocol);
+}
+
+/*
+ *     errno mapping for ICMPV6_DEST_UNREACH, indexed by the ICMPv6
+ *     code value (0 .. ICMPV6_PORT_UNREACH); see icmpv6_err_convert().
+ */
+static struct icmp6_err {
+       int err;        /* errno reported to the socket */
+       int fatal;      /* non-zero: abort the connection */
+} tab_unreach[] = {
+       { ENETUNREACH,  0},     /* NOROUTE              */
+       { EACCES,       1},     /* ADM_PROHIBITED       */
+       { EOPNOTSUPP,   1},     /* NOT_NEIGHBOUR        */
+       { EHOSTUNREACH, 0},     /* ADDR_UNREACH         */
+       { ECONNREFUSED, 1},     /* PORT_UNREACH         */
+};
+
+/*
+ *     Map an ICMPv6 error (type, code) onto an errno value.  *err
+ *     receives the errno (0 when unmapped); the return value is
+ *     non-zero when the error is fatal for the connection.
+ */
+int icmpv6_err_convert(int type, int code, int *err)
+{
+       *err = 0;
+
+       switch (type) {
+       case ICMPV6_DEST_UNREACH:
+               if (code > ICMPV6_PORT_UNREACH)
+                       return 0;
+               *err = tab_unreach[code].err;
+               return tab_unreach[code].fatal;
+
+       case ICMPV6_PKT_TOOBIG:
+               *err = EMSGSIZE;
+               return 0;
+
+       case ICMPV6_PARAMETER_PROB:
+               *err = EPROTO;
+               return 1;
+       }
+
+       return 0;
+}
+
+/*
+ * Local variables:
+ *  compile-command: "gcc -D__KERNEL__ -I/usr/src/linux/include -Wall -Wstrict-prototypes -O2 -fomit-frame-pointer -fno-strength-reduce -pipe -m486 -DCPU=486 -DMODULE -DMODVERSIONS -include /usr/src/linux/include/linux/modversions.h  -c -o icmp.o icmp.c"
+ * End:
+ */
diff --git a/net/ipv6/ipv6_input.c b/net/ipv6/ipv6_input.c
new file mode 100644 (file)
index 0000000..64a9d79
--- /dev/null
@@ -0,0 +1,437 @@
+/*
+ *     IPv6 input
+ *     Linux INET6 implementation 
+ *
+ *     Authors:
+ *     Pedro Roque             <roque@di.fc.ul.pt>
+ *     Ian P. Morris           <I.P.Morris@soton.ac.uk>
+ *
+ *     Based in linux/net/ipv4/ip_input.c
+ *
+ *     $Id: ipv6_input.c,v 1.13 1996/10/11 16:03:06 roque Exp $
+ *
+ *     This program is free software; you can redistribute it and/or
+ *      modify it under the terms of the GNU General Public License
+ *      as published by the Free Software Foundation; either version
+ *      2 of the License, or (at your option) any later version.
+ */
+
+
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/socket.h>
+#include <linux/sockios.h>
+#include <linux/sched.h>
+#include <linux/net.h>
+#include <linux/netdevice.h>
+#include <linux/in6.h>
+#include <linux/icmpv6.h>
+
+#include <net/sock.h>
+#include <net/snmp.h>
+
+#include <net/ipv6.h>
+#include <net/protocol.h>
+#include <net/transp_v6.h>
+#include <net/rawv6.h>
+#include <net/ndisc.h>
+#include <net/ipv6_route.h>
+#include <net/addrconf.h>
+
+/*
+ *     Header processing function list
+ *     We process headers in order (as per RFC)
+ *     If the processing function returns 0 the packet is considered
+ *     delivered; otherwise it returns the value of the next header.
+ *     The ptr argument of the function points to the previous nexthdr field.
+ *     This allows the processing function to change it if its semantics
+ *     are: return a new packet without this header (like fragmentation).
+ *     When a next_header value is not within the list
+ *     the inet protocol list is searched (i.e. to deliver to
+ *     TCP for instance)
+ */
+
+/* Destination-options extension-header handler (defined below). */
+static int ipv6_dest_opt(struct sk_buff **skb_ptr, struct device *dev, __u8 *nhptr,
+                        struct ipv6_options *opt);
+
+
+/*
+ * Extension-header dispatch table, scanned linearly by ipv6_rcv()
+ * and terminated by the NEXTHDR_MAX sentinel entry.
+ */
+struct hdrtype_proc {
+       u8      type;
+       int     (*func) (struct sk_buff **, struct device *dev, __u8 *ptr,
+                        struct ipv6_options *opt);
+} hdrproc_lst[] = {
+  /*
+       TODO
+
+       {NEXTHDR_HOP,           ipv6_hop_by_hop}
+   */
+       {NEXTHDR_ROUTING,       ipv6_routing_header},
+       {NEXTHDR_FRAGMENT,      ipv6_reassembly},
+  
+       {NEXTHDR_DEST,          ipv6_dest_opt},
+   /*  
+       {NEXTHDR_AUTH,          ipv6_auth_hdr},
+       {NEXTHDR_ESP,           ipv6_esp_hdr},
+    */
+       {NEXTHDR_MAX,           NULL}
+};
+
+/* New header structures */
+
+
+/* Option TLV: one byte of type followed by one byte of payload length. */
+struct ipv6_tlvtype {
+       u8 type;
+       u8 len;
+};
+
+/* Fixed leading part of the destination-options extension header. */
+struct ipv6_destopt_hdr {
+       u8 nexthdr;
+       u8 hdrlen;
+};
+
+
+/*
+ * Per-option handlers for the destination-options header; the table
+ * is terminated by the type==255 sentinel (currently no handlers).
+ */
+struct tlvtype_proc {
+       u8      type;
+       int     (*func) (struct sk_buff *, struct device *dev, __u8 *ptr,
+                        struct ipv6_options *opt); 
+       
+       /* these functions do NOT update skb->h.raw */
+                        
+} tlvprocdestopt_lst[] = {
+       {255,                   NULL}
+};
+
+
+/*
+ * Walk the TLV-encoded options lying between skb->h.raw and lastopt,
+ * dispatching each recognized option through the procs table and
+ * advancing skb->h.raw past every option.
+ *
+ * Returns 1 when all options were consumed, 0 when the packet was
+ * dropped (and freed) because of an unknown option whose action bits
+ * demand it.
+ *
+ * NOTE(review): the option type is masked with 0x3F for lookup while
+ * bits 7-6 select the unknown-option action -- confirm this encoding
+ * matches the option-type definition used by the rest of the stack.
+ */
+static int parse_tlv(struct tlvtype_proc *procs, struct sk_buff *skb,
+                    struct device *dev, __u8 *nhptr, struct ipv6_options *opt,
+                    void *lastopt)
+{
+       struct ipv6_tlvtype *hdr;
+       struct tlvtype_proc *curr;
+       int pos;
+
+       while ((hdr=(struct ipv6_tlvtype *)skb->h.raw) != lastopt)
+               switch (hdr->type & 0x3F)
+               {
+               case 0: /* TLV encoded Pad1 */
+                       skb->h.raw++;
+                       break;
+
+               case 1: /* TLV encoded PadN */
+                       skb->h.raw += hdr->len+2;
+                       break;
+
+               default: /* Other TLV code so scan list */
+                       for (curr=procs; curr->type != 255; curr++)
+                               if (curr->type == (hdr->type & 0x3F))
+                               {
+                                       curr->func(skb, dev, nhptr, opt);
+                                       skb->h.raw += hdr->len+2;
+                                       break;
+                               }
+               
+                       if (curr->type==255)
+                       { 
+                               /* unknown type: the two high-order bits
+                                  of the type decide what to do */
+                               pos= (__u8 *) skb->h.raw - (__u8 *) skb->ipv6_hdr;
+                               /* I think this is correct please check - IPM */
+
+                               switch ((hdr->type & 0xC0) >> 6) {
+                                       case 0: /* ignore */
+                                               skb->h.raw += hdr->len+2;
+                                               break;
+                                               
+                                       case 1: /* drop packet */
+                                               kfree_skb(skb, FREE_READ);
+                                               return 0;
+
+                                       case 2: /* send ICMP PARM PROB regardless and 
+                                                  drop packet */
+                                               icmpv6_send(skb, ICMPV6_PARAMETER_PROB, 
+                                                           2, pos, dev);
+                                               kfree_skb(skb, FREE_READ);
+                                               return 0;
+
+                                       case 3: /* Send ICMP if not a multicast address 
+                                                  and drop packet */
+                                               if (!(ipv6_addr_type(&(skb->ipv6_hdr->daddr)) & IPV6_ADDR_MULTICAST) )
+                                                       icmpv6_send(skb, ICMPV6_PARAMETER_PROB, 2, pos, dev);
+                                               kfree_skb(skb, FREE_READ);
+                                               return 0;
+                                       }
+                       }
+                       break;
+               }
+       
+       return 1;
+}
+
+
+/*
+ * Destination-options header: parse its TLV options and return the
+ * next-header value, or 0 when parse_tlv() consumed (dropped) the
+ * packet.
+ *
+ * NOTE(review): hdr->hdrlen is used directly as a byte offset; the
+ * extension-header length field is normally expressed in 8-octet
+ * units excluding the first 8 octets -- confirm the intended units.
+ */
+static int ipv6_dest_opt(struct sk_buff **skb_ptr, struct device *dev, __u8 *nhptr,
+                        struct ipv6_options *opt)
+{
+       struct sk_buff *skb=*skb_ptr;
+       struct ipv6_destopt_hdr *hdr = (struct ipv6_destopt_hdr *) skb->h.raw;
+       
+       if (parse_tlv(tlvprocdestopt_lst, skb, dev, nhptr, opt,skb->h.raw+hdr->hdrlen))
+               return hdr->nexthdr;
+       else
+               return 0;
+}
+
+
+
+/*
+ *     Per-socket ICMPv6 type filter for raw sockets.
+ *     0 - deliver
+ *     1 - block (the type's bit is set in the socket's filter bitmap)
+ */
+static __inline__ int icmpv6_filter(struct sock *sk, struct sk_buff *skb)
+{
+       struct icmpv6hdr *icmph;
+       struct raw6_opt *opt;
+
+       opt = &sk->tp_pinfo.tp_raw;
+       /* NOTE(review): assumes the ICMPv6 header immediately follows
+          the fixed IPv6 header (no extension headers) -- confirm. */
+       icmph = (struct icmpv6hdr *) (skb->ipv6_hdr + 1);
+       return test_bit(icmph->type, &opt->filter);
+}
+
+/*
+ *     demultiplex raw sockets.
+ *     (should consider queueing the skb in the sock receive_queue
+ *     without calling rawv6.c)
+ *
+ *     Clones the skb to every matching raw socket except the first;
+ *     the first match is returned to the caller, which delivers the
+ *     original skb to it after transport-protocol delivery.  ICMPv6
+ *     packets are additionally subject to the per-socket type filter.
+ *     Returns NULL when no (unfiltered) raw socket matches.
+ */
+static struct sock * ipv6_raw_deliver(struct sk_buff *skb,
+                                     struct device *dev,
+                                     struct ipv6_options *opt,
+                                     __u16 nexthdr,
+                                     __u16 len,
+                                     struct in6_addr *saddr,
+                                     struct in6_addr *daddr)
+{
+       struct sock *sk, *sk2;
+       __u8 hash;
+
+       hash = nexthdr & (SOCK_ARRAY_SIZE-1);
+
+       sk = rawv6_prot.sock_array[hash];
+       
+
+       /*
+        *      The first socket found will be delivered after
+        *      delivery to transport protocols.
+        */
+
+       if (sk == NULL)
+               return NULL;
+       
+       sk = inet6_get_sock_raw(sk, nexthdr, daddr, saddr);
+
+       if (sk)
+       {
+               sk2 = sk;
+
+               while ((sk2 = inet6_get_sock_raw(sk2->next, nexthdr, 
+                                                daddr, saddr)))
+               {
+                       struct sk_buff *buff;
+
+                       if (nexthdr == IPPROTO_ICMPV6 &&
+                           icmpv6_filter(sk2, skb))
+                       {
+                               continue;
+                       }
+                       /* NOTE(review): skb_clone() result is not checked
+                          for NULL before use -- confirm GFP_ATOMIC
+                          failure handling. */
+                       buff = skb_clone(skb, GFP_ATOMIC);
+                       buff->sk = sk2;
+                       rawv6_rcv(buff, dev, saddr, daddr, opt, len);
+               }
+       }
+
+       if (sk && nexthdr == IPPROTO_ICMPV6 && icmpv6_filter(sk, skb))
+       {
+               sk = NULL;
+       }   
+       
+       return sk;
+}
+
+/*
+ * Main IPv6 receive routine (packet_type handler).
+ *
+ * Validates the fixed header and payload length, walks the extension
+ * header chain via hdrproc_lst, then delivers the payload to matching
+ * raw sockets and registered inet6 protocols; packets not addressed
+ * to this host are forwarded (if ipv6_forwarding) or dropped.
+ * Always returns 0; the skb is consumed on every path.
+ */
+int ipv6_rcv(struct sk_buff *skb, struct device *dev, struct packet_type *pt)
+{
+       struct inet6_ifaddr     *ifp;
+       struct ipv6_options     *opt = (struct ipv6_options *) skb->proto_priv;
+       struct ipv6hdr          *hdr;
+       u8                      hash;
+       u8                      addr_type;
+       struct inet6_protocol   *ipprot;
+       struct sock             *raw_sk;
+       int                     found = 0;
+       int                     nexthdr = 0;
+       __u8                    *nhptr;
+       int                     pkt_len;
+
+       hdr = skb->ipv6_hdr = (struct ipv6hdr *) skb->h.raw;
+
+       /* sanity: buffer must hold a fixed header and version must be 6 */
+       if (skb->len < sizeof(struct ipv6hdr) || hdr->version != 6)
+       {
+               ipv6_statistics.Ip6InHdrErrors++;
+               printk(KERN_DEBUG "ipv6_rcv: broken header\n");
+               kfree_skb(skb, FREE_READ);
+               return 0;
+       }
+
+       pkt_len = ntohs(hdr->payload_len);
+
+       if (pkt_len + sizeof(struct ipv6hdr) > skb->len)
+       {
+               printk(KERN_DEBUG "ipv6_rcv: invalid payload length\n");
+               kfree_skb(skb, FREE_READ);
+               return 0;
+       }
+
+       /* drop any link-layer padding beyond the declared payload */
+       skb_trim(skb, pkt_len + sizeof(struct ipv6hdr));
+
+       /* check daddr */
+
+       /* Accounting & Firewall check */
+
+       addr_type = ipv6_addr_type(&hdr->daddr);
+
+       if (addr_type & IPV6_ADDR_MULTICAST)
+       {
+               /* 
+                * if mcast address is not for one of our groups
+                * either pass it to mcast router or discard it 
+                */
+
+               if (ipv6_chk_mcast_addr(dev, &hdr->daddr) == 0)
+               {
+                       /* something like:
+                          if (acting_as_router)
+                               ipv6_mcast_route(skb, ...)
+                          else 
+                          */
+                       kfree_skb(skb, FREE_READ);
+                       return 0;
+               }
+       }
+
+       if (addr_type & IPV6_ADDR_MULTICAST ||
+           (ifp = ipv6_chk_addr(&hdr->daddr)))     
+       {
+
+               /* loop parsing next headers (extension header chain) */
+
+               skb->h.raw   += sizeof(struct ipv6hdr);
+               /* extension header processing must update skb->h.raw */
+
+               nexthdr = hdr->nexthdr;
+               nhptr = &hdr->nexthdr;
+
+
+               while(1)
+               {
+                       struct hdrtype_proc *hdrt;
+
+                       /* check for extension header */
+
+                       for (hdrt=hdrproc_lst; hdrt->type != NEXTHDR_MAX; hdrt++)
+                       {
+                               if (hdrt->type == nexthdr)
+                               {
+                                       /* handler returns 0 when it consumed
+                                          the packet (e.g. queued fragment) */
+                                       if ((nexthdr = hdrt->func(&skb, dev, nhptr, opt)))
+                                       {
+                                               nhptr = skb->h.raw;
+                                               hdr = skb->ipv6_hdr;
+                                               continue;
+                                       }
+                                       return 0;
+                               }
+                       }
+                       break;
+
+               }
+
+               /* 
+                * deliver to raw sockets
+                * should we deliver raw after or before parsing 
+                * extension headers ?
+                * delivering after means we do reassembly of datagrams
+                * in ip.
+                */
+
+               pkt_len = skb->tail - skb->h.raw;
+
+               raw_sk = ipv6_raw_deliver(skb, dev, opt, nexthdr, pkt_len,
+                                         &hdr->saddr, &hdr->daddr);
+
+               /* check inet6_protocol list */
+
+               hash = nexthdr & (MAX_INET_PROTOS -1);
+               for (ipprot = (struct inet6_protocol *) inet6_protos[hash]; 
+                    ipprot != NULL; 
+                    ipprot = (struct inet6_protocol *) ipprot->next)
+               {
+                       struct sk_buff *buff = skb;
+
+                       if (ipprot->protocol != nexthdr)
+                               continue;
+
+                       /* clone when another consumer still needs the skb */
+                       if (ipprot->copy || raw_sk)
+                               buff = skb_clone(skb, GFP_ATOMIC);
+
+
+                       ipprot->handler(buff, dev, 
+                                       &hdr->saddr, &hdr->daddr,
+                                       opt, pkt_len,
+                                       0, ipprot);
+                       found = 1;
+               }
+
+               if (raw_sk)
+               {
+                       skb->sk = raw_sk;
+                       /* NOTE(review): hdr->payload_len is already in
+                          network byte order and this should arguably be
+                          the post-extension-header pkt_len -- confirm
+                          the htons() here. */
+                       rawv6_rcv(skb, dev, &hdr->saddr, &hdr->daddr, opt,
+                                 htons(hdr->payload_len));
+                       found = 1;
+               }
+              
+               /* not found: send ICMP parameter problem back */
+
+               if (!found)
+               {
+                       printk(KERN_DEBUG "proto not found %d\n", nexthdr);
+                       skb->sk = NULL;
+                       kfree_skb(skb, FREE_READ);
+               }
+                       
+       }
+       else
+       {
+               if (ipv6_forwarding)
+               {
+                       if (addr_type & IPV6_ADDR_LINKLOCAL)
+                       {
+                               printk(KERN_DEBUG
+                                      "link local pkt to forward\n");
+                               kfree_skb(skb, FREE_READ);
+                               return 0;
+                       }
+                       ipv6_forward(skb, dev, 0);
+               }
+               else
+               {
+                       printk(KERN_WARNING "IPV6: packet to forward -"
+                              "host not configured as router\n");
+                       kfree_skb(skb, FREE_READ);
+               }
+       }
+
+       return 0;
+}
+
+/*
+ * Local variables:
+ * c-file-style: "Linux"
+ * End:
+ */
diff --git a/net/ipv6/ipv6_output.c b/net/ipv6/ipv6_output.c
new file mode 100644 (file)
index 0000000..eb089b4
--- /dev/null
@@ -0,0 +1,964 @@
+/*
+ *     IPv6 output functions
+ *     Linux INET6 implementation 
+ *
+ *     Authors:
+ *     Pedro Roque             <roque@di.fc.ul.pt>     
+ *
+ *     Based on linux/net/ipv4/ip_output.c
+ *
+ *     $Id: ipv6_output.c,v 1.19 1996/10/16 18:34:16 roque Exp $
+ *
+ *     This program is free software; you can redistribute it and/or
+ *      modify it under the terms of the GNU General Public License
+ *      as published by the Free Software Foundation; either version
+ *      2 of the License, or (at your option) any later version.
+ */
+
+
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/socket.h>
+#include <linux/sockios.h>
+#include <linux/sched.h>
+#include <linux/net.h>
+#include <linux/netdevice.h>
+#include <linux/if_arp.h>
+#include <linux/in6.h>
+
+#include <net/sock.h>
+#include <net/snmp.h>
+
+#include <net/ipv6.h>
+#include <net/ndisc.h>
+#include <net/protocol.h>
+#include <net/transp_v6.h>
+#include <net/ipv6_route.h>
+#include <net/addrconf.h>
+
+static u32     ipv6_fragmentation_id = 1;
+int            ipv6_forwarding = 0;            /* default: host */
+
+/*
+ * Reserve room for and build the link-layer header of an outgoing skb.
+ * Returns the number of MAC-header bytes prepended (0 when the device
+ * needs none).  A negative return from dev->hard_header() means the
+ * destination is not resolved yet: the space is kept but skb->arp is
+ * cleared so the header is completed at transmit time.
+ */
+static int __inline__ ipv6_build_mac_header(struct sk_buff *skb,
+                                           struct device *dev,
+                                           struct neighbour *neigh, 
+                                           int len)
+{
+       int mac;
+       int hdrlen = 0;
+
+       skb->arp = 1;
+       skb->nexthop = neigh;
+
+
+       if (dev->hard_header_len)
+       {
+               /* round the reserved headroom up to a 16-byte boundary */
+               skb_reserve(skb, (dev->hard_header_len + 15) & ~15);
+
+               if (neigh && (neigh->flags & NCF_HHVALID))
+               {
+                       /*
+                        *      Cached hardware header
+                        */
+
+                       memcpy(skb_push(skb, dev->hard_header_len),
+                              neigh->hh_data, dev->hard_header_len);
+
+                       return dev->hard_header_len;
+               }
+
+               if (dev->hard_header)
+               {
+                       mac = dev->hard_header(skb, dev, ETH_P_IPV6, 
+                                              NULL, NULL, len);
+               
+                       if (mac < 0)
+                       {                               
+                               /* destination unresolved; finish later */
+                               hdrlen = -mac;
+                               skb->arp = 0;
+                       }
+                       else
+                       {                               
+                               hdrlen = mac;
+                       }
+               }
+               else
+                       hdrlen = dev->hard_header_len;
+       }
+
+       return hdrlen;
+}
+
+/*
+ * Rebuild the link-layer header of an already-built packet once the
+ * next-hop neighbour is known: pull the buffer back to the IPv6
+ * header, then prepend either the cached hardware header or a freshly
+ * built one for the neighbour's device.
+ */
+void ipv6_redo_mac_hdr(struct sk_buff *skb, struct neighbour *neigh, int len)
+{
+       struct device *dev = neigh->dev;
+       int mac;
+       
+       skb->dev = dev;
+       skb->nexthop = neigh;
+       skb->arp = 1;
+
+       /* discard any stale MAC header in front of the IPv6 header */
+       skb_pull(skb, (unsigned char *) skb->ipv6_hdr - skb->data);
+
+       /*
+        *      neighbour cache should have the ether address
+        *      cached... use it
+        */ 
+
+       if (dev->hard_header)
+       {
+               if (neigh && (neigh->flags & NCF_HHVALID))
+               {
+                       /*
+                        *      Cached hardware header
+                        */
+
+                       memcpy(skb_push(skb, dev->hard_header_len),
+                              neigh->hh_data, dev->hard_header_len);
+                       return;
+               }
+
+               mac = dev->hard_header(skb, dev, ETH_P_IPV6, 
+                                      NULL, NULL, len);
+               
+               if (mac < 0)
+               {                               
+                       /* still unresolved; resolve at transmit time */
+                       skb->arp = 0;
+               }
+
+       }
+}
+
+/*
+ * Default rt6 output method: queue the skb on its device using the
+ * owning socket's priority (SOPRI_NORMAL when unowned).  If the
+ * interface is down, record ENETDOWN on the socket, count the discard
+ * and free the packet.
+ */
+void default_output_method(struct sk_buff *skb, struct rt6_info *rt)
+{
+       struct sock *sk = skb->sk;
+       struct device *dev = skb->dev;
+
+       if (dev->flags & IFF_UP)
+       {
+               /*
+                *      If we have an owner use its priority setting,
+                *      otherwise use NORMAL
+                */
+
+               if (sk != NULL)
+               {
+                       dev_queue_xmit(skb, dev, sk->priority);
+               }
+               else
+               {
+                       dev_queue_xmit(skb, dev, SOPRI_NORMAL);
+               }
+       }
+       else
+       {
+               if(sk)
+                       sk->err = ENETDOWN;
+
+               ipv6_statistics.Ip6OutDiscards++;
+               
+               kfree_skb(skb, FREE_WRITE);
+       }
+}
+
+/*
+ *     xmit an sk_buff (used by TCP)
+ *     sk can be NULL (for sending RESETs)
+ *
+ *     The caller has already placed a (partially filled) IPv6 header
+ *     at skb->ipv6_hdr; this routine resolves the route, picks a
+ *     source address when none is given, fills in the remaining
+ *     header fields and hands the packet to the route's output method.
+ *     Returns 0 on success or -ENETUNREACH when no route or source
+ *     address can be found.
+ */
+int ipv6_xmit(struct sock *sk, struct sk_buff *skb, struct in6_addr *saddr,
+             struct in6_addr *daddr, struct ipv6_options *opt, int proto)
+{
+       struct ipv6hdr *hdr;
+       struct dest_entry *dc;
+       struct ipv6_pinfo *np = NULL;
+       struct device *dev = skb->dev;
+       int seg_len;
+       int addr_type;
+       int rt_flags = 0;
+
+
+       addr_type = ipv6_addr_type(daddr);
+
+       if (addr_type & (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_SITELOCAL))
+       {
+               /*
+                *      force device match on route lookup
+                */
+               
+               rt_flags |= RTI_DEVRT;
+       }
+
+       if (skb->localroute)
+       {
+               rt_flags |= RTI_GATEWAY;
+       }
+
+       hdr = skb->ipv6_hdr;
+       
+
+       if (sk)
+       {
+               np = &sk->net_pinfo.af_inet6;
+       }
+       
+       if (np && np->dest)
+       {
+               /* revalidate the socket's cached destination entry */
+               dc = ipv6_dst_check(np->dest, daddr, np->dc_sernum, rt_flags);
+       }
+       else
+       {
+               dc = ipv6_dst_route(daddr, dev, rt_flags);
+       }
+
+       if (dc == NULL)
+       {
+               ipv6_statistics.Ip6OutNoRoutes++;
+               return(-ENETUNREACH);
+       }
+
+       dev = dc->rt.rt_dev;
+       
+       if (saddr == NULL)
+       {
+               struct inet6_ifaddr *ifa;
+               
+               ifa = ipv6_get_saddr((struct rt6_info *) dc, daddr);
+               
+               if (ifa == NULL)
+               {
+                       /* NOTE(review): dc does not appear to be released
+                          on this error path -- confirm whether
+                          ipv6_dst_unlock() is needed here. */
+                       printk(KERN_DEBUG 
+                              "ipv6_xmit: get_saddr failed\n");
+                       return -ENETUNREACH;
+               }
+               
+               saddr = &ifa->addr;
+               
+               if (np)
+               {
+                       ipv6_addr_copy(&np->saddr, saddr);
+               }
+       }
+
+       seg_len = skb->tail - ((unsigned char *) hdr);
+
+       /*
+        *      Link Layer headers
+        */
+
+       skb->sk = sk;
+       skb->protocol = __constant_htons(ETH_P_IPV6);
+       skb->free = 1;
+       skb->dev = dev;
+       
+       ipv6_redo_mac_hdr(skb, dc->dc_nexthop, seg_len);
+       
+       /*
+        *      Fill in the IPv6 header
+        */
+
+       hdr->version = 6;
+       hdr->priority = np ? np->priority : 0;
+
+       if (np)
+               memcpy(hdr->flow_lbl, (void *) &np->flow_lbl, 3);
+       else
+               memset(hdr->flow_lbl, 0, 3);
+
+       hdr->payload_len = htons(seg_len - sizeof(struct ipv6hdr));
+       hdr->nexthdr = proto;
+       hdr->hop_limit = np ? np->hop_limit : ipv6_hop_limit;
+       
+       memcpy(&hdr->saddr, saddr, sizeof(struct in6_addr));
+       memcpy(&hdr->daddr, daddr, sizeof(struct in6_addr));
+
+       
+       /*
+        *      Options
+        */
+
+
+       /*
+        *      Output the packet
+        */
+
+       ipv6_statistics.Ip6OutRequests++;
+
+       if (dc->rt.rt_output_method)
+       {
+               (*dc->rt.rt_output_method)(skb, (struct rt6_info *) dc);
+       }
+       else
+               default_output_method(skb, (struct rt6_info *) dc);
+
+       /*
+        *      Update serial number of cached dest_entry or
+        *      release destination cache entry
+        */
+       
+       if (np)
+       {
+               /* socket keeps the entry; remember its serial number */
+               np->dest = dc;
+               if (dc->rt.fib_node)
+               {
+                       np->dc_sernum = dc->rt.fib_node->fn_sernum;
+               }
+       }
+       else
+       {
+               ipv6_dst_unlock(dc);
+       }
+
+       return 0;
+}
+
+/*
+ *     To avoid extra problems ND packets are sent through this
+ *     routine.  It's code duplication, but I really want to avoid
+ *     extra checks since ipv6_build_header is used by TCP (which
+ *     is performance critical for us)
+ */
+
+/*
+ * Build the MAC header plus the fixed IPv6 header for an ND packet.
+ * For IPPROTO_RAW only the MAC header is built.  Returns the total
+ * number of header bytes, or -ENETUNREACH when no source address was
+ * supplied (hdr->payload_len is left for the caller to fill in).
+ */
+int ipv6_bld_hdr_2(struct sock *sk, struct sk_buff *skb, struct device *dev,
+                  struct neighbour *neigh,
+                  struct in6_addr *saddr, struct in6_addr *daddr,
+                  int proto, int len)
+{
+       struct ipv6_pinfo *np = &sk->net_pinfo.af_inet6;
+       struct ipv6hdr *hdr;
+       int hdrlen = 0;
+
+       skb->dev = dev;
+
+       /* build MAC header */
+       hdrlen += ipv6_build_mac_header(skb, dev, neigh, len);
+
+       /* build fixed IPv6 header */
+
+       if (proto == IPPROTO_RAW)
+               return hdrlen;
+
+       
+       hdr = (struct ipv6hdr *) skb_put(skb, sizeof(struct ipv6hdr));
+       skb->ipv6_hdr = hdr;
+       
+       hdr->version  = 6;
+       hdr->priority = np->priority & 0x0f;
+
+       memset(hdr->flow_lbl, 0, 3);
+
+       hdr->hop_limit =  np->hop_limit;
+
+       if (saddr == NULL)
+       {
+               printk(KERN_DEBUG "bug: bld_hdr called with no saddr\n");
+               return -ENETUNREACH;
+       }
+
+       memcpy(&hdr->saddr, saddr, sizeof(struct in6_addr));
+       memcpy(&hdr->daddr, daddr, sizeof(struct in6_addr));
+       
+       hdrlen += sizeof(struct ipv6hdr);
+
+       hdr->nexthdr = proto;
+
+       return hdrlen;
+}
+
+/*
+ * Finalize and transmit a fully built packet: stamp the protocol and
+ * payload length, then queue it on the device.  If the interface is
+ * down the packet is discarded and Ip6OutDiscards is bumped.  The
+ * 'free' parameter is currently unused.
+ */
+void ipv6_queue_xmit(struct sock *sk, struct device *dev, struct sk_buff *skb,
+                    int free)
+{
+       struct ipv6hdr *hdr;
+       u32 seg_len;
+
+       hdr = skb->ipv6_hdr;
+       skb->sk = sk;
+       skb->protocol = __constant_htons(ETH_P_IPV6);
+       skb->free=1;
+
+       seg_len = skb->tail - ((unsigned char *) hdr);
+
+       hdr->payload_len = htons(seg_len - sizeof(struct ipv6hdr));
+
+       if (dev == NULL)
+       {
+               /* NOTE(review): the skb is neither freed nor queued on
+                  this path -- confirm whether this leaks. */
+               printk(KERN_DEBUG "ipv6_queue_xmit: unknown device\n");
+               return;
+       }
+
+       skb->dev = dev;
+       
+       ipv6_statistics.Ip6OutRequests++;
+
+
+       /*
+        *      Multicast loopback
+        */
+
+       if (dev->flags & IFF_UP)
+       {
+               /*
+                *      If we have an owner use its priority setting,
+                *      otherwise use NORMAL
+                */
+
+               if (sk != NULL)
+               {
+                       dev_queue_xmit(skb, dev, sk->priority);
+               }
+               else
+               {
+                       dev_queue_xmit(skb, dev, SOPRI_NORMAL);
+               }
+       }
+       else
+       {
+               if(sk)
+                       sk->err = ENETDOWN;
+
+               ipv6_statistics.Ip6OutDiscards++;
+
+               kfree_skb(skb, FREE_WRITE);
+       }
+
+}
+
+
+int ipv6_build_xmit(struct sock *sk, inet_getfrag_t getfrag, const void *data,
+                   struct in6_addr *dest, unsigned short int length,
+                   struct in6_addr *saddr, struct device *dev,
+                   struct ipv6_options *opt, int proto,                    
+                   int noblock)
+{
+       rt6_output_method_t output_method = default_output_method;
+       struct ipv6_pinfo *np = &sk->net_pinfo.af_inet6;
+       struct dest_entry *dc = NULL;
+       struct in6_addr *daddr = dest;
+       struct ipv6hdr *hdr;
+       struct neighbour *neigh;        
+       int     addr_type;
+       int     pktlength;
+       int     pmtu = 0;
+       int     rt_flags = 0;
+       
+       
+       if (opt && opt->srcrt)
+       {
+               struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt;
+               daddr = rt0->addr;
+       }
+
+       addr_type = ipv6_addr_type(daddr);
+       if (addr_type & IPV6_ADDR_MULTICAST && dev == NULL)
+       {
+               dev = np->mc_if;
+       }
+
+       if (addr_type & (IPV6_ADDR_LINKLOCAL | IPV6_ADDR_SITELOCAL |
+                        IPV6_ADDR_MULTICAST))
+       {
+               /*
+                *      force device match on route lookup
+                */
+               
+               rt_flags |= RTI_DEVRT;
+       }
+
+       if (sk->localroute)
+       {
+               rt_flags |= RTI_GATEWAY;
+       }
+
+       if (np->dest)
+       {
+               np->dest = ipv6_dst_check(np->dest, daddr, np->dc_sernum,
+                                         rt_flags);
+               
+               dc = np->dest;
+
+               if (dc && dc->rt.fib_node)
+               {
+                       np->dc_sernum = dc->rt.fib_node->fn_sernum;
+               }
+               else
+               {
+                       printk(KERN_WARNING "dc entry not in table\n");
+               }
+       }
+       else
+       {
+               dc = ipv6_dst_route(daddr, dev, rt_flags);
+       }
+
+       if (dc == NULL)
+       {
+               if ((addr_type & IPV6_ADDR_MULTICAST) && dev)
+               {
+                       neigh = NULL;
+                       pmtu = dev->mtu;
+               }
+               else
+               {
+                       ipv6_statistics.Ip6OutNoRoutes++;
+                       return(-ENETUNREACH);
+               }
+       }
+       else
+       {
+               neigh = dc->dc_nexthop;
+               dev = neigh->dev;
+
+               if (dc->rt.rt_output_method)
+               {
+                       output_method = dc->rt.rt_output_method;
+               }
+
+               if (dc->dc_flags & DCF_PMTU)
+                       pmtu = dc->dc_pmtu;
+               else
+                       pmtu = dev->mtu;
+       }
+
+
+       if (saddr == NULL)
+       {
+               struct inet6_ifaddr *ifa;
+
+               ifa = ipv6_get_saddr((struct rt6_info *) dc, daddr);
+
+               if (ifa == NULL)
+               {
+                       printk(KERN_DEBUG 
+                              "ipv6_build_xmit: get_saddr failed\n");
+                       return -ENETUNREACH;
+               }
+
+               saddr = &ifa->addr;
+       }
+
+       if (dc && np->dest == NULL)
+       {
+               ipv6_dst_unlock(dc);
+       }
+
+       pktlength = length;
+
+       if (!sk->ip_hdrincl)
+       { 
+               pktlength += sizeof(struct ipv6hdr);
+               if (opt)
+               {
+                       pktlength += opt->opt_flen + opt->opt_nflen;
+               }
+       }
+               
+
+       dev_lock_list();
+
+       /*
+        *      reminder: don't allow fragmentation for IPPROTO_RAW
+        */
+
+
+       if (pktlength <= pmtu) 
+       {
+               int error;
+
+               struct sk_buff *skb =
+                       sock_alloc_send_skb(sk, pktlength+15+
+                                           dev->hard_header_len,
+                                           0, noblock, &error);
+               
+               if (skb == NULL)
+               {
+                       ipv6_statistics.Ip6OutDiscards++;
+                       dev_unlock_list();
+                       return error;
+
+               }
+
+               skb->dev=dev;
+               skb->protocol = htons(ETH_P_IPV6);
+               skb->free=1;
+               skb->when=jiffies;
+               skb->sk=sk;
+               skb->arp=0;
+
+               /* build the mac header... */
+               ipv6_build_mac_header(skb, dev, neigh, pktlength);
+
+               hdr = (struct ipv6hdr *) skb->tail;
+
+               if (!sk->ip_hdrincl)
+               {
+                       skb_put(skb, sizeof(struct ipv6hdr));
+                       skb->ipv6_hdr = hdr;
+
+                       hdr->version = 6;
+                       hdr->priority = np->priority;
+
+                       memcpy(hdr->flow_lbl, &np->flow_lbl, 3);
+
+                       hdr->payload_len = htons(pktlength - 
+                                                sizeof(struct ipv6hdr));
+                       
+                       hdr->hop_limit = np->hop_limit;
+
+                       memcpy(&hdr->saddr, saddr, sizeof(struct in6_addr));
+                       memcpy(&hdr->daddr, daddr, sizeof(struct in6_addr));
+
+                       if (opt && opt->srcrt)
+                       {
+                               hdr->nexthdr = ipv6opt_bld_rthdr(skb, opt,
+                                                                dest, proto);
+                                                                
+                       }
+                       else
+                               hdr->nexthdr = proto;
+               }
+                       
+               skb_put(skb, length);
+               getfrag(data, &hdr->saddr,
+                       ((char *) hdr) + (pktlength - length), 0, length);
+                       
+               ipv6_statistics.Ip6OutRequests++;
+               (*output_method)(skb, (struct rt6_info *) dc);
+
+               dev_unlock_list();
+               return 0;
+       }
+       else
+       {
+               /*
+                *      Fragmentation
+                */
+
+               /*
+                *      Extension header order:
+                *      Hop-by-hop -> Routing -> Fragment -> rest (...)
+                *      
+                *      We must build the non-fragmented part that
+                *      will be in every packet... this also means
+                *      that other extension headers (Dest, Auth, etc)
+                *      must be considered in the data to be fragmented
+                */
+
+               struct sk_buff  *last_skb;
+               struct frag_hdr *fhdr;
+               int unfrag_len;
+               int payl_len;
+               int frag_len;
+               int last_len;
+               int nfrags;
+               int err;
+               int fhdr_dist;
+               __u32 id;
+
+               if (sk->ip_hdrincl)
+               {
+                       return -EMSGSIZE;
+               }
+
+               id = ipv6_fragmentation_id++;
+
+               unfrag_len = sizeof(struct ipv6hdr) + sizeof(struct frag_hdr);
+               payl_len = length;
+
+               if (opt)
+               {
+                       unfrag_len += opt->opt_nflen;
+                       payl_len += opt->opt_flen;
+               }
+
+               nfrags = payl_len / ((pmtu - unfrag_len) & ~0x7);
+
+               /* 
+                * Length of fragmented part on every packet but 
+                * the last must be an:
+                * "integer multiple of 8 octects".
+                */
+
+               frag_len = (pmtu - unfrag_len) & ~0x7;
+
+               /*
+                *      We must send from end to start because of 
+                *      UDP/ICMP checksums. We do a funny trick:
+                *      fill the last skb first with the fixed
+                *      header (and its data) and then use it
+                *      to create the following segments and send it
+                *      in the end. If the peer is checking the M_flag
+                *      to trigger the reassembly code then this 
+                *      might be a good idea.
+                */
+
+               last_len = payl_len - (nfrags * frag_len);
+
+               if (last_len == 0)
+               {
+                       last_len = frag_len;
+                       nfrags--;
+               }
+               
+               last_skb = sock_alloc_send_skb(sk, unfrag_len + frag_len +
+                                              dev->hard_header_len + 15,
+                                              0, noblock, &err);
+
+               if (last_skb == NULL)
+               {
+                       dev_unlock_list();
+                       return err;
+               }
+
+               last_skb->dev=dev;
+               last_skb->protocol = htons(ETH_P_IPV6);
+               last_skb->free=1;
+               last_skb->when=jiffies;
+               last_skb->sk=sk;
+               last_skb->arp=0;
+
+               /* 
+                * build the mac header... 
+                */
+               ipv6_build_mac_header(last_skb, dev, neigh,
+                                     unfrag_len + frag_len);
+
+               hdr = (struct ipv6hdr *) skb_put(last_skb, 
+                                                sizeof(struct ipv6hdr));
+               last_skb->ipv6_hdr = hdr;
+
+               hdr->version = 6;
+               hdr->priority = np->priority;
+
+               memcpy(hdr->flow_lbl, &np->flow_lbl, 3);
+               hdr->payload_len = htons(unfrag_len + frag_len - 
+                                        sizeof(struct ipv6hdr));
+               
+               hdr->hop_limit = np->hop_limit;
+
+               hdr->nexthdr = NEXTHDR_FRAGMENT;
+
+               memcpy(&hdr->saddr, saddr, sizeof(struct in6_addr));
+               memcpy(&hdr->daddr, daddr, sizeof(struct in6_addr));
+               
+               if (opt && opt->srcrt)
+               {
+                       hdr->nexthdr = ipv6opt_bld_rthdr(last_skb, opt, dest,
+                                                        NEXTHDR_FRAGMENT);
+               }
+       
+               fhdr = (struct frag_hdr *)
+                       skb_put(last_skb, sizeof(struct frag_hdr));
+
+               memset(fhdr, 0, sizeof(struct frag_hdr));
+
+               fhdr->nexthdr  = proto;         
+               fhdr->frag_off = ntohs(nfrags * frag_len);
+               fhdr->identification = id;
+
+               fhdr_dist = (unsigned char *) fhdr - last_skb->data;
+
+               getfrag(data, &hdr->saddr, last_skb->tail, nfrags * frag_len, 
+                       last_len);
+               
+               while (nfrags--)
+               {
+                       struct sk_buff *skb;
+
+                       struct frag_hdr *fhdr2;
+
+                       printk(KERN_DEBUG "sending frag %d\n", nfrags);
+                       skb = skb_copy(last_skb, sk->allocation);
+
+                       fhdr2 = (struct frag_hdr *) (skb->data + fhdr_dist);
+                       /* more flag on */
+                       fhdr2->frag_off = ntohs(nfrags * frag_len + 1);
+
+                       /* if (nfrags == 0)
+                          put rest of headers
+                          */
+
+                       getfrag(data, &hdr->saddr, skb_put(skb, frag_len), 
+                               nfrags * frag_len, frag_len);
+
+                       ipv6_statistics.Ip6OutRequests++;
+                       (*output_method)(skb, (struct rt6_info *) dc);
+               }
+
+               printk(KERN_DEBUG "sending last frag \n");
+
+               hdr->payload_len = htons(unfrag_len + last_len - 
+                                        sizeof(struct ipv6hdr));
+
+               /*
+                *      update last_skb to reflect the getfrag we did
+                *      on start.
+                */
+               last_skb->tail += last_len;
+               last_skb->len += last_len;
+
+               /* 
+                * toss the mac header out and rebuild it.
+                * needed because of the different frame length.
+                * ie: not needed for an ethernet.
+                */
+
+               if (dev->type != ARPHRD_ETHER && last_len != frag_len)
+               {
+                       ipv6_redo_mac_hdr(last_skb, neigh,
+                                         unfrag_len + last_len);
+               }
+
+               ipv6_statistics.Ip6OutRequests++;
+               (*output_method)(last_skb, (struct rt6_info *) dc);
+
+               dev_unlock_list();
+               return 0;
+       }
+       return -1;
+}
+
+/*
+ *     Map the two priority bits extracted in ipv6_forward()
+ *     ((priority & 0x7) >> 1) onto device-queue priorities.
+ */
+static int pri_values[4] =
+{
+       SOPRI_BACKGROUND,
+       SOPRI_NORMAL,
+       SOPRI_NORMAL,
+       SOPRI_INTERACTIVE
+};
+
+/*
+ *     Forward an IPv6 packet received on 'dev'.
+ *
+ *     - drops the packet with an ICMPv6 Time Exceeded when the hop
+ *       limit is exhausted
+ *     - refuses to forward packets with a link-local source address
+ *     - looks up the destination cache; may send a redirect when the
+ *       packet would leave through the device it arrived on
+ *     - enforces the outgoing device MTU: oversized packets get an
+ *       ICMPv6 Packet Too Big instead of being fragmented here
+ *
+ *     'flags' carries IP6_FW_* bits (strict routing, source routing).
+ */
+void ipv6_forward(struct sk_buff *skb, struct device *dev, int flags)
+{
+       struct neighbour *neigh;
+       struct dest_entry *dest;
+       int priority;
+       int rt_flags;
+       int size;
+       int pmtu;
+
+       /* hop limit exhausted: notify the sender and drop */
+       if (skb->ipv6_hdr->hop_limit <= 1)
+       {
+               icmpv6_send(skb, ICMPV6_TIME_EXCEEDED, ICMPV6_EXC_HOPLIMIT,
+                           0, dev);
+
+               kfree_skb(skb, FREE_READ);
+               return;
+       }
+
+       skb->ipv6_hdr->hop_limit--;
+
+       /* link-local sources must never be forwarded off-link */
+       if (ipv6_addr_type(&skb->ipv6_hdr->saddr) & IPV6_ADDR_LINKLOCAL)
+       {
+               printk(KERN_DEBUG "ipv6_forward: link local source addr\n");
+               icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_NOT_NEIGHBOUR,
+                           0, dev);            
+               kfree_skb(skb, FREE_READ);
+               return;
+       }
+
+       rt_flags = RTF_MODIFIED;
+
+       if ((flags & IP6_FW_STRICT))
+       {
+               rt_flags |= RTF_GATEWAY;
+       }
+
+       dest = ipv6_dst_route(&skb->ipv6_hdr->daddr, NULL, rt_flags);
+
+       if (dest == NULL)
+       {
+               int code;
+
+               /* strict source routing failure reports "not a neighbour",
+                * otherwise plain "no route" */
+               if (flags & IP6_FW_STRICT)
+                       code = ICMPV6_NOT_NEIGHBOUR;
+               else
+                       code = ICMPV6_NOROUTE;
+                       
+               icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0, dev);
+                           
+               kfree_skb(skb, FREE_READ);
+               return;
+       }
+
+       neigh = dest->dc_nexthop;
+
+       if (neigh->dev == dev && (dev->flags & IFF_MULTICAST) &&
+           !(flags & IP6_FW_SRCRT))
+       {
+               struct in6_addr *target = NULL;
+
+               /* 
+                *      outgoing device equal to incoming device
+                *      send a redirect
+                */
+               
+               if ((dest->dc_flags & RTF_GATEWAY))
+               {
+                       target = &neigh->addr;
+               }
+               else
+               {
+                       target = &skb->ipv6_hdr->daddr;
+               }
+
+               ndisc_send_redirect(skb, neigh, target);
+       }
+
+       pmtu = neigh->dev->mtu;
+
+       size = sizeof(struct ipv6hdr) + ntohs(skb->ipv6_hdr->payload_len);
+       
+       if (size > pmtu)
+       {
+               /* NOTE(review): this path returns without calling
+                * ipv6_dst_unlock(dest) - looks like a destination-cache
+                * refcount leak; confirm against ipv6_dst_route semantics. */
+               icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, pmtu, dev);
+               kfree_skb(skb, FREE_READ);
+               return;
+       }
+
+       ipv6_dst_unlock(dest);
+
+       /* not enough headroom for the outgoing link-layer header:
+        * copy the packet into a freshly allocated skb */
+       if (skb_headroom(skb) < neigh->dev->hard_header_len)
+       {
+               struct sk_buff *buff;
+
+               buff = alloc_skb(neigh->dev->hard_header_len + skb->len + 15,
+                                GFP_ATOMIC);
+
+               if (buff == NULL)
+               {
+                       /* NOTE(review): the original skb is neither freed
+                        * nor queued here - probable skb leak on OOM. */
+                       return;
+               }
+               
+               skb_reserve(buff, (neigh->dev->hard_header_len + 15) & ~15);
+
+               buff->protocol = __constant_htons(ETH_P_IPV6);
+               buff->free = 1;
+               buff->h.raw = skb_put(buff, size);
+
+               memcpy(buff->h.raw, skb->ipv6_hdr, size);
+               buff->ipv6_hdr = (struct ipv6hdr *) buff->h.raw;
+               kfree_skb(skb, FREE_READ);
+               skb = buff;
+       }
+
+       ipv6_redo_mac_hdr(skb, neigh, size);
+
+       /* map the header priority bits onto a queueing priority */
+       priority = skb->ipv6_hdr->priority;
+
+       priority = (priority & 0x7) >> 1;
+       priority = pri_values[priority];
+
+       if (dev->flags & IFF_UP)
+       {
+               dev_queue_xmit(skb, neigh->dev, priority);
+       }
+       else
+       {
+               ipv6_statistics.Ip6OutDiscards++;
+               kfree_skb(skb, FREE_READ);
+       }
+}
+
+
+/*
+ * Local variables:
+ * c-file-style: "Linux"
+ * End:
+ */
diff --git a/net/ipv6/ipv6_route.c b/net/ipv6/ipv6_route.c
new file mode 100644 (file)
index 0000000..39aaf99
--- /dev/null
@@ -0,0 +1,1905 @@
+/*
+ *     IPv6 routing table
+ *     Linux INET6 implementation 
+ *
+ *     Authors:
+ *     Pedro Roque             <roque@di.fc.ul.pt>     
+ *
+ *     This program is free software; you can redistribute it and/or
+ *      modify it under the terms of the GNU General Public License
+ *      as published by the Free Software Foundation; either version
+ *      2 of the License, or (at your option) any later version.
+ */
+
+
+#include <linux/config.h>
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/socket.h>
+#include <linux/sockios.h>
+#include <linux/sched.h>
+#include <linux/net.h>
+#include <linux/route.h>
+#include <linux/netdevice.h>
+#include <linux/in6.h>
+
+#ifdef         CONFIG_PROC_FS
+#include <linux/proc_fs.h>
+#endif
+
+#include <net/tcp.h>
+#include <net/sock.h>
+#include <net/snmp.h>
+
+#include <net/ipv6.h>
+#include <net/ndisc.h>
+#include <net/protocol.h>
+#include <net/ipv6_route.h>
+#include <net/addrconf.h>
+
+#include <net/netlink.h>
+
+#include <asm/uaccess.h>
+
+/*
+ *     Routing Table
+ *
+ *     simplified version of a radix tree
+ *
+ *     - every node shares its ancestor's prefix
+ *     - the tree is ordered from less to most specific mask
+ *     - default routes are handled apart
+ *
+ *     this facilitates recursion a lot
+ */
+
+/* reject route used as the placeholder leaf of the tree root */
+static struct rt6_info null_entry = {
+       NULL, NULL, 
+       {{{0}}},
+       0, 1,
+       NULL, NULL,
+       0, 0, RTF_REJECT
+};
+
+/* root node of the routing tree (RTN_ROOT terminates backtracking) */
+struct fib6_node routing_table = {
+       NULL, NULL, NULL, &null_entry, 
+       0, RTN_ROOT, 0
+};
+
+struct rt6_info                *default_rt_list = NULL;
+struct rt6_info                *loopback_rt = NULL;
+
+/*
+ *     last_resort_rt - no routers present.
+ *     Assume all destinations on link.
+ */
+struct rt6_info                *last_resort_rt = NULL;
+
+/* sentinel head of the circular FIFO of pending route requests */
+static struct rt6_req request_queue = {
+       0, NULL, &request_queue, &request_queue
+};
+
+
+/*
+ *     A routing update causes an increase of the serial number on the
+ *     affected subtree. This allows for cached routes to be asynchronously
+ *     tested when modifications are made to the destination cache as a
+ *     result of redirects, path MTU changes, etc.
+ */
+
+static __u32   rt_sernum       = 0;
+
+/* rt6_lock > 0 defers tree modification to the bottom half; rt6_bh_mask
+ * records which deferred actions (request processing, GC) are pending */
+static atomic_t rt6_lock       = 0;
+static int     rt6_bh_mask     = 0;
+
+#define RT_BH_REQUEST  1
+#define RT_BH_GC       2
+
+static void __rt6_run_bh(void);
+
+typedef void (*f_pnode)(struct fib6_node *fn, void *);
+
+static void    rt6_walk_tree(f_pnode func, void * arg, int filter);
+static void    rt6_rt_timeout(struct fib6_node *fn, void *arg);
+static int     rt6_msgrcv(struct sk_buff *skb);
+
+struct rt6_statistics rt6_stats = {
+       1, 0, 1, 1, 0
+};
+
+static atomic_t        rt_clients = 0;
+
+void rt6_timer_handler(unsigned long data);
+
+/* periodic garbage-collection timer, armed elsewhere */
+struct timer_list rt6_gc_timer = {
+       NULL,
+       NULL,
+       0,
+       0,
+       rt6_timer_handler
+};
+
+/*
+ *     Run deferred routing-table work if the table is unlocked and
+ *     something is pending.  Interrupts are disabled so the test of
+ *     rt6_lock and rt6_bh_mask and the call are atomic.
+ */
+static __inline__ void rt6_run_bh(void)
+{
+       unsigned long flags;
+
+       save_flags(flags);
+       cli();
+
+       if (rt6_lock == 0 && rt6_bh_mask)
+       {
+               __rt6_run_bh();
+       }
+       restore_flags(flags);
+}
+
+/*
+ *     request queue operations
+ *     FIFO queue/dequeue
+ *
+ *     rtreq_queue appends 'req' at the tail of the circular doubly
+ *     linked list rooted at the request_queue sentinel, with interrupts
+ *     disabled so it is safe against the bottom half.
+ */
+static __inline__ void rtreq_queue(struct rt6_req * req)
+{
+       unsigned long flags;
+       struct rt6_req *next = &request_queue;
+
+       save_flags(flags);
+       cli();
+
+       req->prev = next->prev;
+       req->prev->next = req;
+       next->prev = req;
+       req->next = next;
+       restore_flags(flags);
+}
+
+/*
+ *     Pop the oldest request off the FIFO, or return NULL when the
+ *     list contains only the sentinel (empty queue).
+ *
+ *     NOTE(review): unlike rtreq_queue() this runs without cli();
+ *     presumably callers only dequeue from the (serialized) bottom
+ *     half - confirm.
+ */
+static __inline__ struct rt6_req * rtreq_dequeue(void)
+{
+       struct rt6_req *next = &request_queue;
+       struct rt6_req *head;
+
+       head = next->next;
+
+       if (head == next)
+       {
+               return NULL;
+       }
+
+       /* unlink head and detach its list pointers */
+       head->next->prev = head->prev;
+       next->next = head->next;
+
+       head->next = NULL;
+       head->prev = NULL;
+
+       return head;
+}
+
+/*
+ *     compare "prefix length" bits of an address
+ *     Returns 1 when the first 'prefixlen' bits of a1 and a2 are
+ *     identical, 0 otherwise.  Whole 32-bit words are compared with
+ *     memcmp; the trailing partial word through a network-order mask.
+ */
+static __inline__ int addr_match(struct in6_addr *a1, struct in6_addr *a2,
+                                int prefixlen)
+{
+       int pdw;
+       int pbi;
+
+       pdw = prefixlen >> 0x05;  /* num of whole __u32 in prefix */
+       pbi = prefixlen &  0x1f;  /* num of bits in incomplete u32 in prefix */
+
+       if (pdw) 
+       {
+               if (memcmp(a1, a2, pdw << 2))
+                       return 0;
+       }
+
+       if (pbi) 
+       {
+               __u32 w1, w2;
+               __u32 mask;
+
+               w1 = a1->s6_addr32[pdw];
+               w2 = a2->s6_addr32[pdw];
+
+               /* top 'pbi' bits of the word, in network byte order */
+               mask = htonl((0xffffffff) << (0x20 - pbi));
+
+               if ((w1 ^ w2) & mask)
+                       return 0;
+       }
+
+       return 1;       
+}
+
+/*
+ *     test bit. range [0-127]
+ *     Bits are numbered from the most significant bit of the address
+ *     in network byte order: (~bit & 0x1f) == 31 - (bit % 32) selects
+ *     the bit within the 32-bit word dw.  Returns non-zero when set.
+ */
+
+static __inline__ int addr_bit_set(struct in6_addr *addr, int fn_bit)
+{
+       int dw;
+       __u32 b1;
+       __u32 mask;
+       int bit = fn_bit;
+
+       dw = bit >> 0x05;
+
+       b1 = addr->s6_addr32[dw];
+       
+       bit = ~bit;
+       bit &= 0x1f;
+       mask = htonl(1 << bit);
+       return (b1 & mask);
+}
+
+/*
+ *     Compare bit 'fn_bit' (numbered as in addr_bit_set) of a1 and a2.
+ *     Returns 1 when the two addresses agree on that bit, 0 otherwise.
+ */
+static __inline__ int addr_bit_equal(struct in6_addr *a1, struct in6_addr *a2,
+                                    int fn_bit)
+{
+       int dw;
+       __u32 b1, b2;
+       __u32 mask;
+       int bit = fn_bit;
+
+       dw = bit >> 0x05;
+
+       b1 = a1->s6_addr32[dw];
+       b2 = a2->s6_addr32[dw];
+       
+       bit = ~bit;
+       bit &= 0x1f;
+       mask = htonl(1 << bit);
+       return !((b1 ^ b2) & mask);
+}
+
+/*
+ *     find the first different bit between two addresses
+ *     (index counted from the most significant bit, range [0-127])
+ */
+static __inline__ int addr_diff(struct in6_addr *a1, struct in6_addr *a2)
+{
+       int i;
+
+       for (i = 0; i<4; i++)
+       {
+               __u32 b1, b2;
+               __u32 xb;
+               
+               b1 = a1->s6_addr32[i];
+               b2 = a2->s6_addr32[i];
+               
+               xb = b1 ^ b2;
+
+               if (xb)
+               {
+                       int res = 0;
+                       int j=31;
+
+                       /* scan from the MSB of the host-order word until
+                        * the first set (differing) bit is found */
+                       xb = ntohl(xb);
+
+                       while (test_bit(j, &xb) == 0)
+                       {
+                               res++;
+                               j--;
+                       }
+                       
+                       return (i * 32 + res);
+               }
+       }
+
+       /*
+        *      bit values are in range [0-127]
+        *      128 is an illegal value as we should *never* get to
+        *      this point since that would mean the addrs are equal
+        */
+       return 128;
+}
+
+/*
+ *     add a rt to a node that may already contain routes
+ *     sort routes in ascending metric order so that fib lookup
+ *     returns the smallest metric by default
+ */
+
+static __inline__ void fib6_add_rt2node(struct fib6_node *fn,
+                                       struct rt6_info *rt)
+{
+       struct rt6_info *iter, **back;
+
+       rt->fib_node = fn;
+       back = &fn->leaf;
+       
+       /* find the first entry with a strictly larger metric;
+        * 'back' trails as the link to patch */
+       for (iter = fn->leaf; iter; iter=iter->next)
+       {
+               if (iter->rt_metric > rt->rt_metric)
+               {
+                       break;
+               }
+
+               back = &iter->next;
+       }
+
+       /* splice rt in front of 'iter' */
+       rt->next = iter;
+       *back = rt;
+}
+
+/*
+ *     Routing Table
+ *
+ *     fib6_add_1 - insert 'rt' into the radix tree.
+ *
+ *     Walks from the root following the bit selected by each node,
+ *     then either (a) appends to an existing node with the same
+ *     prefix, (b) creates a new leaf below the walk position, or
+ *     (c) splits the path with an intermediate node at the first
+ *     differing bit.  Returns 0 on success, -ENOMEM on allocation
+ *     failure.
+ */
+
+static int fib6_add_1(struct rt6_info *rt)
+{
+       struct fib6_node *fn;
+       struct fib6_node *pn = NULL;
+       struct fib6_node *in;
+       struct fib6_node *ln;
+       struct in6_addr *addr;
+       __u32   bit;
+       __u32   dir = 0;
+       __u32   sernum = ++rt_sernum;
+       int pbit = rt->rt_prefixlen - 1;
+
+       addr = &rt->rt_dst;
+
+       /* insert node in tree */
+
+       fn = &routing_table;
+
+       for (;;)
+       {
+               /* walked off the tree: create a fresh leaf under pn */
+               if (fn == NULL)
+               {                       
+                       ln = kmalloc(sizeof(struct fib6_node), GFP_ATOMIC);
+
+                       if (ln == NULL)
+                               return (-ENOMEM);
+
+                       memset(ln, 0, sizeof(struct fib6_node));
+                       ln->fn_bit   = pbit;
+                       ln->fn_flags = RTN_BACKTRACK;
+                       
+                       ln->parent = pn;
+                       ln->leaf = rt;
+                       ln->fn_sernum = sernum;
+                       rt->fib_node = ln;
+
+                       atomic_inc(&rt->rt_ref);
+
+                       if (dir)
+                               pn->right = ln;
+                       else
+                               pn->left  = ln;
+
+                       rt6_stats.fib_nodes++;
+                       rt6_stats.fib_route_nodes++;
+                       rt6_stats.fib_rt_entries++;
+
+                       return(0);
+               }
+
+               if (addr_match(&fn->leaf->rt_dst, addr, fn->fn_bit))
+               {
+                       /* exact prefix match: add to this node's list */
+                       if (pbit == fn->fn_bit &&
+                           addr_bit_equal(addr, &fn->leaf->rt_dst,
+                                          rt->rt_prefixlen))
+                       {
+                               /* clean up an intermediate node */
+                               if ((fn->fn_flags & RTN_BACKTRACK) == 0) 
+                               {
+                                       rt_release(fn->leaf);
+                                       fn->leaf = NULL;
+                                       fn->fn_flags |= RTN_BACKTRACK;
+                               }
+                       
+                               fib6_add_rt2node(fn, rt);
+                               fn->fn_sernum = sernum;
+                               atomic_inc(&rt->rt_ref);
+                               
+                               rt6_stats.fib_route_nodes++;
+                               rt6_stats.fib_rt_entries++;
+                               
+                               return 0;
+                       }
+
+                       if (pbit > fn->fn_bit)
+                       {
+                               /* walk down on tree */
+
+                               fn->fn_sernum = sernum;
+
+                               dir = addr_bit_set(addr, fn->fn_bit); 
+                               pn = fn;
+                               fn = dir ? fn->right: fn->left;
+
+                               continue;
+                       }
+               }
+
+               /*
+                * split since we don't have a common prefix anymore or 
+                * we have a less significant route.
+                * we've to insert an intermediate node on the list
+                * this new node will point to the one we need to create
+                * and the current
+                */
+
+               pn = fn->parent;
+
+               /* find 1st bit in difference between the 2 addrs */
+               bit = addr_diff(addr, &fn->leaf->rt_dst);
+
+
+               /* 
+                *              (intermediate)  
+                *                /        \
+                *      (new leaf node)    (old node)
+                */
+               if (rt->rt_prefixlen > bit)
+               {
+                       in = kmalloc(sizeof(struct fib6_node), GFP_ATOMIC);
+               
+                       if (in == NULL)
+                               return (-ENOMEM);
+
+                       memset(in, 0, sizeof(struct fib6_node));
+
+                       /* 
+                        * new intermediate node. 
+                        * RTN_BACKTRACK will
+                        * be off since that an address that chooses one of
+                        * the branches would not match less specific routes
+                        * in the other branch
+                        */
+
+                       in->fn_bit = bit;
+                       in->parent = pn;
+                       in->leaf = rt;
+                       in->fn_sernum = sernum;
+                       atomic_inc(&rt->rt_ref);
+
+                       /* leaf node */
+                       ln = kmalloc(sizeof(struct fib6_node), GFP_ATOMIC);
+
+                       if (ln == NULL)
+                       {
+                               kfree(in);
+                               return (-ENOMEM);
+                       }
+
+                       /* update parent pointer */
+                       if (dir)
+                               pn->right = in;
+                       else
+                               pn->left  = in;
+
+                       memset(ln, 0, sizeof(struct fib6_node));
+                       ln->fn_bit   = pbit;
+                       ln->fn_flags = RTN_BACKTRACK;
+                       
+                       ln->parent = in;
+                       fn->parent = in;
+
+                       ln->leaf = rt;
+                       ln->fn_sernum = sernum;
+                       atomic_inc(&rt->rt_ref);
+
+                       rt->fib_node = ln;
+
+                       /* hang old subtree and new leaf off the
+                        * intermediate node by the differing bit */
+                       if (addr_bit_set(addr, bit))
+                       {
+                               in->right = ln;
+                               in->left  = fn;
+                       }
+                       else
+                       {
+                               in->left  = ln;
+                               in->right = fn;
+                       }
+
+                       rt6_stats.fib_nodes += 2;
+                       rt6_stats.fib_route_nodes++;
+                       rt6_stats.fib_rt_entries++;
+
+                       return 0;
+               }
+
+               /* 
+                *              (new leaf node)
+                *                /        \
+                *           (old node)    NULL
+                */
+
+               ln = kmalloc(sizeof(struct fib6_node), GFP_ATOMIC);
+
+               if (ln == NULL)
+                       return (-ENOMEM);
+
+               memset(ln, 0, sizeof(struct fib6_node));
+               ln->fn_bit   = pbit;
+               ln->fn_flags = RTN_BACKTRACK;
+                       
+
+               ln->parent = pn;
+               ln->leaf = rt;
+               ln->fn_sernum = sernum;
+               atomic_inc(&rt->rt_ref);
+
+               rt->fib_node = ln;
+
+               if (dir)
+                       pn->right = ln;
+               else
+                       pn->left  = ln;
+               
+
+               if (addr_bit_set(&fn->leaf->rt_dst, pbit))
+                       ln->right = fn;
+               else
+                       ln->left  = fn; 
+
+               fn->parent = ln;
+
+               rt6_stats.fib_nodes++;
+               rt6_stats.fib_route_nodes++;
+               rt6_stats.fib_rt_entries++;
+
+               return(0);
+       }
+
+       /* not reached: the loop above always returns */
+       return (-1);
+}
+
+/*
+ *     Longest-prefix lookup: descend the tree following address bits
+ *     until a dead end, then backtrack towards the root and return
+ *     the first route whose prefix matches 'addr' and whose rt_flags
+ *     contain none of the bits in 'flags' (an exclusion mask).
+ *     Returns NULL when nothing matches.
+ */
+static struct rt6_info * fib6_lookup_1(struct in6_addr *addr, int flags)
+{
+       struct fib6_node *fn, *next;
+       int dir;
+
+       fn = &routing_table;
+
+       /* phase 1: walk down as far as the tree allows */
+       for (;;)
+       {
+               dir = addr_bit_set(addr, fn->fn_bit);
+
+               next = dir ? fn->right: fn->left;
+
+               if (next)
+               {
+                       fn = next;
+                       continue;
+               }
+
+               break;
+       }
+
+
+       /* phase 2: backtrack through route-bearing (RTN_BACKTRACK)
+        * nodes until a match is found or the root is reached */
+       while ((fn->fn_flags & RTN_ROOT) == 0)
+       {
+               if (fn->fn_flags & RTN_BACKTRACK)
+               {
+                       if (addr_match(&fn->leaf->rt_dst, addr, 
+                                      fn->leaf->rt_prefixlen))
+                       {
+                               struct rt6_info *rt;
+                               
+                               for (rt = fn->leaf; rt; rt = rt->next)
+                               {
+                                       if ((rt->rt_flags & flags) == 0)
+                                               return rt;
+                               }
+                       }
+               }
+               
+               fn = fn->parent;
+       }
+
+       return NULL;
+}
+
+
+
+/*
+ *     called to trim the tree of intermediate nodes when possible
+ *     after a route node has lost its last entry
+ */
+
+static void fib6_del_3(struct fib6_node *fn)
+{
+       int children = 0;
+       int dir = 0;
+       int bit;
+
+       /*
+        *      0 or one children:
+        *              delete the node
+        *
+        *      2 children:
+        *              move the bit down
+        */
+
+       if (fn->left)
+       {
+               children++;
+               dir = 0;
+       }
+
+       if (fn->right)
+       {
+               children++;
+               dir = 1;
+       }
+
+       if (children < 2)
+       {
+               struct fib6_node *child;
+
+               /* splice fn out: connect its only child (possibly NULL)
+                * to fn's parent in fn's place */
+               child = dir ? fn->right : fn->left;
+
+               if (fn->parent->left == fn)
+               {
+                       fn->parent->left = child;
+               }
+               else
+               {
+                       fn->parent->right = child;
+               }
+
+               if (child)
+               {
+                       child->parent = fn->parent;
+               }
+
+               /* 
+                *      try to collapse on top
+                */                     
+               if ((fn->parent->fn_flags & (RTN_BACKTRACK | RTN_ROOT)) == 0)
+               {
+                       if (fn->leaf)
+                       {
+                               rt_release(fn->leaf);
+                               fn->leaf = NULL;
+                       }
+                       fib6_del_3(fn->parent);
+               }
+               if (fn->fn_flags & RTN_BACKTRACK)
+               {
+                       rt6_stats.fib_route_nodes--;
+               }
+               rt6_stats.fib_nodes--;
+               kfree(fn);
+               return;
+       }
+
+       /* two children: fn survives as an intermediate node keyed on
+        * the first bit where its two subtrees differ */
+       bit = addr_diff(&fn->left->leaf->rt_dst, &fn->right->leaf->rt_dst);
+       
+       fn->fn_bit = bit;
+       fn->fn_flags &= ~RTN_BACKTRACK;
+       fn->leaf = fn->left->leaf;
+
+       rt6_stats.fib_route_nodes--;
+}
+
+/*
+ *     Find the tree node for (addr, prefixlen) and unlink the first
+ *     route entry matching the given gateway or device.  Returns the
+ *     node (so the caller can trim it if it became empty) or NULL
+ *     when no matching entry was found.
+ */
+static struct fib6_node * fib6_del_2(struct in6_addr *addr, __u32 prefixlen, 
+                                    struct in6_addr *gw, struct device *dev)
+{
+       struct fib6_node *fn;
+
+       /* descend until a route node with the exact prefix is found,
+        * or we fall off the tree (fn == NULL) */
+       for (fn = &routing_table; fn;) 
+       {
+               int dir;
+
+               if ((fn->fn_flags & RTN_BACKTRACK) &&
+                   prefixlen == fn->leaf->rt_prefixlen &&
+                   addr_match(&fn->leaf->rt_dst, addr, fn->leaf->rt_prefixlen)
+                   )
+               {
+                       break;
+               }
+
+               dir = addr_bit_set(addr, fn->fn_bit);
+
+               fn = dir ? fn->right: fn->left;
+       }
+
+       /* 
+        *      if route tree node found
+        *      search among it's entries
+        */
+
+       if (fn)
+       {
+               struct rt6_info *back = NULL;
+               struct rt6_info *lf;
+
+               for(lf = fn->leaf; lf; lf=lf->next)
+               {
+                       /* NOTE(review): the gateway test compares 'addr'
+                        * against the route's destination, not 'gw'
+                        * against the route's gateway - looks wrong;
+                        * confirm intended semantics. */
+                       if ((gw && (ipv6_addr_cmp(addr, &lf->rt_dst) == 0)) ||
+                           (dev && dev == lf->rt_dev))
+                       {
+                               /* delete this entry */
+                               if (back == NULL)
+                                       fn->leaf = lf->next;
+                               else
+                                       back->next = lf->next;
+
+                               lf->fib_node = NULL;
+                               rt_release(lf);
+                               return fn;              
+                       }
+                       back = lf;
+               }
+       }
+
+       return NULL;
+}
+
+/*
+ *     Like fib6_del_2, but removes the specific entry 'rt' (matched
+ *     by pointer identity) from the node for (addr, prefixlen).
+ *     Returns the node, or NULL if rt was not found there.
+ */
+static struct fib6_node * fib6_del_rt_2(struct in6_addr *addr, __u32 prefixlen,
+                                       struct rt6_info *rt)
+{
+       struct fib6_node *fn;
+
+       /* descend until the route node with the exact prefix is found */
+       for (fn = &routing_table; fn;) 
+       {
+               int dir;
+
+               if ((fn->fn_flags & RTN_BACKTRACK) &&
+                   prefixlen == fn->leaf->rt_prefixlen &&
+                   addr_match(&fn->leaf->rt_dst, addr, fn->leaf->rt_prefixlen)
+                   )
+               {
+                       break;
+               }
+
+               dir = addr_bit_set(addr, fn->fn_bit);
+
+               fn = dir ? fn->right: fn->left;
+       }
+
+       /* 
+        *      if route tree node found
+        *      search among its entries
+        */
+
+       if (fn)
+       {
+               struct rt6_info *back = NULL;
+               struct rt6_info *lf;
+
+               for(lf = fn->leaf; lf; lf=lf->next)
+               {
+                       if (rt == lf)
+                       {
+                               /* delete this entry */
+                               if (back == NULL)
+                                       fn->leaf = lf->next;
+                               else
+                                       back->next = lf->next;
+
+                               lf->fib_node = NULL;
+                               rt_release(lf);
+                               return fn;
+                       }
+                       back = lf;
+               }
+       }
+
+       return NULL;
+}
+
+/*
+ *     Delete the route identified by addr/prefixlen/gw/dev.
+ *     Returns 0 on success, -ENOENT if no matching route exists.
+ *     If the node's leaf list became empty the node itself is pruned
+ *     via fib6_del_3().
+ */
+int fib6_del_1(struct in6_addr *addr, __u32 prefixlen, struct in6_addr *gw, 
+              struct device *dev)
+{
+       struct fib6_node *fn;
+
+       fn = fib6_del_2(addr, prefixlen, gw, dev);
+
+       if (fn == NULL)
+               return -ENOENT;
+       
+       if (fn->leaf == NULL)
+       {
+               fib6_del_3(fn);
+       }
+
+       return 0;
+}
+
+/*
+ *     Delete a specific rt6_info entry (matched by pointer, not by
+ *     address comparison -- see fib6_del_rt_2).  Returns 0 on success,
+ *     -ENOENT if the entry is no longer in the tree; prunes the node
+ *     when its last entry is removed.
+ */
+int fib6_del_rt(struct rt6_info *rt)
+{
+       struct fib6_node *fn;
+
+       fn = fib6_del_rt_2(&rt->rt_dst, rt->rt_prefixlen, rt);
+
+       if (fn == NULL)
+               return -ENOENT;
+       
+       if (fn->leaf == NULL)
+       {
+               fib6_del_3(fn);
+       }
+
+       return 0;
+}
+
+/*
+ *     Per-node callback for fib6_flush(): release every rt6_info hanging
+ *     off this node, update the fib statistics and free the node itself.
+ *     'p_arg' is unused (required by the rt6_walk_tree callback type).
+ */
+static void fib6_flush_1(struct fib6_node *fn, void *p_arg)
+{
+       struct rt6_info *rt;
+
+       /* grab ->next before releasing the current entry */
+       for (rt = fn->leaf; rt;)
+       {
+               struct rt6_info *itr;
+
+               itr = rt;
+               rt = rt->next;
+               itr->fib_node = NULL;
+               rt_release(itr);
+       }
+       
+       if (fn->fn_flags & RTN_BACKTRACK)
+       {
+               rt6_stats.fib_route_nodes--;
+       }
+       rt6_stats.fib_nodes--;
+       kfree(fn);
+}
+
+/*
+ *     Tear down the whole IPv6 routing tree (used on module unload).
+ */
+void fib6_flush(void)
+{
+       rt6_walk_tree(fib6_flush_1, NULL, 0);
+}
+
+/*
+ *     Add a route described by an in6_rtmsg (ioctl / netlink path).
+ *     The entry is not inserted directly: it is queued as an RT_OPER_ADD
+ *     request and processed by the bottom half (rt6_run_bh).
+ *     Returns 0 on success or a negative errno.
+ */
+int ipv6_route_add(struct in6_rtmsg *rtmsg)
+{
+       struct rt6_info *rt;
+       struct device * dev = NULL;
+       struct rt6_req *request;
+       int flags = rtmsg->rtmsg_flags;
+
+       /* NOTE(review): dev_get() may return NULL here; only the
+        * non-gateway branch below checks for that. */
+       dev = dev_get(rtmsg->rtmsg_device);
+       
+       /* NOTE(review): kmalloc return value is not checked -- the
+        * memset below will oops on allocation failure. */
+       rt = (struct rt6_info *) kmalloc(sizeof(struct rt6_info),
+                                        GFP_ATOMIC);
+
+       rt6_stats.fib_rt_alloc++;
+
+       memset(rt, 0, sizeof(struct rt6_info));
+               
+       memcpy(&rt->rt_dst, &rtmsg->rtmsg_dst, sizeof(struct in6_addr));
+       rt->rt_prefixlen = rtmsg->rtmsg_prefixlen;
+       
+       if (flags & (RTF_GATEWAY | RTF_NEXTHOP)) 
+       {
+               /* check to see if its an acceptable gateway */
+               if (flags & RTF_GATEWAY)
+               {
+                       struct rt6_info *gw_rt;
+
+                       /* gateway must itself be reachable via a
+                        * non-gateway route */
+                       gw_rt = fibv6_lookup(&rtmsg->rtmsg_gateway, NULL,
+                                            RTI_GATEWAY);
+
+                       if (gw_rt == NULL)
+                       {
+                               /* NOTE(review): 'rt' is leaked on this
+                                * path (allocated above, never freed) and
+                                * fib_rt_alloc stays inflated. */
+                               return -EHOSTUNREACH;
+                       }
+
+                       dev = gw_rt->rt_dev;
+               }
+
+               rt->rt_nexthop = ndisc_get_neigh(dev, &rtmsg->rtmsg_gateway);
+
+               if (rt->rt_nexthop == NULL)
+               {
+                       printk(KERN_DEBUG "ipv6_route_add: no nexthop\n");
+                       kfree(rt);
+                       return -EINVAL;
+               }
+
+               rt->rt_dev = dev;
+
+               /* remember the first route through a loopback device;
+                * used by ipv6_dst_route() for local destinations */
+               if (loopback_rt == NULL && (dev->flags & IFF_LOOPBACK))
+               {
+                       loopback_rt = rt;
+               }
+
+       }
+       else
+       {
+               /* direct (on-link) route: a device is mandatory */
+               if (dev == NULL)
+               {
+                       printk(KERN_DEBUG "ipv6_route_add: NULL dev\n");
+                       kfree(rt);
+                       return -EINVAL;
+               }
+
+               rt->rt_dev = dev;
+               rt->rt_nexthop = NULL;
+       }
+       
+       rt->rt_metric = rtmsg->rtmsg_metric;
+       rt->rt_flags = rtmsg->rtmsg_flags;
+
+       /* addrconf routes carry a lifetime in rtmsg_info */
+       if (rt->rt_flags & RTF_ADDRCONF)
+       {
+               rt->rt_expires = rtmsg->rtmsg_info;
+       }
+
+       request = kmalloc(sizeof(struct rt6_req), GFP_ATOMIC);
+       if (request == NULL)
+       {
+               /* NOTE(review): 'rt' (and its nexthop reference) is
+                * leaked on this path too. */
+               printk(KERN_WARNING "ipv6_route_add: kmalloc failed\n");
+               return -ENOMEM;
+       }
+
+       request->operation = RT_OPER_ADD;
+       request->ptr = rt;
+       request->next = request->prev = NULL;
+       rtreq_queue(request);
+       rt6_bh_mask |= RT_BH_REQUEST;
+
+       rt6_run_bh();
+
+       return 0;
+}
+
+/*
+ *     Delete the route for rtmsg_dst/rtmsg_prefixlen.
+ *     Returns 0 on success, -ENOENT if no exact-prefix match is found.
+ */
+int ipv6_route_del(struct in6_rtmsg *rtmsg)
+{
+       struct rt6_info * rt;
+
+       rt = fib6_lookup_1(&rtmsg->rtmsg_dst, 0);
+       /* NOTE(review): the '(rt && ...)' is redundant -- the short
+        * circuit after '!rt' already guarantees rt != NULL. */
+       if (!rt || (rt && (rt->rt_prefixlen != rtmsg->rtmsg_prefixlen)))
+               return -ENOENT;
+       return fib6_del_rt(rt);
+}
+
+/*
+ *     search the routing table
+ *     the flags parameter restricts the search to entries where
+ *     the flag is *not* set
+ */
+/*
+ *     Look up a route for 'addr'.  If 'src_dev' is given, prefer an
+ *     entry bound to that device; with RTI_DEVRT set the device match is
+ *     mandatory.  When no specific route exists and RTI_GATEWAY is not
+ *     requested, fall back to a default router and finally to
+ *     last_resort_rt (which may be NULL).
+ */
+struct rt6_info * fibv6_lookup(struct in6_addr *addr, struct device *src_dev,
+                              int flags)
+{
+       struct rt6_info *rt;
+
+       if ((rt = fib6_lookup_1(addr, flags)))
+       {
+               if (src_dev)
+               {
+                       /* scan this node's entries for one on src_dev */
+                       for (; rt; rt=rt->next)
+                       {
+                               if (rt->rt_dev == src_dev)
+                                       return rt;
+                       }
+                       
+                       if (flags & RTI_DEVRT)
+                       {
+                               return NULL;
+                       }
+               }
+
+               /* NOTE(review): when src_dev was set but no entry matched
+                * (and RTI_DEVRT is clear) this returns NULL, not the
+                * first entry -- 'rt' has been walked to the list end.
+                * Confirm that this is the intended fallback. */
+               return rt;
+       }
+
+       if (!(flags & RTI_GATEWAY))
+       {
+               if ((rt = dflt_rt_lookup()))
+               {
+                       return rt;
+               }
+
+               return last_resort_rt;          
+       }
+
+       return NULL;
+}
+
+/*
+ *     Destination Cache
+ */
+
+/*
+ *     Resolve 'daddr' to a destination cache entry.
+ *     If the fib route already has a nexthop the rt6_info doubles as the
+ *     dest_entry (its use count is bumped); otherwise a new dest_entry
+ *     is allocated, its insertion queued for the bottom half, and it is
+ *     returned with dc_usecnt == 1.  Returns NULL on failure.
+ */
+struct dest_entry * ipv6_dst_route(struct in6_addr * daddr,
+                                  struct device *src_dev,
+                                  int flags)
+{
+       struct dest_entry * dc = NULL;
+       struct rt6_info * rt;
+
+       /* hold the tree lock across the lookup and queueing */
+       atomic_inc(&rt6_lock);
+       
+       rt = fibv6_lookup(daddr, src_dev, flags);
+
+       if (rt == NULL)
+       {
+               goto exit;
+       }
+       
+       if (rt->rt_nexthop)
+       {
+               /*
+                *      We can use the generic route
+                *      (warning: the pmtu value maybe invalid)
+                */
+
+               dc = (struct dest_entry *) rt;
+               atomic_inc(&rt->rt_use);
+       }
+       else
+       {
+               struct rt6_req *request;
+
+               /* local address reached via a non-loopback route:
+                * redirect through the loopback route if one exists */
+               if (ipv6_chk_addr(daddr) && !(rt->rt_dev->flags & IFF_LOOPBACK))
+               {
+                       rt = loopback_rt;
+
+                       if (rt == NULL)
+                       {
+                               goto exit;
+                       }
+               }
+
+               /*
+                *      dynamicly allocate a new route
+                */
+               
+               dc = (struct dest_entry *) kmalloc(sizeof(struct dest_entry), 
+                                          GFP_ATOMIC);
+
+               if (dc == NULL)
+               {
+                       printk(KERN_WARNING "dst_route: kmalloc failed\n");
+                       goto exit;
+               }
+
+               rt6_stats.fib_rt_alloc++;
+               rt6_stats.fib_dc_alloc++;
+
+               memset(dc, 0, sizeof(struct dest_entry));
+
+               memcpy(&dc->dc_addr, daddr, sizeof(struct in6_addr));
+               dc->rt.rt_prefixlen = 128;
+               dc->dc_usecnt = 1;
+               dc->rt.rt_metric = rt->rt_metric;
+
+               dc->dc_flags = (rt->rt_flags | RTF_HOST | RTI_DYNAMIC |
+                               RTI_DCACHE | DCF_PMTU);
+
+               dc->dc_pmtu = rt->rt_dev->mtu;
+               dc->rt.rt_dev = rt->rt_dev;
+               dc->rt.rt_output_method = rt->rt_output_method;
+               dc->dc_tstamp = jiffies;
+               /* add it to the request queue */
+               
+               request = kmalloc(sizeof(struct rt6_req), GFP_ATOMIC);
+
+               if (request == NULL)
+               {
+                       /* NOTE(review): the freshly allocated 'dc' is
+                        * leaked here and the stats counters above stay
+                        * inflated -- should kfree(dc) before bailing. */
+                       printk(KERN_WARNING "dst_route: kmalloc failed\n");
+                       dc = NULL;
+                       goto exit;
+               }
+
+               /* NOTE(review): ndisc_get_neigh() result is not checked;
+                * other callers treat NULL as failure. */
+               dc->dc_nexthop = ndisc_get_neigh(rt->rt_dev, daddr);
+
+               rt6_bh_mask |= RT_BH_REQUEST;
+               
+               request->operation = RT_OPER_ADD;
+               request->ptr = (struct rt6_info *) dc;
+               request->next = request->prev = NULL;
+               rtreq_queue(request);
+       }
+
+       atomic_inc(&rt_clients);
+
+  exit:
+       
+       atomic_dec(&rt6_lock);
+       rt6_run_bh();
+
+       return dc;
+}
+
+/*
+ *     check cache entry for validity...
+ *     this needs to be done as an inline func that calls
+ *     ipv6_slow_dst_check if entry is invalid
+ */
+
+/*
+ *     Revalidate a cached dest_entry against the current tree serial
+ *     number.  If still valid, poke neighbour discovery and return the
+ *     same entry; otherwise drop our reference and redo the full lookup.
+ */
+struct dest_entry * ipv6_dst_check(struct dest_entry *dc,
+                                  struct in6_addr *daddr,
+                                  __u32 sernum, int flags)
+{
+       int uptodate = 0;
+
+       /*
+        *      destination cache becomes invalid when routing
+        *      changes or a more specific dynamic entry is
+        *      created.
+        *      if route is removed from table fib_node will
+        *      become NULL
+        */
+
+       if (dc->rt.fib_node && (dc->rt.fib_node->fn_sernum == sernum))
+               uptodate = 1;
+
+       if (uptodate && ((dc->dc_flags & DCF_INVALID) == 0))
+       {
+               /* keep neighbour reachability state fresh */
+               if (dc->dc_nexthop && !(dc->dc_nexthop->flags & NCF_NOARP))
+               {
+                       ndisc_event_send(dc->dc_nexthop, NULL);
+               }
+               return dc;
+       }
+
+       /* route for destination may have changed */
+
+       ipv6_dst_unlock(dc);
+
+       return ipv6_dst_route(daddr, NULL, flags);
+}
+
+/*
+ *     Drop a reference on a dest_entry.  Cache entries are kept around
+ *     after the last user in the hope of reuse; the actual free happens
+ *     only when the entry is also out of the routing table (rt_ref == 0).
+ */
+void ipv6_dst_unlock(struct dest_entry *dc)
+{
+       /*
+        *      decrement counter and mark entry for deletion
+        *      if counter reaches 0. we delay deletions in hope
+        *      we can reuse cache entries.
+        */
+
+       /* NOTE(review): atomic_dec followed by a plain read of
+        * dc_usecnt is not an atomic dec-and-test -- racy unless all
+        * callers run with interrupts off; confirm locking model. */
+       atomic_dec(&dc->dc_usecnt);
+       
+       if (dc->dc_usecnt == 0)
+       {
+
+               if (dc->dc_flags & RTI_DCACHE)
+               {
+                       /*
+                        *      update last usage tstamp
+                        */
+
+                       dc->dc_tstamp = jiffies;
+                       rt6_bh_mask |= RT_BH_GC;
+               }
+
+               if (dc->rt.rt_ref == 0)
+               {
+                       /*
+                        *      entry out of the routing table
+                        *      pending to be released on last deref
+                        */
+
+                       if (dc->dc_nexthop)
+                       {
+                               ndisc_dec_neigh(dc->dc_nexthop);
+                       }
+                       
+                       if (dc->dc_flags & RTI_DCACHE)
+                       {
+                               rt6_stats.fib_dc_alloc--;
+                       }
+
+                       rt6_stats.fib_rt_alloc--;
+                       kfree(dc);
+               }
+
+       }
+
+       atomic_dec(&rt_clients);
+}
+
+/*
+ *     Received a packet too big icmp that lowers the mtu for this
+ *     address. If the route for the destination is generic we create
+ *     a new route with the appropriate MTU info. The route_add
+ *     procedure will update the serial number on the generic routes
+ *     belonging to the affected tree forcing clients to request a route
+ *     lookup.
+ */
+/*
+ *     Path-MTU update handler (driven by ICMPv6 "packet too big").
+ *     Updates an existing dcache entry in place, or clones the generic
+ *     route into a new host-specific dest_entry carrying the new MTU
+ *     and queues it for insertion by the bottom half.
+ */
+void rt6_handle_pmtu(struct in6_addr *addr, int pmtu)
+{
+       struct rt6_info *rt;
+       struct rt6_req *req;
+       struct dest_entry *dc;
+
+       printk(KERN_DEBUG "rt6_handle_pmtu\n");
+
+       /* NOTE(review): upper bound allows 65536; the IPv6 payload
+        * length field tops out at 65535 -- confirm intended limit. */
+       if (pmtu < 0 || pmtu > 65536)
+       {
+               printk(KERN_DEBUG "invalid MTU value\n");
+               return;
+       }
+
+       rt = fibv6_lookup(addr, NULL, 0);
+
+       if (rt == NULL)
+       {
+               printk(KERN_DEBUG "rt6_handle_pmtu: route not found\n");
+               return;
+       }
+
+       if (rt->rt_flags & RTI_DCACHE)
+       {
+               /*
+                *      we do have a destination cache entry for this
+                *      address.
+                */
+               
+               dc = (struct dest_entry *) rt;
+               
+               /*
+                *      fixme: some sanity checks are likely to be needed
+                *       here
+                */
+
+               dc->dc_pmtu = pmtu;
+               dc->dc_flags |= DCF_PMTU;
+               return;
+       }
+
+       /* NOTE(review): neither this kmalloc nor the one below is
+        * checked for NULL before use (GFP_ATOMIC can fail). */
+       req = (struct rt6_req *) kmalloc(sizeof(struct rt6_req), GFP_ATOMIC);
+
+       /* now add the new destination cache entry      */
+       
+       dc = (struct dest_entry *) kmalloc(sizeof(struct dest_entry),
+                                          GFP_ATOMIC);
+       
+       rt6_stats.fib_rt_alloc++;
+       rt6_stats.fib_dc_alloc++;
+
+       memset(dc, 0, sizeof(struct dest_entry));
+       
+       memcpy(&dc->dc_addr, addr, sizeof(struct in6_addr));
+       dc->rt.rt_prefixlen = 128;
+       dc->rt.rt_metric = rt->rt_metric;
+
+       dc->dc_flags = (rt->rt_flags | RTI_DYNAMIC | RTI_DCACHE | DCF_PMTU |
+                       RTF_HOST);
+
+       dc->dc_pmtu = pmtu;
+       dc->dc_tstamp = jiffies;
+       
+       /* NOTE(review): rt->rt_nexthop is NULL for direct routes
+        * (see ipv6_route_add) -- the refcnt increment below would
+        * dereference NULL in that case. */
+       dc->dc_nexthop = rt->rt_nexthop;
+       atomic_inc(&dc->dc_nexthop->refcnt);
+
+       dc->rt.rt_dev = rt->rt_dev;
+       dc->rt.rt_output_method = rt->rt_output_method;
+
+       req->operation = RT_OPER_ADD;
+       req->ptr = (struct rt6_info *) dc;
+       req->next = req->prev = NULL;
+
+       rtreq_queue(req);
+
+       rt6_bh_mask |= RT_BH_REQUEST;
+
+       rt6_run_bh();
+}
+
+/*
+ *     Redirect received: target is nexthop for dest
+ */
+/*
+ *     Process an ICMPv6 redirect: make 'target' the nexthop for 'dest'.
+ *     If the current route is not host-specific a new RTF_HOST route is
+ *     cloned and queued for insertion; otherwise the existing host route
+ *     is modified in place.  'on_link' clears RTF_GATEWAY (target is the
+ *     destination itself).  Returns the affected route, or NULL.
+ */
+struct rt6_info * ipv6_rt_redirect(struct device *dev, struct in6_addr *dest,
+                                  struct in6_addr *target, int on_link)
+                                      
+{
+       struct rt6_info *rt;
+       struct rt6_req *req;
+       int metric;
+
+       rt = fibv6_lookup(dest, dev, 0);
+
+       if (rt == NULL)
+       {
+               printk(KERN_WARNING "rt_redirect: unable to locate route\n");
+               return NULL;
+       }
+
+       metric = rt->rt_metric;
+
+       if ((rt->rt_flags & RTF_HOST) == 0)
+       {
+               /* Need to create an host route for this address */
+               
+               /* NOTE(review): both kmalloc calls in this branch are
+                * unchecked -- memset/req writes oops on failure. */
+               rt = (struct rt6_info *) kmalloc(sizeof(struct rt6_info),
+                                                GFP_ATOMIC);
+               memset(rt, 0, sizeof(struct rt6_info));
+               ipv6_addr_copy(&rt->rt_dst, dest);
+               rt->rt_prefixlen = 128;
+               rt->rt_flags = RTF_HOST | RTF_UP;
+               rt->rt_dev = dev;
+
+               /*
+                *      clone rt->rt_output_method ?
+                */
+
+               /* inherit the metric of the route being overridden */
+               rt->rt_metric = metric;
+
+               rt6_stats.fib_rt_alloc++;
+
+               req = (struct rt6_req *) kmalloc(sizeof(struct rt6_req),
+                                                GFP_ATOMIC);
+               req->operation = RT_OPER_ADD;
+               req->ptr  = rt;
+               req->next = req->prev = NULL;
+               
+               rtreq_queue(req);
+               rt6_bh_mask |= RT_BH_REQUEST;
+       }
+       else
+       {
+               rt->rt_flags |= RTF_MODIFIED;
+       }
+
+       rt->rt_flags |= RTF_DYNAMIC;
+       if (on_link)
+       {
+               rt->rt_flags &= ~RTF_GATEWAY;
+       }
+       else
+       {
+               rt->rt_flags |= RTF_GATEWAY;
+       }
+
+       /* swap the neighbour entry unless it already points at target */
+       if (rt->rt_nexthop)
+       {
+               if (ipv6_addr_cmp(&rt->rt_nexthop->addr, target) == 0)
+               {
+                       atomic_inc(&rt->rt_nexthop->refcnt);
+                       goto exit;
+               }
+               else
+               {
+                       ndisc_dec_neigh(rt->rt_nexthop);
+               }
+       }
+       
+       rt->rt_nexthop = ndisc_get_neigh(dev, target);
+
+  exit:
+       rt6_run_bh();
+       return rt;
+}
+
+/*
+ *     Garbage-collect the dcache entries of one node: unlink and release
+ *     every unused (rt_use == 0) RTI_DCACHE entry older than 'timeout'
+ *     jiffies.  Returns -1 if the node's leaf list became empty (caller
+ *     should remove the node), otherwise the number of entries that are
+ *     still pending expiry.
+ */
+static int dcache_gc_node(struct fib6_node *fn, int timeout)
+{
+       struct rt6_info *rt, *back;
+       int more = 0;
+       unsigned long now = jiffies;
+
+       back = NULL;
+
+       for (rt = fn->leaf; rt;)
+       {
+               if ((rt->rt_flags & RTI_DCACHE) && rt->rt_use == 0)
+               {
+                       struct dest_entry *dc;
+                       
+                       dc = (struct dest_entry *) rt;
+                       
+                       /* NOTE(review): 'timeout' is int while jiffies
+                        * arithmetic is unsigned long -- confirm no
+                        * sign-conversion surprise on large timeouts. */
+                       if (now - dc->dc_tstamp > timeout)
+                       {
+                               struct rt6_info *old;
+
+                               old = rt;
+
+                               rt = rt->next;
+
+                               /* unlink 'old'; 'back' keeps trailing */
+                               if (back == NULL)
+                               {
+                                       fn->leaf = rt;
+                               }
+                               else
+                               {
+                                       back->next = rt;
+                               }
+
+                               old->fib_node = NULL;
+                               rt_release(old);
+                               rt6_stats.fib_rt_entries--;
+                               continue;
+                       }
+                       else
+                       {
+                               more++;
+                       }
+               }
+
+               back = rt;
+               rt = rt->next;
+       }
+
+       if (fn->leaf == NULL)
+       {
+               return -1;
+       }
+       return more;
+}
+
+/* arguments threaded through rt6_walk_tree to the GC callback */
+struct dc_gc_args {
+       unsigned long   timeout;        /* expiry age in jiffies */
+       int             more;           /* entries still awaiting expiry */
+};
+
+/*
+ *     rt6_walk_tree callback for destination-cache garbage collection.
+ *     Host (bit 127) route nodes have their stale entries reaped via
+ *     dcache_gc_node() and are removed when emptied; interior non-root
+ *     nodes with at most one child are spliced out of the tree.
+ */
+static void dc_garbage_collect(struct fib6_node *fn, void *p_arg)
+{
+       struct dc_gc_args * args = (struct dc_gc_args *) p_arg;
+       
+       if (fn->fn_flags & RTN_BACKTRACK)
+       {
+               /* only host routes (full 128-bit match) hold dcache
+                * entries */
+               if (fn->fn_bit == 127)
+               {
+                       int more;
+                                               
+                       more = dcache_gc_node(fn, args->timeout);
+
+                       if (more == -1)
+                       {
+                               /* node emptied: detach from parent and
+                                * free it */
+                               if (fn->parent->left == fn)
+                                       fn->parent->left = NULL;
+                               else
+                                       fn->parent->right = NULL;
+                               
+                               kfree(fn);
+
+                               rt6_stats.fib_nodes--;
+                               rt6_stats.fib_route_nodes--;
+                               
+                               return;
+                       }
+                       args->more += more;
+               }
+       }
+       else if (!(fn->fn_flags & RTN_ROOT))
+       {
+               int children = 0;
+               struct fib6_node *chld = NULL;
+
+               if (fn->left)
+               {
+                       children++;
+                       chld = fn->left;                        
+               }
+                       
+               if (fn->right)
+               {
+                       children++;
+                       chld = fn->right;
+               }
+               
+               /* interior node with 0 or 1 children is redundant:
+                * splice the surviving child (if any) to the parent */
+               if (children <= 1)
+               {                       
+                       struct fib6_node *pn = fn->parent;
+                       
+                       if (pn->left == fn)
+                       {
+                               pn->left = chld;
+                       }
+                       else
+                       {
+                               pn->right = chld;
+                       }
+                       
+                       if (chld)
+                       {
+                               chld->parent = pn;
+                       }
+                       
+                       rt_release(fn->leaf);
+                       
+                       rt6_stats.fib_nodes--;
+                       kfree(fn);
+               }              
+       }
+}
+
+/*
+ *     called with ints off
+ */
+
+/*
+ *     Bottom-half worker: drain the pending add/delete request queue,
+ *     then run the dcache garbage collector if due.  Caller guarantees
+ *     interrupts are off (see header comment above).
+ */
+static void __rt6_run_bh(void)
+{
+       /* NOTE(review): implicit int -- should be
+        * 'static unsigned long last_gc_run' to match jiffies; as an
+        * int it truncates and the subtraction below can misbehave on
+        * jiffies wrap. */
+       static last_gc_run = 0;
+
+       if (rt6_bh_mask & RT_BH_REQUEST)
+       {
+               struct rt6_req *request;
+
+               while ((request = rtreq_dequeue()))
+               {
+                       struct rt6_info *rt;
+
+                       rt = request->ptr;
+
+                       switch (request->operation) {
+                       case RT_OPER_ADD:
+                               fib6_add_1(rt);
+                               break;
+
+                       case RT_OPER_DEL:
+                               fib6_del_rt(rt);
+                               break;
+
+                       default:
+                               printk(KERN_WARNING
+                                      "rt6_run_bh: bad request in queue\n");
+                       }
+
+                       kfree(request);
+               }
+
+               rt6_bh_mask &= ~RT_BH_REQUEST;
+       }
+
+       if (rt6_bh_mask & RT_BH_GC)
+       {
+               /* rate-limit GC passes to one per DC_TIME_RUN */
+               if (jiffies - last_gc_run > DC_TIME_RUN)
+               {
+                       struct dc_gc_args args;
+
+                       /* under memory pressure expire entries sooner */
+                       if (rt6_stats.fib_dc_alloc >= DC_WATER_MARK)
+                               args.timeout = DC_SHORT_TIMEOUT;
+                       else
+                               args.timeout = DC_LONG_TIMEOUT;
+
+                       args.more = 0;
+                       rt6_walk_tree(dc_garbage_collect, &args, 0);
+
+                       last_gc_run = jiffies;
+                       
+                       /* nothing left pending: stop scheduling GC */
+                       if (!args.more)
+                       {
+                               rt6_bh_mask &= ~RT_BH_GC;
+                       }
+               }
+       }
+}
+
+/*
+ *     Timer for expiring routes learned via addrconf and stale DC 
+ *     entries when there is no network activity
+ */
+
+/*
+ *     Periodic timer: with interrupts off, run deferred bottom-half work
+ *     (only if no readers hold rt6_lock and no clients are active) and
+ *     walk the tree expiring addrconf routes, then rearm the timer.
+ */
+void rt6_timer_handler(unsigned long data)
+{
+       unsigned long flags;
+
+       save_flags(flags);
+       cli();
+
+       if (rt6_lock == 0)
+       {
+               if (rt_clients == 0 && rt6_bh_mask)
+               {
+                       __rt6_run_bh();
+               }
+
+               /*
+                *      route expiry
+                */
+               
+               rt6_walk_tree(rt6_rt_timeout, NULL, 1);
+       }
+
+       restore_flags(flags);
+
+       rt6_gc_timer.expires = jiffies + 4 * DC_LONG_TIMEOUT;
+       add_timer(&rt6_gc_timer);
+}
+
+/*
+ *     Check if routes should be timed out.
+ *     Called from rt6_walk_tree for every node.
+ */
+
+/*
+ *     rt6_walk_tree callback: queue deletion requests for expired
+ *     addrconf routes (actual removal is deferred until after the walk).
+ */
+static void rt6_rt_timeout(struct fib6_node *fn, void *arg)
+{
+       struct rt6_info *rt;
+       unsigned long now = jiffies;
+
+       for (rt = fn->leaf; rt; rt = rt->next)
+       {
+               if ((rt->rt_flags & RTF_ADDRCONF) && now > rt->rt_expires)
+               {
+                       struct rt6_req *req;
+
+                       /*
+                        *      request route deletion. routes will only
+                        *      be deleted after walk_tree completes
+                        */
+
+                       /* NOTE(review): kmalloc is unchecked, and 'req'
+                        * is filled in but never handed to rtreq_queue()
+                        * (nor is RT_BH_REQUEST set) -- the request is
+                        * leaked and the expired route is never deleted.
+                        */
+                       req = (struct rt6_req *) kmalloc(sizeof(struct rt6_req),
+                                                        GFP_ATOMIC);
+                       req->operation = RT_OPER_DEL;
+                       req->ptr  = rt;
+                       req->next = req->prev = NULL;
+               }
+       }
+}
+
+/*
+ *     SIOCADDRT/SIOCDELRT ioctl entry point.  Requires superuser;
+ *     copies an in6_rtmsg from user space and dispatches to
+ *     ipv6_route_add()/ipv6_route_del().  Returns 0 or negative errno.
+ */
+int ipv6_route_ioctl(unsigned int cmd, void *arg)
+{
+       struct in6_rtmsg rtmsg;
+       int err;
+
+       switch(cmd) 
+       {
+               case SIOCADDRT:         /* Add a route */
+               case SIOCDELRT:         /* Delete a route */
+                       if (!suser())
+                               return -EPERM;
+                       err = copy_from_user(&rtmsg, arg,
+                                            sizeof(struct in6_rtmsg));
+                       if (err)
+                               return -EFAULT;
+                       return (cmd == SIOCDELRT) ? ipv6_route_del(&rtmsg) : 
+                               ipv6_route_add(&rtmsg);
+       }
+
+       return -EINVAL;
+}
+
+/*
+ *     Non-recursive post-order traversal of the routing tree, invoking
+ *     'func(node, arg)' on each visited node while backtracking.  With
+ *     'filter' set only RTN_BACKTRACK (route-carrying) nodes are passed
+ *     to the callback.  RTN_TAG marks nodes whose left subtree has
+ *     already been descended.  The callback may free the node it is
+ *     given (the walk keeps only the parent pointer).
+ */
+static void rt6_walk_tree(f_pnode func, void * arg, int filter)
+{
+       struct fib6_node *fn;
+       /*
+        *      acquire the lock:
+        *      this guarantees that the operation will be atomic with
+        *      respect to the garbage collect routine that also does
+        *      a tree traversal and tags nodes with the RTN_TAG flag
+        */
+       atomic_inc(&rt6_lock);
+
+       fn = &routing_table;
+
+       do {
+               /* first visit: tag the node and descend left */
+               if (!(fn->fn_flags & RTN_TAG))
+               {
+                       fn->fn_flags |= RTN_TAG;
+
+                       if (fn->left)
+                       {
+                               fn = fn->left;
+                               continue;
+                       }
+               }
+
+               /* left side done: clear tag and descend right */
+               fn->fn_flags &= ~RTN_TAG;
+
+               if (fn->right)
+               {
+                       fn = fn->right;
+                       continue;
+               }
+                       
+               /* leaf reached: backtrack, visiting finished nodes */
+               do {
+                       struct fib6_node *node;
+                       
+                       if (fn->fn_flags & RTN_ROOT)
+                               break;
+                       node = fn;
+                       fn = fn->parent;
+                       
+                       if (!(node->fn_flags & RTN_TAG) && 
+                           (!filter || (node->fn_flags & RTN_BACKTRACK)))
+                       {
+                               (*func)(node, arg);
+                       }
+
+               } while (!(fn->fn_flags & RTN_TAG));
+               
+       } while (!(fn->fn_flags & RTN_ROOT) || (fn->fn_flags & RTN_TAG));
+
+       atomic_dec(&rt6_lock);
+}
+
+#ifdef CONFIG_PROC_FS
+#define RT6_INFO_LEN (32 + 2 + 32 + 2 + 2 + 2 + 4 + 8 + 7)
+
+/* cursor state shared across rt6_info_node() calls during one
+ * /proc/net/route6 read */
+struct rt6_proc_arg {
+       char *buffer;           /* destination proc page */
+       int offset;             /* byte offset requested by the reader */
+       int skip;               /* fixed-size records skipped so far */
+       int len;                /* bytes written to buffer so far */
+};
+
+/*
+ *     rt6_walk_tree callback: format every route of a node as one
+ *     fixed-width (RT6_INFO_LEN) text line into the proc buffer.
+ *     NOTE(review): writes are not bounded by the proc page size --
+ *     a large table can overrun 'buffer'; confirm caller guarantees.
+ */
+static void rt6_info_node(struct fib6_node *fn, void *p_arg)
+{
+       struct rt6_info *rt;
+       struct rt6_proc_arg *arg = (struct rt6_proc_arg *) p_arg;
+
+       for (rt = fn->leaf; rt; rt = rt->next)
+       {
+               int i;
+
+               /* skip whole records that precede the requested offset */
+               if (arg->skip < arg->offset / RT6_INFO_LEN)
+               {
+                       arg->skip++;
+                       continue;
+               }
+       
+               /* destination address as 32 hex digits */
+               for (i=0; i<16; i++)
+               {
+                       sprintf(arg->buffer + arg->len, "%02x",
+                               rt->rt_dst.s6_addr[i]);
+                       arg->len += 2;
+               }
+               arg->len += sprintf(arg->buffer + arg->len, " %02x ",
+                                   rt->rt_prefixlen);
+               /* nexthop address, or all zeros when directly connected */
+               if (rt->rt_nexthop)
+               {
+                       for (i=0; i<16; i++)
+                       {
+                               sprintf(arg->buffer + arg->len, "%02x",
+                                       rt->rt_nexthop->addr.s6_addr[i]);
+                               arg->len += 2;
+                       }
+               }
+               else
+               {
+                       sprintf(arg->buffer + arg->len,
+                               "00000000000000000000000000000000");
+                       arg->len += 32;
+               }
+               arg->len += sprintf(arg->buffer + arg->len,
+                                   " %02x %02x %02x %04x %8s\n",
+                                   rt->rt_metric, rt->rt_use,
+                                   rt->rt_ref, rt->rt_flags, 
+                                   rt->rt_dev ? rt->rt_dev->name : "");
+       }
+}
+
+/*
+ *     /proc/net/route6 read handler: dump the routing tree plus the
+ *     default-router list and last-resort route, then trim the output
+ *     to the reader's offset/length window.  Returns bytes available.
+ */
+static int rt6_proc_info(char *buffer, char **start, off_t offset, int length,
+                        int dummy)
+{
+       struct rt6_proc_arg arg;
+       /* stack dummy node: rt6_info_node() only reads ->leaf */
+       struct fib6_node sfn;
+       arg.buffer = buffer;
+       arg.offset = offset;
+       arg.skip = 0;
+       arg.len = 0;
+
+       rt6_walk_tree(rt6_info_node, &arg, 1);
+       
+       sfn.leaf = default_rt_list;
+       rt6_info_node(&sfn, &arg);
+
+       sfn.leaf = last_resort_rt;
+       rt6_info_node(&sfn, &arg);
+                            
+       *start = buffer;
+
+       /* records before 'offset' were skipped whole; adjust for the
+        * partial record the reader is positioned inside */
+       if (offset)
+               *start += offset % RT6_INFO_LEN;
+
+       arg.len -= offset % RT6_INFO_LEN;
+
+       if (arg.len > length)
+               arg.len = length;
+
+       return arg.len;
+}
+
+
+/*
+ *     /proc/net/rt6_stats read handler: one line of five hex counters
+ *     (nodes, route nodes, rt allocs, rt entries, dcache allocs).
+ */
+static int rt6_proc_stats(char *buffer, char **start, off_t offset, int length,
+                         int dummy)
+{
+       int len;
+
+       len = sprintf(buffer, "%04x %04x %04x %04x %04x\n",
+                     rt6_stats.fib_nodes, rt6_stats.fib_route_nodes,
+                     rt6_stats.fib_rt_alloc, rt6_stats.fib_rt_entries,
+                     rt6_stats.fib_dc_alloc);
+
+       /* NOTE(review): len can go negative if offset exceeds the
+        * formatted length -- harmless for this short line, but worth
+        * clamping. */
+       len -= offset;
+
+       if (len > length)
+               len = length;
+
+       *start = buffer + offset;
+
+       return len;
+}
+
+#endif                 /* CONFIG_PROC_FS */
+
+/*
+ *     Module init: register the two proc entries, start the periodic
+ *     GC/expiry timer and attach the NETLINK_ROUTE6 receive handler.
+ */
+void ipv6_route_init(void)
+{
+#ifdef         CONFIG_PROC_FS
+       /* compound-literal proc_dir_entry registrations */
+       proc_net_register(&(struct proc_dir_entry) {
+               PROC_NET_RT6, 6, "route6",
+               S_IFREG | S_IRUGO, 1, 0, 0,
+               0, &proc_net_inode_operations,
+               rt6_proc_info
+       });
+       proc_net_register(&(struct proc_dir_entry) {
+               PROC_NET_RT6_STATS, 9, "rt6_stats",
+               S_IFREG | S_IRUGO, 1, 0, 0,
+               0, &proc_net_inode_operations,
+               rt6_proc_stats
+       });
+
+#endif
+       rt6_gc_timer.expires = jiffies + 4 * DC_LONG_TIMEOUT;
+       add_timer(&rt6_gc_timer);
+       netlink_attach(NETLINK_ROUTE6, rt6_msgrcv);
+}
+
+#ifdef MODULE
+/*
+ *     Module unload: undo ipv6_route_init() and free the whole tree.
+ */
+void ipv6_route_cleanup(void)
+{
+       proc_net_unregister(PROC_NET_RT6);
+       proc_net_unregister(PROC_NET_RT6_STATS);
+       netlink_detach(NETLINK_ROUTE6);
+       del_timer(&rt6_gc_timer);
+       fib6_flush();
+}
+#endif
+
+/*
+ *     NETLINK interface
+ *     routing socket moral equivalent
+ */
+
+/*
+ *     NETLINK_ROUTE6 receive handler: consume one or more in6_rtmsg
+ *     records from the skb and apply each add/delete.  Returns bytes
+ *     consumed, or -EINVAL on a short or unknown message.
+ */
+static int rt6_msgrcv(struct sk_buff *skb)
+{
+       int count = 0;
+       struct in6_rtmsg *rtmsg;
+       
+       while (skb->len)
+       {
+               if (skb->len < sizeof(struct in6_rtmsg))
+               {
+                       count = -EINVAL;
+                       goto out;
+               }
+               
+               /* NOTE(review): assumes skb->data is suitably aligned
+                * for struct in6_rtmsg -- true for netlink, but worth
+                * confirming. */
+               rtmsg = (struct in6_rtmsg *) skb->data;
+               skb_pull(skb, sizeof(struct in6_rtmsg));
+               count += sizeof(struct in6_rtmsg);
+
+               switch (rtmsg->rtmsg_type) {
+               case RTMSG_NEWROUTE:
+                       ipv6_route_add(rtmsg);
+                       break;
+               case RTMSG_DELROUTE:
+                       ipv6_route_del(rtmsg);
+                       break;
+               default:
+                       count = -EINVAL;
+                       goto out;
+               }
+       }
+
+  out:
+       kfree_skb(skb, FREE_READ);      
+       return count;
+}
+
+/*
+ *     Broadcast a routing event (in6_rtmsg) to NETLINK_ROUTE6 listeners.
+ *     NULL dst/gw are encoded as all-zero addresses.
+ */
+void rt6_sndmsg(__u32 type, struct in6_addr *dst, struct in6_addr *gw,
+               __u16 plen, __u16 metric, char *devname, __u16 flags)
+{
+       struct sk_buff *skb;
+       struct in6_rtmsg *msg;
+       
+       /* NOTE(review): alloc_skb() return is unchecked -- skb_put on
+        * NULL would oops under memory pressure. */
+       skb = alloc_skb(sizeof(struct in6_rtmsg), GFP_ATOMIC);
+       msg = (struct in6_rtmsg *) skb_put(skb, sizeof(struct in6_rtmsg));
+       
+       msg->rtmsg_type = type;
+
+       if (dst)
+       {
+               ipv6_addr_copy(&msg->rtmsg_dst, dst);
+       }
+       else
+               memset(&msg->rtmsg_dst, 0, sizeof(struct in6_addr));
+
+       if (gw)
+       {
+               ipv6_addr_copy(&msg->rtmsg_gateway, gw);
+       }
+       else
+               memset(&msg->rtmsg_gateway, 0, sizeof(struct in6_addr));
+
+       msg->rtmsg_prefixlen = plen;
+       msg->rtmsg_metric = metric;
+       /* NOTE(review): unbounded strcpy -- devname must fit the
+        * rtmsg_device field; confirm all callers pass short names. */
+       strcpy(msg->rtmsg_device, devname);
+       msg->rtmsg_flags = flags;
+
+       /* netlink_post() failing means no listeners: drop the skb */
+       if (netlink_post(NETLINK_ROUTE6, skb))
+       {
+               kfree_skb(skb, FREE_WRITE);
+       }
+}
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
new file mode 100644 (file)
index 0000000..cac0a03
--- /dev/null
@@ -0,0 +1,290 @@
+/*
+ *     IPv6 BSD socket options interface
+ *     Linux INET6 implementation 
+ *
+ *     Authors:
+ *     Pedro Roque             <roque@di.fc.ul.pt>     
+ *
+ *     Based on linux/net/ipv4/ip_sockglue.c
+ *
+ *     $Id: ipv6_sockglue.c,v 1.12 1996/10/29 22:45:53 roque Exp $
+ *
+ *     This program is free software; you can redistribute it and/or
+ *      modify it under the terms of the GNU General Public License
+ *      as published by the Free Software Foundation; either version
+ *      2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/socket.h>
+#include <linux/sockios.h>
+#include <linux/sched.h>
+#include <linux/net.h>
+#include <linux/in6.h>
+#include <linux/netdevice.h>
+#include <linux/if_arp.h>
+
+#include <linux/sysctl.h>
+
+#include <net/sock.h>
+#include <net/snmp.h>
+
+#include <net/ipv6.h>
+#include <net/ndisc.h>
+#include <net/protocol.h>
+#include <net/transp_v6.h>
+#include <net/ipv6_route.h>
+#include <net/addrconf.h>
+#include <net/inet_common.h>
+#include <net/sit.h>
+#include <net/tcp.h>
+#include <net/udp.h>
+
+#include <asm/uaccess.h>
+
+struct ipv6_mib ipv6_statistics={0, };
+struct packet_type ipv6_packet_type =
+{
+       0, 
+       NULL,                                   /* All devices */
+       ipv6_rcv,
+       NULL,
+       NULL
+};
+
+/*
+ *     addrconf module should be notified of a device going up
+ */
+static struct notifier_block ipv6_dev_notf = {
+       addrconf_notify,
+       NULL,
+       0
+};
+
+int ipv6_setsockopt(struct sock *sk, int level, int optname, char *optval, 
+                   int optlen)
+{
+       struct ipv6_pinfo *np = &sk->net_pinfo.af_inet6;
+       int val, err;
+       int retv = -EOPNOTSUPP;
+
+       if(level!=SOL_IPV6)
+               goto out;
+
+       if (optval == NULL)
+       {
+               val=0;
+       }
+       else
+       {
+               err = get_user(val, (int *) optval);
+               if(err)
+                       return err;
+       }
+       
+
+       switch (optname) {
+
+       case IPV6_ADDRFORM:
+               if (val == PF_INET)
+               {
+                       if (sk->protocol != IPPROTO_UDP &&
+                           sk->protocol != IPPROTO_TCP)
+                       {                               
+                               goto out;
+                       }
+                       
+                       if (sk->state != TCP_ESTABLISHED)
+                       {
+                               retv = ENOTCONN;
+                               goto out;
+                       }
+                       
+                       if (!(ipv6_addr_type(&np->daddr) & IPV6_ADDR_MAPPED))
+                       {
+                               retv = -EADDRNOTAVAIL;
+                               goto out;
+                       }
+
+                       if (sk->protocol == IPPROTO_TCP)
+                       {
+                               struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
+                               
+                               sk->prot = &tcp_prot;
+                               tp->af_specific = &ipv4_specific;
+                       }
+                       else
+                       {
+                               sk->prot = &udp_prot;
+                       }
+                       sk->socket->ops = &inet_proto_ops;
+                       retv = 0;
+               }
+               else
+               {
+                       retv = -EINVAL;
+               }
+               break;
+
+       case IPV6_RXINFO:
+               np->rxinfo = val;
+               retv = 0;
+               break;
+
+       case IPV6_UNICAST_HOPS:
+               if (val > 255)
+               {
+                       retv = -EINVAL;
+               }
+               else
+               {
+                       np->hop_limit = val;
+                       retv = 0;
+               }
+               break;
+
+       case IPV6_MULTICAST_HOPS:
+               if (val > 255)
+               {
+                       retv = -EINVAL;
+               }
+               else
+               {
+                       np->mcast_hops = val;
+                       retv = 0;
+               }
+               break;
+
+       case IPV6_MULTICAST_LOOP:
+               np->mc_loop = val;
+               break;
+
+       case IPV6_MULTICAST_IF:
+       {
+               struct in6_addr addr;
+
+               err=verify_area(VERIFY_READ, optval, sizeof(struct in6_addr));
+               if(err)
+                       return err;
+
+               err = copy_from_user(&addr, optval, sizeof(struct in6_addr));
+               if(err)
+                       return -EFAULT;
+                               
+               if (ipv6_addr_any(&addr))
+               {
+                       np->mc_if = NULL;
+               }
+               else
+               {
+                       struct inet6_ifaddr *ifp;
+
+                       ifp = ipv6_chk_addr(&addr);
+
+                       if (ifp == NULL)
+                       {
+                               retv = -EADDRNOTAVAIL;
+                               break;
+                       }
+
+                       np->mc_if = ifp->idev->dev;
+               }
+               retv = 0;
+               break;
+       }
+       case IPV6_ADD_MEMBERSHIP:
+       case IPV6_DROP_MEMBERSHIP:
+       {
+               struct ipv6_mreq mreq;
+               struct inet6_ifaddr *ifp;
+               struct device *dev = NULL;
+               int err;
+
+               err = copy_from_user(&mreq, optval, sizeof(struct ipv6_mreq));
+               if(err)
+                       return -EFAULT;
+               
+               if (ipv6_addr_any(&mreq.ipv6mr_interface))
+               {
+                       /* 
+                        *      FIXME
+                        *      default multicast rule.
+                        */
+               }
+               else
+               {
+                       if ((ifp = ipv6_chk_addr(&mreq.ipv6mr_interface)))
+                       {
+                               dev = ifp->idev->dev;
+                       }
+               }
+
+               if (dev == NULL)
+               {
+                       return -ENODEV;
+               }
+               
+               if (optname == IPV6_ADD_MEMBERSHIP)
+               {
+                       retv = ipv6_sock_mc_join(sk, dev, &mreq.ipv6mr_multiaddr);
+               }
+               else
+               {
+                       retv = ipv6_sock_mc_drop(sk, dev, &mreq.ipv6mr_multiaddr);
+               }
+       }
+       }
+
+  out:
+       return retv;
+}
+
+int ipv6_getsockopt(struct sock *sk, int level, int optname, char *optval, 
+                   int *optlen)
+{
+       return 0;
+}
+
+#ifdef MODULE
+
+/*
+ *     sysctl registration functions defined in sysctl_net_ipv6.c
+ */
+
+extern void ipv6_sysctl_register(void);
+extern void ipv6_sysctl_unregister(void);
+#endif
+
+void ipv6_init(void)
+{
+       ipv6_packet_type.type = ntohs(ETH_P_IPV6);
+
+       dev_add_pack(&ipv6_packet_type);
+
+#ifdef MODULE
+       ipv6_sysctl_register();
+#endif
+
+       register_netdevice_notifier(&ipv6_dev_notf);
+       
+       ipv6_route_init();
+}
+
+#ifdef MODULE
+void ipv6_cleanup(void)
+{
+       unregister_netdevice_notifier(&ipv6_dev_notf);
+       dev_remove_pack(&ipv6_packet_type);
+       ipv6_sysctl_unregister();       
+       ipv6_route_cleanup();
+       ndisc_cleanup();
+       addrconf_cleanup();     
+}
+#endif
+
+/*
+ * Local variables:
+ *  compile-command: "gcc -D__KERNEL__ -I/usr/src/linux/include -Wall -Wstrict-prototypes -O6 -m486 -c ipv6_sockglue.c"
+ * End:
+ */
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
new file mode 100644 (file)
index 0000000..14ba9ef
--- /dev/null
@@ -0,0 +1,220 @@
+/*
+ *     Multicast support for IPv6
+ *     Linux INET6 implementation 
+ *
+ *     Authors:
+ *     Pedro Roque             <roque@di.fc.ul.pt>     
+ *
+ *     Based on linux/ipv4/igmp.c and linux/ipv4/ip_sockglue.c 
+ *
+ *     This program is free software; you can redistribute it and/or
+ *      modify it under the terms of the GNU General Public License
+ *      as published by the Free Software Foundation; either version
+ *      2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/socket.h>
+#include <linux/sockios.h>
+#include <linux/sched.h>
+#include <linux/net.h>
+#include <linux/in6.h>
+#include <linux/netdevice.h>
+#include <linux/if_arp.h>
+
+#include <net/sock.h>
+#include <net/snmp.h>
+
+#include <net/ipv6.h>
+#include <net/protocol.h>
+#include <net/if_inet6.h>
+#include <net/ndisc.h>
+#include <net/ipv6_route.h>
+#include <net/addrconf.h>
+
+
+/*
+ *     socket join on multicast group
+ */
+int ipv6_sock_mc_join(struct sock *sk, struct device *dev, 
+                     struct in6_addr *addr)
+{
+       struct ipv6_mc_socklist *mc_lst;
+       struct ipv6_pinfo *np = &sk->net_pinfo.af_inet6;
+       int err;
+
+       if (!(ipv6_addr_type(addr) & IPV6_ADDR_MULTICAST))
+               return -EINVAL;
+
+       if(!(dev->flags & IFF_MULTICAST))
+               return -EADDRNOTAVAIL;
+
+       mc_lst = (struct ipv6_mc_socklist *) 
+               kmalloc(sizeof(struct ipv6_mc_socklist), GFP_KERNEL);
+
+       if (mc_lst == NULL)
+               return -ENOMEM;
+
+       mc_lst->next = NULL;
+       memcpy(&mc_lst->addr, addr, sizeof(struct in6_addr));
+       mc_lst->dev  = dev;
+
+       /*
+        *      now add/increase the group membership on the device
+        */
+
+       err = ipv6_dev_mc_inc(dev, addr);
+
+       if (err)
+       {
+               kfree(mc_lst);
+               return err;
+       }
+
+       mc_lst->next = np->ipv6_mc_list;
+       np->ipv6_mc_list = mc_lst;
+
+       return 0;
+}
+
+/*
+ *     socket leave on multicast group
+ */
+int ipv6_sock_mc_drop(struct sock *sk, struct device *dev, 
+                     struct in6_addr *addr)
+{
+       return 0;
+}
+
+void ipv6_sock_mc_close(struct sock *sk)
+{
+       struct ipv6_pinfo *np = &sk->net_pinfo.af_inet6;
+       struct ipv6_mc_socklist *mc_lst;
+
+       for (mc_lst = np->ipv6_mc_list; mc_lst; )
+       {
+               struct ipv6_mc_socklist *back;
+
+               /*
+                *      leave group
+                */
+
+               back = mc_lst;
+               mc_lst = mc_lst->next;
+               kfree(back);
+       }
+}
+
+/*
+ *     device multicast group inc (add if not found)
+ */
+int ipv6_dev_mc_inc(struct device *dev, struct in6_addr *addr)
+{
+       struct ipv6_mc_list *mc;
+       struct inet6_dev    *i6dev;
+       char buf[6];
+       u8 hash;
+       
+       for (i6dev = inet6_dev_lst; i6dev; i6dev=i6dev->next)
+               if (i6dev->dev == dev)
+                       break;
+               
+       if (i6dev == NULL)
+       {
+               printk(KERN_DEBUG "ipv6_dev_mc_inc: device not found\n");
+               return -EINVAL;
+       }
+
+       for (mc = i6dev->mc_list; mc; mc = mc->if_next)
+               if (ipv6_addr_cmp(&mc->addr, addr) == 0)
+               {
+                       atomic_inc(&mc->users);
+                       return 0;
+               }
+
+       /*
+        *      not found: create a new one.
+        */
+
+       mc = (struct ipv6_mc_list *) kmalloc(sizeof(struct ipv6_mc_list),
+                                            GFP_ATOMIC);
+
+       if (mc == NULL)
+       {
+               return -ENOMEM;
+       }
+
+       memset(mc, 0, sizeof(struct ipv6_mc_list));
+
+       memcpy(&mc->addr, addr, sizeof(struct in6_addr));
+       mc->dev = dev;
+       mc->users = 1;
+
+       hash = ipv6_addr_hash(addr);
+
+       mc->next = inet6_mcast_lst[hash];
+       inet6_mcast_lst[hash] = mc;
+       
+       mc->if_next = i6dev->mc_list;
+       i6dev->mc_list = mc;
+
+       /*
+        *      multicast mapping is defined in IPv6-over-foo documents
+        */
+
+       switch (dev->type) {
+       case ARPHRD_ETHER:
+               ipv6_mc_map(addr, buf);
+               dev_mc_add(dev, buf, ETH_ALEN, 0);
+               break;
+               
+       default:
+               printk(KERN_DEBUG "dev_mc_inc: unkown device type\n");
+       }
+       
+
+       /*
+        *      FIXME: ICMP report handling
+        */
+
+       return 0;
+}
+
+/*
+ *     device multicast group del
+ */
+int ipv6_dev_mc_dec(struct device *dev, struct in6_addr *addr)
+{
+       return 0;
+}
+
+/*
+ *     check if the interface/address pair is valid
+ */
+int ipv6_chk_mcast_addr(struct device *dev, struct in6_addr *addr)
+{
+       struct ipv6_mc_list *mc;        
+       u8 hash;
+
+       hash = ipv6_addr_hash(addr);
+
+       for (mc = inet6_mcast_lst[hash]; mc; mc=mc->next)
+               if ((mc->dev == dev) &&
+                   ipv6_addr_cmp(&mc->addr, addr) == 0)
+               {
+                       return 1;
+               }
+
+       return 0;
+}
+
+/*
+ *     IGMP handling (alias multicast ICMPv6 messages)
+ */
+
+/*
+ * Local variables:
+ *  compile-command: "gcc -D__KERNEL__ -I/usr/src/linux/include -Wall -Wstrict-prototypes -O2 -fomit-frame-pointer -fno-strength-reduce -pipe -m486 -DCPU=486 -DMODULE -DMODVERSIONS -include /usr/src/linux/include/linux/modversions.h  -c -o mcast.o mcast.c"
+ * End:
+ */
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
new file mode 100644 (file)
index 0000000..264c950
--- /dev/null
@@ -0,0 +1,1905 @@
+/*
+ *     Neighbour Discovery for IPv6
+ *     Linux INET6 implementation 
+ *
+ *     Authors:
+ *     Pedro Roque             <roque@di.fc.ul.pt>     
+ *     Mike Shaver             <shaver@ingenia.com>
+ *
+ *     $Id: ndisc.c,v 1.28 1996/10/11 16:03:06 roque Exp $
+ *
+ *     This program is free software; you can redistribute it and/or
+ *      modify it under the terms of the GNU General Public License
+ *      as published by the Free Software Foundation; either version
+ *      2 of the License, or (at your option) any later version.
+ */
+
+/*
+ *     Interface:
+ *
+ *     ndisc_lookup will be called from eth.c on dev->(re)build_header
+ *
+ *     ndisc_rcv
+ *     ndisc_validate is called by higher layers when they know a neighbour
+ *                    is reachable.
+ *
+ *     Manages neighbour cache
+ *
+ */
+
+#define __NO_VERSION__
+#include <linux/module.h>
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/socket.h>
+#include <linux/sockios.h>
+#include <linux/sched.h>
+#include <linux/net.h>
+#include <linux/in6.h>
+#include <linux/netdevice.h>
+#include <linux/if_arp.h>
+#include <linux/ipv6.h>
+#include <linux/icmpv6.h>
+#include <linux/config.h>
+
+#include <net/sock.h>
+#include <net/snmp.h>
+
+#include <net/ipv6.h>
+#include <net/protocol.h>
+#include <net/ndisc.h>
+#include <net/ipv6_route.h>
+#include <net/addrconf.h>
+
+
+#include <net/checksum.h>
+#include <linux/proc_fs.h>
+
+#define NCACHE_NUM_BUCKETS 32
+
+static struct socket ndisc_socket;
+
+unsigned long nd_rand_seed = 152L;
+
+struct ndisc_statistics nd_stats;
+
+static struct neighbour *neighbours[NCACHE_NUM_BUCKETS];
+static struct timer_list ndisc_timer;
+static struct timer_list ndisc_gc_timer;
+
+static atomic_t        ndisc_lock = 0;
+
+/*
+ *     Protocol variables
+ */
+
+int    nd_max_multicast_solicit        = 3;
+int    nd_max_unicast_solicit          = 3;
+int    nd_retrans_timer                = RETRANS_TIMER;
+int    nd_reachable_time               = RECHABLE_TIME;
+int    nd_base_reachable_time          = RECHABLE_TIME;
+int    nd_delay_first_probe            = 5 * HZ;
+int    nd_gc_interval                  = 5 * HZ;
+
+/* 
+ *     garbage collection timeout must be greater than reachable time
+ *     since tstamp is updated by reachable confirmations only.
+ *     gc_staletime actually means the time after last confirmation
+ *     *NOT* after the last time the entry was used.
+ */
+
+int    nd_gc_staletime                 = 3 * RECHABLE_TIME;
+
+static struct neighbour ndisc_insert_queue = {
+       {{{0,}}}, 0, 0, NULL, 0,
+       {0,}, NULL, {0,}, 0, 0, 0, 0, 0,
+       &ndisc_insert_queue,
+       &ndisc_insert_queue
+};
+
+static int ndisc_ins_queue_len = 0;
+
+int  ndisc_event_timer(struct neighbour *neigh);
+
+static void ndisc_bh_insert(void);
+
+int ipv6_random(void)
+{
+       nd_rand_seed=nd_rand_seed*69069L+1;
+        return nd_rand_seed^jiffies;
+}
+
+static __inline__ unsigned long rand_reach_time(void)
+{
+       unsigned long val;
+
+       val = ipv6_random() % (MAX_RANDOM_FACTOR * nd_base_reachable_time);
+       if (val < (MIN_RANDOM_FACTOR * nd_base_reachable_time))
+       {
+               val += (MIN_RANDOM_FACTOR * nd_base_reachable_time);
+       }
+
+       return val;
+}
+
+void ndisc_verify_reachability(struct neighbour * neigh);
+
+/*
+ *     (inline) support functions
+ */
+
+static __inline__ __u32 ndisc_hash(struct in6_addr *addr)
+{
+        
+        __u32 hash_val;
+        
+        hash_val = addr->s6_addr32[2] ^ addr->s6_addr32[3];
+
+        hash_val ^= hash_val >> 16;
+        
+        return (hash_val & (NCACHE_NUM_BUCKETS - 1));
+}
+
+
+static __inline__ void ndisc_neigh_queue(struct neighbour *neigh)
+{
+       struct neighbour *next = &ndisc_insert_queue;
+
+       ndisc_ins_queue_len++;
+
+       neigh->prev = next->prev;
+       neigh->prev->next = neigh;
+       next->prev = neigh;
+       neigh->next = next;
+}
+
+static __inline__ struct neighbour * ndisc_dequeue(void)
+{
+       struct neighbour *next = &ndisc_insert_queue;
+       struct neighbour *head;
+
+       ndisc_ins_queue_len--;
+
+       head = next->next;
+
+       if (head == next)
+       {
+               return NULL;
+       }
+
+       head->next->prev = head->prev;
+       next->next = head->next;
+
+       head->next = NULL;
+       head->prev = NULL;
+
+       return head;
+}
+
+static __inline__ void ndisc_release_lock(void)
+{
+       unsigned long flags;
+
+       save_flags(flags);
+       cli();
+
+       ndisc_lock--;
+
+       if (ndisc_lock == 0 && ndisc_ins_queue_len)
+       {
+               ndisc_bh_insert();
+       }
+
+       restore_flags(flags);
+}
+
+static void ndisc_insert_neigh(struct neighbour *neigh)
+{
+
+        struct neighbour * bucket;
+        __u32 hash_val = ndisc_hash(&neigh->addr);
+        
+        bucket = neighbours[hash_val];
+        
+        if (!bucket)
+       {
+                neighbours[hash_val] = neigh;
+                return;
+        }
+
+        for (; bucket->next; bucket = bucket->next)
+               ;
+               
+        bucket->next = neigh;
+       neigh->prev = bucket;
+}
+
+static __inline__ struct neighbour * 
+ndisc_retrieve_neigh(struct device *dev, struct in6_addr *addr)
+{
+
+        struct neighbour * iter;
+        iter = neighbours[ndisc_hash(addr)];
+
+        for (; iter; iter = iter->next)
+       {
+               if (dev == iter->dev && ipv6_addr_cmp(addr, &iter->addr) == 0)
+                       return iter;
+       }
+        return NULL;
+}
+
+static void ndisc_unlink_neigh(struct neighbour * neigh) 
+{
+       if (neigh->prev)
+               neigh->prev->next = neigh->next;
+       else
+       {
+               int hash = ndisc_hash(&neigh->addr);
+               neighbours[hash] = neigh->next;
+       }
+
+       if (neigh->next)
+               neigh->next->prev = neigh->prev;
+}
+
+static void ndisc_release_neigh(struct neighbour * neigh)
+{
+       struct sk_buff *skb;
+
+       while((skb=skb_dequeue(&neigh->arp_queue)))
+       {
+               dev_kfree_skb(skb, FREE_WRITE);
+       }
+
+       if (neigh->refcnt == 0)
+       {
+               ndisc_unlink_neigh(neigh);
+               kfree(neigh);
+       }
+}
+
+static void ndisc_bh_insert(void)
+{
+       struct neighbour *neigh;
+
+       while((neigh = ndisc_dequeue()))
+       {
+               ndisc_insert_neigh(neigh);
+       }
+}
+
+
+static void ndisc_garbage_collect(unsigned long arg)
+{
+        struct neighbour * neigh;
+       static unsigned long last_rand = 0;
+        unsigned long now = jiffies;
+       unsigned long flags;
+        int i = 0;
+
+
+       /*
+        *      periodically compute ReachableTime from random function
+        */
+       if (now - last_rand > REACH_RANDOM_INTERVAL)
+       {
+               last_rand = now;
+               nd_reachable_time = rand_reach_time();
+       }
+
+       save_flags(flags);
+       cli();
+
+       if (ndisc_lock)
+       {
+               restore_flags(flags);
+               ndisc_gc_timer.expires = now + HZ;
+               add_timer(&ndisc_gc_timer);
+               return;
+       }
+               
+        for (; i < NCACHE_NUM_BUCKETS; i++)
+                for (neigh = neighbours[i]; neigh;)
+               {
+                        /*
+                        *      Release unused entries
+                        */
+                        if (neigh->refcnt == 0 &&
+                           ((neigh->nud_state == NUD_FAILED) ||
+                            ((neigh->nud_state == NUD_REACHABLE) &&
+                             (neigh->tstamp <= (now - nd_gc_staletime))
+                             )
+                            )
+                           )
+                       {
+                               struct neighbour *prev;
+                               
+                               prev = neigh;
+                               neigh = neigh->next;
+                                ndisc_release_neigh(prev);
+                               continue;
+                        }
+                       neigh = neigh->next;
+               }
+
+       restore_flags(flags);
+
+        ndisc_gc_timer.expires = now + nd_gc_interval;
+        add_timer(&ndisc_gc_timer);
+}
+
+static __inline__ void ndisc_add_timer(struct neighbour *neigh, int timer)
+{
+       unsigned long now = jiffies;
+       unsigned long tval;
+
+       neigh->expires = now + timer;
+       tval = del_timer(&ndisc_timer);
+
+       if (tval)
+       {
+               tval = min(tval, neigh->expires);
+       }
+       else
+               tval = neigh->expires;
+
+       ndisc_timer.expires = tval;
+       add_timer(&ndisc_timer);
+}
+        
+static void ndisc_del_timer(struct neighbour *neigh)
+{
+       unsigned long tval;
+
+       if (!(neigh->nud_state & NUD_IN_TIMER))
+               return;
+
+       tval = del_timer(&ndisc_timer);
+       
+       if (tval == neigh->expires)
+       {
+               int i;
+               
+               tval = ~0UL;
+
+               /* need to search the entire neighbour cache */
+               for (i=0; i < NCACHE_NUM_BUCKETS; i++)
+               {
+                       for (neigh = neighbours[i]; neigh; neigh=neigh->next)
+                               if (neigh->nud_state & NUD_IN_TIMER)
+                               {
+                                       tval = min(tval, neigh->expires);
+                               }
+               }
+
+       }
+
+       if (tval == ~(0UL))
+               return;
+
+       ndisc_timer.expires = tval;
+       add_timer(&ndisc_timer);
+}
+
+static struct neighbour * ndisc_new_neigh(struct device *dev,
+                                         struct in6_addr *addr)
+{
+       struct neighbour *neigh;
+       unsigned long flags;
+
+       neigh = (struct neighbour *) kmalloc(sizeof(struct neighbour),
+                                            GFP_ATOMIC);
+
+       if (neigh == NULL)
+       {
+               printk(KERN_DEBUG "ndisc: kmalloc failure\n");
+               return NULL;
+       }
+
+       nd_stats.allocs++;
+
+       memset(neigh, 0, sizeof (struct neighbour));
+       skb_queue_head_init(&neigh->arp_queue);
+
+       ipv6_addr_copy(&neigh->addr, addr);
+       neigh->len = 128;
+       neigh->type = ipv6_addr_type(addr);
+       neigh->dev = dev;
+       neigh->tstamp = jiffies;
+
+       if (dev->type == ARPHRD_LOOPBACK || dev->type == ARPHRD_SIT)
+       {
+               neigh->flags |= NCF_NOARP;
+       }
+
+       save_flags(flags);
+       cli();
+
+       if (ndisc_lock == 0)
+       {
+               /* Add to the cache. */
+               ndisc_insert_neigh(neigh);
+       }
+       else
+       {
+               ndisc_neigh_queue(neigh);
+       }
+
+       restore_flags(flags);
+
+       return neigh;
+}
+
+/*
+ *     Called when creating a new dest_cache entry for a given destination
+ *     it is likely that an entry for the referred gateway exists in cache
+ *
+ */
+
+struct neighbour * ndisc_get_neigh(struct device *dev, struct in6_addr *addr)
+{
+       struct neighbour *neigh;
+
+       /*
+        *      neighbour cache:
+        *      cached information about nexthop and addr resolution
+        */
+
+       if (dev == NULL)
+       {
+               printk(KERN_DEBUG "ncache_get_neigh: NULL device\n");
+               return NULL;
+       }
+
+       atomic_inc(&ndisc_lock);
+
+        neigh = ndisc_retrieve_neigh(dev, addr);
+
+       ndisc_release_lock();
+
+       if (neigh == NULL)
+       {
+               neigh = ndisc_new_neigh(dev, addr);
+       }
+
+       atomic_inc(&neigh->refcnt);
+       
+       return neigh;   
+}
+
+/*
+ *     return values
+ *     0 - Address Resolution succeeded, send packet
+ *     1 - Address Resolution unfinished / packet queued
+ */
+
+int ndisc_eth_resolv(unsigned char *h_dest, struct device *dev,
+                    struct sk_buff *skb)
+{
+       struct neighbour *neigh;
+
+       neigh = skb->nexthop;
+
+       if (neigh == NULL)
+       {
+               int addr_type;
+
+               addr_type = ipv6_addr_type(&skb->ipv6_hdr->daddr);
+               
+               if (addr_type & IPV6_ADDR_MULTICAST)
+               {
+                       ipv6_mc_map(&skb->ipv6_hdr->daddr, h_dest);
+                       return 0;
+               }
+
+               printk(KERN_DEBUG "ndisc_eth_resolv: nexthop is NULL\n");
+               goto discard;
+       }
+
+       if (skb->pkt_type == PACKET_NDISC)
+               goto ndisc_pkt;
+       
+       switch (neigh->nud_state) {     
+       case NUD_FAILED:
+       case NUD_NONE:
+               ndisc_event_send(neigh, skb);
+
+       case NUD_INCOMPLETE:                    
+               if (skb_queue_len(&neigh->arp_queue) >= NDISC_QUEUE_LEN)
+               {
+                       struct sk_buff *buff;
+                       
+                       buff = neigh->arp_queue.prev;
+                       skb_unlink(buff);
+                       dev_kfree_skb(buff, FREE_WRITE);
+               }
+               skb_queue_head(&neigh->arp_queue, skb);
+               return 1;
+       default:
+               ndisc_event_send(neigh, skb);
+       }
+
+  ndisc_pkt:
+
+       if (neigh->h_dest == NULL)
+       {
+               printk(KERN_DEBUG "neigh->h_dest is NULL\n");
+               goto discard;
+       }
+
+       memcpy(h_dest, neigh->h_dest, dev->addr_len);
+
+       if ((neigh->flags & NCF_HHVALID) == 0)
+       {
+               /*
+                * copy header to hh_data and move h_dest pointer
+                * this is strictly media dependent.
+                */
+       }
+       return 0;
+
+  discard:
+       
+       dev_kfree_skb(skb, FREE_WRITE);
+       return 1;
+}
+
+
+/* Send the actual Neighbour Advertisement */
+
+void ndisc_send_na(struct device *dev, struct neighbour *neigh,
+                  struct in6_addr *daddr,
+                  struct in6_addr *solicited_addr,
+                  int router, int solicited, int override, int inc_opt) 
+{
+        struct sock *sk = (struct sock *)ndisc_socket.data;
+        struct nd_msg *msg;
+        int len, opt_len;
+        struct sk_buff *skb;
+       int err;
+
+       opt_len = ((dev->addr_len + 1) >> 3) + 1;
+       len = sizeof(struct icmpv6hdr) + sizeof(struct in6_addr);
+
+       if (inc_opt)
+       {
+               len += opt_len << 3;
+       }
+
+       skb = sock_alloc_send_skb(sk, MAX_HEADER + len, 0, 0, &err);
+
+       if (skb == NULL)
+       {
+               printk(KERN_DEBUG "send_na: alloc skb failed\n");
+       }
+
+       skb->free=1;
+
+       if (ipv6_bld_hdr_2(sk, skb, dev, neigh, solicited_addr, daddr,
+                          IPPROTO_ICMPV6, len) < 0)
+        {
+               kfree_skb(skb, FREE_WRITE);
+               printk(KERN_DEBUG 
+                      "ndisc_send_na: ipv6_build_header returned < 0\n");
+               return;
+       }
+
+       skb->pkt_type = PACKET_NDISC;
+       
+       msg = (struct nd_msg *) skb_put(skb, len);
+
+        msg->icmph.type = NDISC_NEIGHBOUR_ADVERTISEMENT;
+        msg->icmph.code = 0;
+        msg->icmph.checksum = 0;
+
+        msg->icmph.icmp6_unused = 0;
+        msg->icmph.icmp6_router    = router;
+        msg->icmph.icmp6_solicited = solicited;
+        msg->icmph.icmp6_override  = override;
+
+        /* Set the target address. */
+       ipv6_addr_copy(&msg->target, solicited_addr);
+
+       if (inc_opt)
+       {
+               /* Set the source link-layer address option. */
+               msg->opt.opt_type = ND_OPT_TARGET_LL_ADDR;
+               msg->opt.opt_len = opt_len;
+               memcpy(msg->opt.link_addr, dev->dev_addr, dev->addr_len);
+
+               if ((opt_len << 3) - (2 + dev->addr_len))
+               {
+                       memset(msg->opt.link_addr + dev->addr_len, 0,
+                              (opt_len << 3) - (2 + dev->addr_len));
+               }
+       }
+
+       /* checksum */
+       msg->icmph.checksum = csum_ipv6_magic(solicited_addr, daddr, len, 
+                                             IPPROTO_ICMPV6,
+                                             csum_partial((__u8 *) msg, 
+                                                          len, 0));
+
+       ipv6_queue_xmit(sk, skb->dev, skb, 1);
+}        
+
+/*
+ *     Build and transmit an ICMPv6 Neighbour Solicitation for the
+ *     address `solicit' out of `dev'.
+ *
+ *     neigh   - neighbour cache entry passed through to ipv6_bld_hdr_2
+ *               (callers pass NULL when soliciting via multicast)
+ *     daddr   - IPv6 destination (solicited-node multicast or unicast)
+ *     saddr   - IPv6 source; if NULL the device link-local address is
+ *               substituted when one exists
+ */
+void ndisc_send_ns(struct device *dev, struct neighbour *neigh,
+                  struct in6_addr *solicit,
+                  struct in6_addr *daddr, struct in6_addr *saddr) 
+{
+        struct sock *sk = (struct sock *) ndisc_socket.data;
+        struct sk_buff *skb;
+        struct nd_msg *msg;
+        int len, opt_len;
+       int err;
+
+       /* length of addr in 8 octet groups.*/
+       opt_len = ((dev->addr_len + 1) >> 3) + 1;
+       len = sizeof(struct icmpv6hdr) + sizeof(struct in6_addr) +
+                (opt_len << 3);
+
+        skb = sock_alloc_send_skb(sk, MAX_HEADER + len, 0, 0, &err);
+       if (skb == NULL)
+       {
+               printk(KERN_DEBUG "send_ns: alloc skb failed\n");
+               return;
+       }
+
+       skb->free=1;
+       skb->pkt_type = PACKET_NDISC;
+
+       if (saddr == NULL)
+       {
+               struct inet6_ifaddr *ifa;
+
+               /* use link local address */
+               ifa = ipv6_get_lladdr(dev);
+
+               if (ifa)
+               {
+                       saddr = &ifa->addr;
+               }
+       }
+
+       /* NOTE(review): if no link-local address exists, saddr stays
+        * NULL here - presumably ipv6_bld_hdr_2 copes; confirm. */
+
+        if(ipv6_addr_type(daddr) == IPV6_ADDR_MULTICAST)
+       {
+                nd_stats.snt_probes_mcast++;
+       }
+        else
+       {
+                nd_stats.snt_probes_ucast++;
+       }
+
+        if (ipv6_bld_hdr_2(sk, skb, dev, neigh, saddr, daddr, IPPROTO_ICMPV6,
+                          len) < 0 )
+       {
+                kfree_skb(skb, FREE_WRITE);
+                printk(KERN_DEBUG
+                       "ndisc_send_ns: ipv6_build_header returned < 0\n");
+                return;
+        }
+       
+        msg = (struct nd_msg *)skb_put(skb, len);
+        msg->icmph.type = NDISC_NEIGHBOUR_SOLICITATION;
+        msg->icmph.code = 0;
+        msg->icmph.checksum = 0;
+        msg->icmph.icmp6_unused = 0;
+
+        /* Set the target address. */
+        ipv6_addr_copy(&msg->target, solicit);
+
+        /* Set the source link-layer address option. */
+        msg->opt.opt_type = ND_OPT_SOURCE_LL_ADDR;
+        msg->opt.opt_len = opt_len;
+
+        memcpy(msg->opt.link_addr, dev->dev_addr, dev->addr_len);
+
+       /* zero-pad the option up to its 8 byte boundary */
+       if ((opt_len << 3) - (2 + dev->addr_len))
+       {
+               memset(msg->opt.link_addr + dev->addr_len, 0,
+                      (opt_len << 3) - (2 + dev->addr_len));
+       }
+
+       /* checksum */
+       msg->icmph.checksum = csum_ipv6_magic(&skb->ipv6_hdr->saddr,
+                                             daddr, len, 
+                                             IPPROTO_ICMPV6,
+                                             csum_partial((__u8 *) msg, 
+                                                          len, 0));
+       /* send it! */
+       ipv6_queue_xmit(sk, skb->dev, skb, 1);
+}
+
+/*
+ *     Build and transmit an ICMPv6 Router Solicitation on `dev',
+ *     appending the device's source link-layer address as an option.
+ *
+ *     saddr/daddr - IPv6 source and destination addresses.
+ */
+void ndisc_send_rs(struct device *dev, struct in6_addr *saddr,
+                  struct in6_addr *daddr)
+{
+       struct sock *sk = (struct sock *) ndisc_socket.data;
+       struct sk_buff *skb;
+       struct icmpv6hdr *hdr;
+       __u8 * opt;
+       int len, opt_len;
+       int err;
+
+       /* length of addr in 8 octet groups.*/
+       opt_len = ((dev->addr_len + 1) >> 3) + 1;
+       len = sizeof(struct icmpv6hdr) + (opt_len << 3);
+
+       skb = sock_alloc_send_skb(sk, MAX_HEADER + len, 0, 0, &err);
+       if (skb == NULL)
+       {
+               /* BUG FIX: the old code fell through here and went on
+                * to dereference the NULL skb; also the message said
+                * "send_ns". */
+               printk(KERN_DEBUG "send_rs: alloc skb failed\n");
+               return;
+       }
+
+       skb->free=1;
+
+       if (ipv6_bld_hdr_2(sk, skb, dev, NULL, saddr, daddr, IPPROTO_ICMPV6,
+                          len) < 0 )
+       {
+               kfree_skb(skb, FREE_WRITE);
+               /* BUG FIX: message wrongly identified ndisc_send_ns */
+               printk(KERN_DEBUG
+                      "ndisc_send_rs: ipv6_build_header returned < 0\n");
+               return;
+       }
+
+       hdr = (struct icmpv6hdr *) skb_put(skb, len);
+       hdr->type = NDISC_ROUTER_SOLICITATION;
+       hdr->code = 0;
+       hdr->checksum = 0;
+       hdr->icmp6_unused = 0;
+
+       opt = (u8*) (hdr + 1);
+
+       /* Set the source link-layer address option. */
+       opt[0] = ND_OPT_SOURCE_LL_ADDR;
+       opt[1] = opt_len;
+
+       memcpy(opt + 2, dev->dev_addr, dev->addr_len);
+
+       /* zero-pad the option up to its 8 byte boundary */
+       if ((opt_len << 3) - (2 + dev->addr_len))
+       {
+               memset(opt + 2 + dev->addr_len, 0,
+                      (opt_len << 3) - (2 + dev->addr_len));
+       }
+
+       /* checksum */
+       hdr->checksum = csum_ipv6_magic(&skb->ipv6_hdr->saddr, daddr, len,
+                                       IPPROTO_ICMPV6,
+                                       csum_partial((__u8 *) hdr, len, 0));
+
+       /* send it! */
+       ipv6_queue_xmit(sk, skb->dev, skb, 1);
+}
+                  
+
+/*
+ *     Walk the ndisc option area looking for option `option' and copy
+ *     its link-layer address into h_addr.
+ *
+ *     Returns 0 on success, -EINVAL if the option is missing or has a
+ *     zero length field.
+ */
+static int ndisc_store_hwaddr(struct device *dev, __u8 *opt, int opt_len,
+                             __u8 *h_addr, int option)
+{
+       /* BUG FIX: check opt_len before reading *opt - with an empty
+        * (or fully consumed) option area the old code read one byte
+        * past the packet. */
+       while (opt_len > 0 && *opt != option)
+       {
+               int len;
+
+               len = opt[1] << 3;
+               
+               if (len == 0)
+               {
+                       printk(KERN_WARNING "nd: option has 0 len\n");
+                       return -EINVAL;
+               }
+
+               opt += len;
+               opt_len -= len;
+       }
+
+       if (opt_len > 0 && *opt == option)
+       {
+               memcpy(h_addr, opt + 2, dev->addr_len); 
+               return 0;
+       }
+
+       return -EINVAL;
+}
+
+/*
+ *     Called when a timer expires for a neighbour entry.  Scans every
+ *     hash bucket for entries with NUD_IN_TIMER set, fires their
+ *     per-entry event, and re-arms ndisc_timer with the smallest
+ *     remaining timeout.  ndisc_lock is held (via atomic_inc) for the
+ *     duration of the scan.
+ */
+static void ndisc_timer_handler(unsigned long arg) 
+{
+       unsigned long now = jiffies;
+        struct neighbour * neigh;
+       unsigned long ntimer = ~0UL;
+        int i;
+
+       atomic_inc(&ndisc_lock);
+
+       for (i=0; i < NCACHE_NUM_BUCKETS; i++)
+       {
+                for (neigh = neighbours[i]; neigh;)
+               {
+                        if (neigh->nud_state & NUD_IN_TIMER)
+                       {
+                               int time;
+
+                               if (neigh->expires <= now)
+                               {
+                                       /* expired: run the state machine;
+                                        * returns the next timeout, or 0
+                                        * when the entry has failed */
+                                       time = ndisc_event_timer(neigh);
+                               }
+                               else
+                                       time = neigh->expires - now;
+
+                               if (time == 0)
+                               {
+                                       unsigned long flags;
+
+                                       save_flags(flags);
+                                       cli();
+
+                                       /* only release the entry if we are
+                                        * the sole holder of ndisc_lock */
+                                       if (ndisc_lock == 1)
+                                       {
+                                               struct neighbour *old = neigh;
+
+                                               neigh = neigh->next;
+                                               ndisc_release_neigh(old);
+                                               restore_flags(flags);
+                                               continue;
+                                       }
+
+                                       restore_flags(flags);
+                               }
+
+                               /* NOTE(review): min() mixes unsigned long
+                                * and int here - verify no sign-conversion
+                                * surprise when time is 0 */
+                               ntimer = min(ntimer, time);
+                       }
+                       neigh = neigh->next;
+               }
+       }
+
+       /* re-arm only if at least one timed entry remains */
+       if (ntimer != (~0UL))
+       {
+               ndisc_timer.expires = jiffies + ntimer;
+               add_timer(&ndisc_timer);
+       }
+       ndisc_release_lock();
+}
+
+
+/*
+ *     Per-entry timer event.  Advances the NUD state machine for one
+ *     neighbour: DELAY becomes PROBE, probes are retransmitted until
+ *     the per-state limit is reached, after which the entry is marked
+ *     FAILED and queued packets are bounced with an ICMPv6 error.
+ *
+ *     Returns the next timeout in jiffies, or 0 when the entry failed
+ *     (caller then releases it).
+ */
+int ndisc_event_timer(struct neighbour *neigh)
+{
+       struct in6_addr *daddr;
+       struct in6_addr *target;
+       struct in6_addr mcaddr;
+       struct device *dev;
+       int max_probes;
+
+       if (neigh->nud_state == NUD_DELAY)
+       {
+               neigh->nud_state = NUD_PROBE;
+       }
+
+       /* unicast probe limit once we know an address, multicast
+        * limit while still resolving */
+       max_probes = (neigh->nud_state == NUD_PROBE ? nd_max_unicast_solicit:
+                     nd_max_multicast_solicit);
+
+       if (neigh->probes == max_probes)
+       {
+               struct sk_buff *skb;
+
+               neigh->nud_state = NUD_FAILED;
+               neigh->flags |= NCF_INVALID;
+               nd_stats.res_failed++;
+
+               while((skb=skb_dequeue(&neigh->arp_queue)))
+               {
+                       /*
+                        *      "The sender MUST return an ICMP
+                        *       destination unreachable"
+                        */
+                       icmpv6_send(skb, ICMPV6_DEST_UNREACH,
+                                   ICMPV6_ADDR_UNREACH, 0, neigh->dev);
+
+                       dev_kfree_skb(skb, FREE_WRITE);
+               }
+               return 0;
+       }
+               
+       neigh->probes++;
+
+       dev = neigh->dev;
+       target = &neigh->addr;
+
+       if (neigh->nud_state == NUD_INCOMPLETE)
+       {
+               /* still resolving: solicit via the solicited-node
+                * multicast group, with no neighbour for the header */
+               addrconf_addr_solict_mult(&neigh->addr, &mcaddr);
+               daddr = &mcaddr;                
+               neigh = NULL;
+       }
+       else
+       {
+               daddr = &neigh->addr;           
+       }
+
+       ndisc_send_ns(dev, neigh, target, daddr, NULL);
+
+       return nd_retrans_timer;        
+}
+
+/*
+ *     Called when a packet is about to be sent through `neigh'.
+ *     Kicks off address resolution for unresolved entries and
+ *     schedules reachability probing for stale ones.
+ */
+void ndisc_event_send(struct neighbour *neigh, struct sk_buff *skb)
+{
+       unsigned long now = jiffies;
+       struct in6_addr daddr;
+       struct in6_addr *saddr = NULL;
+
+       switch (neigh->nud_state) {
+       case NUD_FAILED:
+               neigh->probes = 0;
+               /* fallthrough: restart resolution from scratch */
+       case NUD_NONE:
+
+               if (skb && !skb->stamp.tv_sec)
+               {
+                       /*
+                        *      skb->stamp allows us to know if we are
+                        *      originating the skb or forwarding it.
+                        *      (it is set on netif_rx)
+                        */
+                       saddr = &skb->ipv6_hdr->saddr;
+               }
+
+               neigh->nud_state = NUD_INCOMPLETE;
+               addrconf_addr_solict_mult(&neigh->addr, &daddr);
+               ndisc_send_ns(neigh->dev, NULL, &neigh->addr, &daddr, saddr);
+               ndisc_add_timer(neigh, nd_retrans_timer);
+
+               break;
+
+       case NUD_REACHABLE:
+               if (now - neigh->tstamp < nd_reachable_time)
+                       break;
+               /* fallthrough: confirmation has aged out */
+       case NUD_STALE:
+               neigh->nud_state = NUD_DELAY;
+               ndisc_add_timer(neigh, nd_delay_first_probe);
+       }
+}
+
+/*
+ *     Received a neighbour advertisement for `neigh'.
+ *
+ *     opt/opt_len          - option area of the message (may be empty)
+ *     solicited/override   - the S and O flags from the advertisement
+ *
+ *     Records the target link-layer address when appropriate, updates
+ *     the NUD state, and flushes any packets queued while resolving.
+ */
+void ndisc_event_na(struct neighbour *neigh, unsigned char * opt, int opt_len,
+                   int solicited, int override)
+{
+       struct sk_buff *skb;
+
+       if (neigh->nud_state == NUD_NONE)
+       {
+               neigh->nud_state = NUD_INCOMPLETE;
+       }
+
+       /* take the advertised address if we have none yet, or the
+        * sender set the override flag */
+       if (neigh->nud_state == NUD_INCOMPLETE || override)
+       {
+
+               if (opt_len == 0)
+               {
+                       printk(KERN_DEBUG "no opt on NA\n");
+               }
+               else
+               {
+                       /* record hardware address */
+
+                       neigh->h_dest = neigh->hh_data;
+                       neigh->flags &= ~NCF_HHVALID;
+
+                       if (ndisc_store_hwaddr(neigh->dev, opt, opt_len,
+                                              neigh->h_dest, 
+                                              ND_OPT_TARGET_LL_ADDR))
+                       {
+                               printk(KERN_DEBUG
+                                      "event_na: invalid TARGET_LL_ADDR\n");
+                               neigh->h_dest = NULL;
+                               neigh->nud_state = NUD_NONE;
+                               return;
+                       }
+               }
+       }
+
+
+       if (solicited || override || neigh->nud_state == NUD_INCOMPLETE)
+       {
+
+               neigh->probes = 0;
+               neigh->tstamp = jiffies;
+
+               if (neigh->nud_state & NUD_IN_TIMER)
+               {
+                       ndisc_del_timer(neigh);
+               }
+
+               /* a solicited advertisement confirms reachability;
+                * an unsolicited one only gives us a (stale) address */
+               if (solicited)
+               {
+                       neigh->nud_state = NUD_REACHABLE;
+               }
+               else
+               {
+                       neigh->nud_state = NUD_STALE;
+               }
+       }
+                       
+       /* flush packets queued while the entry was being resolved */
+       while ((skb=skb_dequeue(&neigh->arp_queue)))
+       {
+               int priority = SOPRI_NORMAL;
+
+               if (skb->sk)
+                       priority = skb->sk->priority;
+               
+               dev_queue_xmit(skb, neigh->dev, priority);
+       }
+}
+
+/*
+ *     Handle the source of a received Neighbour Solicitation: create
+ *     or refresh the neighbour cache entry for `saddr' and record the
+ *     source link-layer address option.
+ */
+static void ndisc_event_ns(struct in6_addr *saddr, struct sk_buff *skb)
+{
+       struct neighbour *neigh;
+       u8 *opt;
+       int len;
+
+       /* options start after the ICMPv6 header and the target address */
+       opt = skb->h.raw;
+       opt += sizeof(struct icmpv6hdr) + sizeof(struct in6_addr);
+
+       len = skb->tail - opt;
+
+       neigh = ndisc_retrieve_neigh(skb->dev, saddr);
+
+       if (neigh == NULL)
+       {
+               neigh = ndisc_new_neigh(skb->dev, saddr);
+
+               /* BUG FIX: cache entry allocation can fail; the old
+                * code dereferenced the NULL pointer below. */
+               if (neigh == NULL)
+                       return;
+       }
+               
+       switch(neigh->nud_state) {
+               case NUD_REACHABLE:
+               case NUD_STALE:
+               case NUD_DELAY:
+                       /* NOTE(review): this bails out when the option
+                        * DIFFERS from the cached address, which is the
+                        * opposite of ndisc_ll_addr_update(); preserved
+                        * as-is - confirm intended behaviour */
+                       if (*opt != ND_OPT_SOURCE_LL_ADDR ||
+                           len != neigh->dev->addr_len ||
+                           memcmp(neigh->h_dest, opt + 2, len))
+                       {
+                               break;
+                       }
+
+                       if (neigh->nud_state & NUD_IN_TIMER)
+                       {
+                               ndisc_del_timer(neigh);
+                       }
+                       /* fallthrough */
+               default:
+                       neigh->flags &= ~NCF_HHVALID;
+                       neigh->h_dest = neigh->hh_data;
+                       
+                       if (ndisc_store_hwaddr(neigh->dev, opt, len,
+                                              neigh->h_dest,
+                                              ND_OPT_SOURCE_LL_ADDR))
+                       {
+                               printk(KERN_DEBUG 
+                                      "event_ns: invalid SOURCE_LL_ADDR\n");
+                               neigh->h_dest = NULL;
+                               neigh->nud_state = NUD_NONE;
+                               return;
+                       }
+
+                       neigh->nud_state = NUD_STALE;
+                       neigh->tstamp = jiffies;
+                       neigh->probes = 0;
+       }
+
+}
+
+/*
+ *     Find the default router list entry matching both the device and
+ *     the router address; NULL if no such entry exists.
+ */
+static struct rt6_info *ndisc_get_dflt_router(struct device *dev,
+                                             struct in6_addr *addr)
+{      
+       struct rt6_info *rt;
+
+       for (rt = default_rt_list; rt; rt = rt->next)
+       {
+               if (rt->rt_dev == dev && !ipv6_addr_cmp(&rt->rt_dst, addr))
+                       return rt;
+       }
+
+       return NULL;
+}
+
+/*
+ *     Append `rt' to the tail of the default router list, taking a
+ *     reference and accounting it as an allocated fib route.
+ */
+static void ndisc_add_dflt_router(struct rt6_info *rt)
+{
+       struct rt6_info **rtp;
+
+       rt->rt_ref++;
+       rt->fib_node = &routing_table;
+       rt6_stats.fib_rt_alloc++;
+
+       /* walk to the list tail; handles the empty list uniformly */
+       for (rtp = &default_rt_list; *rtp; rtp = &(*rtp)->next)
+               ;
+
+       *rtp = rt;
+}
+
+/*
+ *     Unlink `rt' from the default router list and drop the reference
+ *     taken when it was added.
+ */
+static void ndisc_del_dflt_router(struct rt6_info *rt)
+{
+       struct rt6_info **rtp;
+
+       /* pointer-to-pointer walk: head removal needs no special case */
+       for (rtp = &default_rt_list; *rtp; rtp = &(*rtp)->next)
+       {
+               if (*rtp == rt)
+               {
+                       *rtp = rt->next;
+                       break;
+               }
+       }
+
+       rt->fib_node = NULL;
+       rt_release(rt);
+}
+
+/*
+ *     Release every entry on the default router list and leave the
+ *     list empty.
+ */
+static void ndisc_purge_dflt_routers(void)
+{
+       struct rt6_info *rt = default_rt_list;
+
+       while (rt)
+       {
+               struct rt6_info *next = rt->next;
+
+               rt_release(rt);
+               rt = next;
+       }
+
+       default_rt_list = NULL;
+}
+
+/*
+ *     Update the cached link-layer address of `neigh' from option
+ *     `type' found in the option area opt/len.  If the address is
+ *     unchanged for an entry we already trust, nothing is done;
+ *     otherwise the entry is re-stamped as STALE.
+ */
+static void ndisc_ll_addr_update(struct neighbour *neigh, u8* opt, int len,
+                                int type)
+{
+       switch(neigh->nud_state) {
+       case NUD_REACHABLE:
+       case NUD_STALE:
+       case NUD_DELAY:
+               /* same address as cached: leave the state alone */
+               if (len == neigh->dev->addr_len &&
+                   memcmp(neigh->h_dest, opt + 2, len) == 0)
+               {
+                       break;
+               }
+
+               if (neigh->nud_state & NUD_IN_TIMER)
+               {
+                       ndisc_del_timer(neigh);
+               }
+               /* fallthrough: address changed, record the new one */
+       default:
+               neigh->flags &= ~NCF_HHVALID;
+               neigh->h_dest = neigh->hh_data;
+               
+               if (ndisc_store_hwaddr(neigh->dev, opt, len, neigh->h_dest,
+                                      type))
+               {
+                       printk(KERN_DEBUG "NDISC: invalid LL_ADDR\n");
+                       neigh->h_dest = NULL;
+                       neigh->nud_state = NUD_NONE;
+                       break;
+               }
+               
+               neigh->nud_state = NUD_STALE;
+               neigh->tstamp = jiffies;
+               neigh->probes = 0;
+       }
+       
+}
+
+/*
+ *     Pick a default router.  A REACHABLE neighbour confirmed within
+ *     nd_reachable_time is returned immediately; otherwise the best
+ *     candidate seen so far (REACHABLE beats anything else, anything
+ *     beats nothing) is returned.
+ */
+struct rt6_info * dflt_rt_lookup(void)
+{
+       struct rt6_info *best = NULL;
+       struct rt6_info *rt;
+       int best_score = -1;
+       unsigned long now = jiffies;
+
+       for (rt = default_rt_list; rt; rt = rt->next)
+       {
+               struct neighbour *neigh = rt->rt_nexthop;
+
+               if (best_score < 0)
+               {
+                       best_score = 0;
+                       best = rt;
+               }
+
+               if (neigh->nud_state == NUD_REACHABLE)
+               {
+                       if (best_score < 1)
+                       {
+                               best_score = 1;
+                               best = rt;
+                       }
+
+                       /* freshly confirmed: use it right away */
+                       if (now - neigh->tstamp < nd_reachable_time)
+                               return rt;
+               }
+       }
+
+       return best;
+}
+
+/*
+ *     Process a received Router Advertisement: maintain the default
+ *     router list, the hop limit, the reachable/retrans timers, and
+ *     the options (source LL address, prefix info, MTU).
+ */
+static void ndisc_router_discovery(struct sk_buff *skb)
+{
+        struct ra_msg *ra_msg = (struct ra_msg *) skb->h.raw;
+       struct neighbour *neigh;
+       struct inet6_dev *in6_dev;
+       struct rt6_info *rt;
+       int lifetime;
+       int optlen;
+
+       __u8 * opt = (__u8 *)(ra_msg + 1);
+
+       optlen = (skb->tail - skb->h.raw) - sizeof(struct ra_msg);
+
+       /* RAs must arrive with hop limit 255: anything else cannot
+        * have originated on the local link */
+       if (skb->ipv6_hdr->hop_limit != 255)
+       {
+               printk(KERN_WARNING
+                      "NDISC: fake router advertisment received\n");
+               return;
+       }
+
+       /*
+        *      set the RA_RECV flag in the interface
+        */
+
+       in6_dev = ipv6_get_idev(skb->dev);
+       if (in6_dev == NULL)
+       {
+               printk(KERN_DEBUG "RA: can't find in6 device\n");
+               return;
+       }
+       
+       if (in6_dev->if_flags & IF_RS_SENT)
+       {
+               /*
+                *      flag that an RA was received after an RS was sent
+                *      out on this interface.
+                */
+               in6_dev->if_flags |= IF_RA_RCVD;
+       }
+
+       lifetime = ntohs(ra_msg->icmph.icmp6_rt_lifetime);
+
+       rt = ndisc_get_dflt_router(skb->dev, &skb->ipv6_hdr->saddr);
+
+       /* lifetime 0: the sender stops being a default router */
+       if (rt && lifetime == 0)
+       {
+               ndisc_del_dflt_router(rt);
+               rt = NULL;
+       }
+
+       if (rt == NULL && lifetime)
+       {
+               printk(KERN_DEBUG "ndisc_rdisc: new default router\n");
+
+               rt = (struct rt6_info *)kmalloc(sizeof(struct rt6_info),
+                                               GFP_ATOMIC);
+
+               /* BUG FIX: atomic allocation can fail; the old code
+                * went on to memset(NULL, ...) */
+               if (rt == NULL)
+               {
+                       printk(KERN_DEBUG "ndisc_rdisc: no memory\n");
+                       return;
+               }
+
+               neigh = ndisc_retrieve_neigh(skb->dev, &skb->ipv6_hdr->saddr);
+
+               if (neigh == NULL)
+               {
+                       neigh = ndisc_new_neigh(skb->dev,
+                                               &skb->ipv6_hdr->saddr);
+               }
+
+               atomic_inc(&neigh->refcnt);
+               neigh->flags |= NCF_ROUTER;
+
+               memset(rt, 0, sizeof(struct rt6_info));
+
+               ipv6_addr_copy(&rt->rt_dst, &skb->ipv6_hdr->saddr);
+               rt->rt_metric = 1;
+               rt->rt_flags = RTF_GATEWAY | RTF_DYNAMIC;
+               rt->rt_dev = skb->dev;
+               rt->rt_nexthop = neigh;
+
+               ndisc_add_dflt_router(rt);
+       }
+
+       if (rt)
+       {
+               rt->rt_expires = jiffies + (HZ * lifetime);
+       }
+
+       if (ra_msg->icmph.icmp6_hop_limit)
+       {
+               ipv6_hop_limit = ra_msg->icmph.icmp6_hop_limit;
+       }
+
+       /*
+        *      Update Reachable Time and Retrans Timer
+        */
+
+       if (ra_msg->retrans_timer)
+       {
+               nd_retrans_timer = ntohl(ra_msg->retrans_timer);
+       }
+
+       if (ra_msg->reachable_time)
+       {
+               __u32 rtime = ntohl(ra_msg->reachable_time);
+
+               if (rtime != nd_base_reachable_time)
+               {
+                       nd_base_reachable_time = rtime;
+                       nd_gc_staletime = 3 * nd_base_reachable_time;
+                       nd_reachable_time = rand_reach_time();
+               }
+               
+       }
+
+       /*
+        *      Process options.
+        */
+
+       while(optlen > 0) {
+               int len;
+
+               len = (opt[1] << 3);
+
+               if (len == 0)
+               {
+                       printk(KERN_DEBUG "RA: opt has 0 len\n");
+                       break;
+               }
+
+               switch(*opt) {
+               case ND_OPT_SOURCE_LL_ADDR:
+                       
+                       if (rt == NULL)
+                               break;
+                       
+                       neigh = rt->rt_nexthop;
+
+                       ndisc_ll_addr_update(neigh, opt, len,
+                                            ND_OPT_SOURCE_LL_ADDR);
+                       break;
+
+               case ND_OPT_PREFIX_INFO:
+                       addrconf_prefix_rcv(skb->dev, opt, len);
+                       break;
+
+               case ND_OPT_MTU:
+
+                       if (rt)
+                       {
+                               int mtu;
+                               struct device *dev;
+                               
+                               /* BUG FIX: the MTU sits 4 bytes into the
+                                * option (type, len, reserved) in network
+                                * byte order.  The old code computed
+                                * htonl(*(__u32 *)opt + 4): dereference
+                                * before the add, and the wrong byte-order
+                                * conversion direction. */
+                               mtu = ntohl(*(__u32 *)(opt + 4));
+                               dev = rt->rt_nexthop->dev;
+
+                               if (mtu < 576)
+                               {
+                                       printk(KERN_DEBUG "NDISC: router "
+                                              "announcement with mtu = %d\n",
+                                              mtu);
+                                       break;
+                               }
+
+                               if (dev->change_mtu)
+                               {
+                                       dev->change_mtu(dev, mtu);
+                               }
+                               else
+                               {
+                                       dev->mtu = mtu;
+                               }
+                       }
+                       break;
+
+               case ND_OPT_TARGET_LL_ADDR:
+               case ND_OPT_REDIRECT_HDR:
+                       printk(KERN_DEBUG "got illegal option with RA");
+                       break;
+               default:
+                       printk(KERN_DEBUG "unkown option in RA\n");
+               }
+               optlen -= len;
+               opt += len;
+       }
+        
+}
+
+void ndisc_forwarding_on(void)
+{
+       /*
+        *      forwarding was turned on: the host-side default router
+        *      list is no longer meaningful, drop it.
+        */
+
+       ndisc_purge_dflt_routers();
+}
+
+void ndisc_forwarding_off(void)
+{
+       /*
+        *      forwarding was turned off: intentionally a no-op, the
+        *      router list is rebuilt from received RAs.
+        */
+}
+
+/*
+ *     Process a received ICMPv6 Redirect: validate it (hop limit 255,
+ *     link-local source, sane target/destination), update the routing
+ *     table, and record any target link-layer address option.
+ */
+static void ndisc_redirect_rcv(struct sk_buff *skb)
+{
+       struct icmpv6hdr *icmph;
+       struct in6_addr *dest;
+       struct in6_addr *target;        /* new first hop to destination */
+       struct neighbour *neigh;
+       struct rt6_info *rt;
+       int on_link = 0;
+       int optlen;
+       u8 * opt;
+
+       if (skb->ipv6_hdr->hop_limit != 255)
+       {
+               printk(KERN_WARNING
+                      "NDISC: fake ICMP redirect received\n");
+               return;
+       }
+
+       if (!(ipv6_addr_type(&skb->ipv6_hdr->saddr) & IPV6_ADDR_LINKLOCAL))
+       {
+               printk(KERN_WARNING
+                      "ICMP redirect: source address is not linklocal\n");
+               return;
+       }
+
+       optlen = skb->tail - skb->h.raw;
+       optlen -= sizeof(struct icmpv6hdr) + 2 * sizeof(struct in6_addr);
+
+       if (optlen < 0)
+       {
+               printk(KERN_WARNING "ICMP redirect: packet too small\n");
+               return;
+       }
+
+       icmph = (struct icmpv6hdr *) skb->h.raw;
+       target = (struct in6_addr *) (icmph + 1);
+       dest = target + 1;
+
+       if (ipv6_addr_type(dest) & IPV6_ADDR_MULTICAST)
+       {
+               printk(KERN_WARNING "ICMP redirect for multicast addr\n");
+               return;
+       }
+
+       /* target == destination means the destination itself is on-link */
+       if (ipv6_addr_cmp(dest, target) == 0)
+       {
+               on_link = 1;
+       }
+       else if (!(ipv6_addr_type(target) & IPV6_ADDR_LINKLOCAL))
+       {
+               printk(KERN_WARNING
+                      "ICMP redirect: target address is not linklocal\n");
+               return;
+       }
+
+       /* passed validation tests */
+
+       rt = ipv6_rt_redirect(skb->dev, dest, target, on_link);
+
+       if (rt == NULL)
+       {
+               printk(KERN_WARNING "ICMP redirect: no route to host\n");
+               return;
+       }
+
+       neigh = rt->rt_nexthop;
+
+       opt = (u8 *) (dest + 1);
+
+       while (optlen > 0)
+       {
+               int len;
+
+               len = (opt[1] << 3);
+
+               /* BUG FIX: a zero length option looped forever; bail
+                * out like the RA option parser does */
+               if (len == 0)
+               {
+                       printk(KERN_DEBUG "redirect: opt has 0 len\n");
+                       break;
+               }
+
+               if (*opt == ND_OPT_TARGET_LL_ADDR)
+               {
+                       ndisc_ll_addr_update(neigh, opt, len,
+                                            ND_OPT_TARGET_LL_ADDR);
+               }
+
+               opt += len;
+               optlen -= len;
+       }
+}
+
+/*
+ *     Send an ICMPv6 Redirect telling the source of `skb' that
+ *     `target' is a better first hop.  Includes a target link-layer
+ *     address option when the neighbour is REACHABLE, plus as much of
+ *     the offending packet as fits in the redirected-header option.
+ */
+void ndisc_send_redirect(struct sk_buff *skb, struct neighbour *neigh,
+                        struct in6_addr *target)
+{
+       struct sock *sk = (struct sock *) ndisc_socket.data;
+       int len = sizeof(struct icmpv6hdr) + 2 * sizeof(struct in6_addr);
+       struct sk_buff *buff;
+       struct inet6_ifaddr *ifp;
+       struct icmpv6hdr *icmph;
+       struct in6_addr *addrp;
+       struct rt6_info *rt;
+       int ta_len = 0;
+       u8 *opt;
+       int rd_len;
+       int err;
+       int hlen;
+
+       rt = fibv6_lookup(&skb->ipv6_hdr->saddr, skb->dev, 0);
+
+       /* BUG FIX: the lookup can fail; old code dereferenced NULL */
+       if (rt == NULL)
+       {
+               printk(KERN_DEBUG "ndisc_send_redirect: no route\n");
+               return;
+       }
+
+       /* only redirect hosts that are actually on-link */
+       if (rt->rt_flags & RTF_GATEWAY)
+       {
+               printk(KERN_DEBUG "ndisc_send_redirect: not a neighbour\n");
+               return;
+       }
+
+       if (neigh->nud_state == NUD_REACHABLE)
+       {
+               ta_len  = ((neigh->dev->addr_len + 1) >> 3) + 1;
+               len += (ta_len << 3);
+       }
+
+       /* redirected header: as much of the packet as fits in the
+        * 536 byte minimum, rounded down to an 8 byte boundary */
+       rd_len = min(536 - len, ntohs(skb->ipv6_hdr->payload_len) + 8);
+       rd_len &= ~0x7;
+       len += rd_len;
+
+       ifp = ipv6_get_lladdr(skb->dev);
+
+       if (ifp == NULL)
+       {
+               printk(KERN_DEBUG "redirect: no link_local addr for dev\n");
+               return;
+       }
+
+       buff = sock_alloc_send_skb(sk, MAX_HEADER + len, 0, 0, &err);
+
+       if (buff == NULL)
+       {
+               printk(KERN_DEBUG "ndisc_send_redirect: alloc_skb failed\n");
+               return;
+       }
+
+       
+       hlen = 0;
+       if (skb->dev->hard_header_len)
+       {
+               hlen = (skb->dev->hard_header_len + 15) & ~15;
+       }
+
+       skb_reserve(buff, hlen + sizeof(struct ipv6hdr));
+       
+       icmph = (struct icmpv6hdr *) skb_put(buff, len);
+
+       memset(icmph, 0, sizeof(struct icmpv6hdr));
+       icmph->type = NDISC_REDIRECT;
+
+       /*
+        *      copy target and destination addresses
+        */
+
+       addrp = (struct in6_addr *)(icmph + 1);
+       ipv6_addr_copy(addrp, target);
+       addrp++;
+       ipv6_addr_copy(addrp, &skb->ipv6_hdr->daddr);
+       
+       opt = (u8*) (addrp + 1);
+               
+       /*
+        *      include target_address option
+        */
+
+       if (ta_len)
+       {
+               int zb;
+               
+               *(opt++) = ND_OPT_TARGET_LL_ADDR;
+               *(opt++) = ta_len;
+
+               memcpy(opt, neigh->h_dest, neigh->dev->addr_len);
+               opt += neigh->dev->addr_len;
+
+               /* 
+                *      if link layer address doesn't end on a 8 byte
+                *      boundary memset(0) the remainder
+                */
+
+               zb = (neigh->dev->addr_len + 2) & 0x7; 
+               if (zb)
+               {
+                       int comp;
+
+                       comp = 8 - zb;
+                       memset(opt, 0, comp);
+                       opt += comp;
+               }
+       }
+
+       /*
+        *      build redirect option and copy skb over to the new packet.
+        */
+
+       memset(opt, 0, 8);      
+       *(opt++) = ND_OPT_REDIRECT_HDR;
+       *(opt++) = (rd_len >> 3);
+       opt += 6;
+
+       /* BUG FIX: copy the packet header itself; the old code took
+        * &skb->ipv6_hdr, the address of the pointer field, and so
+        * copied stack/struct garbage into the option */
+       memcpy(opt, skb->ipv6_hdr, rd_len - 8);
+       
+       icmph->checksum = csum_ipv6_magic(&ifp->addr, &skb->ipv6_hdr->saddr,
+                                         len, IPPROTO_ICMPV6,
+                                         csum_partial((u8 *) icmph, len, 0));
+
+       ipv6_xmit(sk, buff, &ifp->addr, &skb->ipv6_hdr->saddr, NULL, IPPROTO_ICMPV6);
+}
+
+/*
+ *     Called by upper layers to validate a neighbour cache entry:
+ *     a transport-level confirmation marks the entry REACHABLE and
+ *     cancels any pending DELAY probe.
+ */
+void ndisc_validate(struct neighbour *neigh)
+{
+       if (neigh->nud_state == NUD_INCOMPLETE)
+               return;
+
+       if (neigh->nud_state == NUD_DELAY)
+               ndisc_del_timer(neigh);
+
+       nd_stats.rcv_upper_conf++;
+       neigh->nud_state = NUD_REACHABLE;
+       neigh->tstamp = jiffies;
+}
+
+/*
+ *     Entry point for received ndisc messages.  Dispatches on the
+ *     ICMPv6 type: answers neighbour solicitations for our addresses,
+ *     processes neighbour advertisements, and - when not forwarding -
+ *     router advertisements and redirects.  Always returns 0.
+ */
+int ndisc_rcv(struct sk_buff *skb, struct device *dev,
+             struct in6_addr *saddr, struct in6_addr *daddr,
+             struct ipv6_options *opt, unsigned short len)
+{
+       struct nd_msg *msg = (struct nd_msg *) skb->h.raw;
+       struct neighbour *neigh;
+       struct inet6_ifaddr *ifp;
+
+       switch (msg->icmph.type) {
+       case NDISC_NEIGHBOUR_SOLICITATION:
+               /* only answer solicitations for one of our addresses */
+               if ((ifp = ipv6_chk_addr(&msg->target)))
+               {
+                       int addr_type;
+
+                       if (ifp->flags & DAD_INCOMPLETE)
+                       {
+                               /*
+                                *      DAD failed 
+                                */
+
+                               printk(KERN_DEBUG "duplicate address\n");
+                               del_timer(&ifp->timer);
+                               return 0;
+                       }
+
+                       addr_type = ipv6_addr_type(saddr);
+                       if (addr_type & IPV6_ADDR_UNICAST)
+                       {
+                               int inc;
+
+                               /* 
+                                *      update / create cache entry
+                                *      for the source adddress
+                                */
+
+                               nd_stats.rcv_probes_ucast++;
+                               ndisc_event_ns(saddr, skb);
+
+                               /* answer solicitation */
+                               neigh = ndisc_retrieve_neigh(dev, saddr);
+
+                               /* inc != 0 when we were solicited via a
+                                * multicast destination */
+                               inc = ipv6_addr_type(daddr);
+                               inc &= IPV6_ADDR_MULTICAST;
+
+                               ndisc_send_na(dev, neigh, saddr, &ifp->addr, 
+                                             ifp->idev->router, 1, inc, inc);
+                       }
+                       else
+                       {
+                               /* FIXME */
+                               printk(KERN_DEBUG "ns: non unicast saddr\n");
+                       }
+               }
+               break;
+
+       case NDISC_NEIGHBOUR_ADVERTISEMENT:
+
+               neigh = ndisc_retrieve_neigh(skb->dev, &msg->target);
+               if (neigh)
+               {
+                       /* track router <-> host transitions signalled
+                        * by the R flag */
+                       if (neigh->flags & NCF_ROUTER)
+                       {
+                               if (msg->icmph.icmp6_router == 0)
+                               {
+                                       /*
+                                        *      Change: router to host
+                                        */
+                                       
+                                       struct rt6_info *rt;
+                                       rt = ndisc_get_dflt_router(skb->dev,
+                                                                  saddr);
+                                       if (rt)
+                                       {
+                                               ndisc_del_dflt_router(rt);
+                                       }
+                               }
+                       }
+                       else
+                       {
+                               if (msg->icmph.icmp6_router)
+                               {
+                                       neigh->flags |= NCF_ROUTER;
+                               }
+                       }
+                       ndisc_event_na(neigh, (unsigned char *) &msg->opt,
+                                      skb->tail - (u8 *)&msg->opt /*opt_len*/,
+                                      msg->icmph.icmp6_solicited,
+                                      msg->icmph.icmp6_override);
+               }
+               break;
+
+       }
+
+       /* hosts only: routers do not process RAs or redirects */
+       if (ipv6_forwarding == 0)
+       {
+               switch (msg->icmph.type) {
+               case NDISC_ROUTER_ADVERTISEMENT:
+                       ndisc_router_discovery(skb);
+                       break;
+
+               case NDISC_REDIRECT:
+                       ndisc_redirect_rcv(skb);
+                       break;
+               }
+       }
+
+       return 0;
+}
+
+/*
+ *     /proc read handler for the neighbour cache (registered below as
+ *     "ndisc_cache").  Dumps one line per cached neighbour:
+ *     <ipv6 addr hex> <bucket> <nud_state> <expires-now> <age> <refcnt>
+ *     <flags> <link-layer addr hex>.  Returns the number of bytes made
+ *     available at *start for this read.
+ *
+ *     NOTE(review): nothing bounds 'len' against the proc buffer size
+ *     while dumping; a large cache could overrun 'buffer' — the single
+ *     page procfs supplies is assumed to be enough.  TODO confirm.
+ */
+int ndisc_get_info(char *buffer, char **start, off_t offset, int length,
+                  int dummy)
+{
+       struct neighbour *neigh;
+       unsigned long now = jiffies;
+       int len = 0;
+       int i;
+
+       /* Serialize against cache modification (counter-style lock;
+        * released via ndisc_release_lock() below).
+        */
+       atomic_inc(&ndisc_lock);
+
+       for (i = 0; i < NCACHE_NUM_BUCKETS; i++)
+       {
+               for(neigh = neighbours[i]; neigh; neigh=neigh->next)
+               {
+                       int j;
+
+                       /* 16 bytes of IPv6 address as 32 hex digits. */
+                       for (j=0; j<16; j++)
+                       {
+                               sprintf(buffer + len, "%02x",
+                                       neigh->addr.s6_addr[j]);
+                               len += 2;
+                       }
+
+                       len += sprintf(buffer + len,
+                                      " %02x %02x %08lx %08lx %04x %04x ",
+                                      i,
+                                      neigh->nud_state,
+                                      neigh->expires - now,
+                                      now - neigh->tstamp,
+                                      neigh->refcnt,
+                                      neigh->flags);
+
+                       /* Link-layer address if resolved, else a run of
+                        * zeros as a placeholder.
+                        */
+                       if (neigh->h_dest)
+                       {
+                               for (j=0; j< neigh->dev->addr_len; j++)
+                               {
+                                       sprintf(buffer + len, "%02x",
+                                               neigh->h_dest[j]);
+                                       len += 2;
+                               }
+                       }
+                       else
+                                len += sprintf(buffer + len, "000000000000");
+                       len += sprintf(buffer + len, "\n");
+                                      
+               }
+       }
+
+       ndisc_release_lock();
+
+       /* Standard get_info windowing: expose only [offset, offset+length). */
+       *start = buffer + offset;
+
+       len -= offset;
+
+       if (len > length)
+               len = length;
+       return len;
+}
+
+/*
+ *     /proc/net/ndisc_cache entry, registered dynamically from
+ *     ndisc_init() when built as a module.  Positional initializer:
+ *     inode 0 (assigned at registration), name length 11 ("ndisc_cache"),
+ *     regular file readable by all, one link, uid/gid 0, then size/ops,
+ *     and ndisc_get_info as the read routine — field order per
+ *     struct proc_dir_entry of this kernel; confirm against proc_fs.h.
+ */
+struct proc_dir_entry ndisc_proc_entry =
+{
+        0, 11, "ndisc_cache",
+        S_IFREG | S_IRUGO, 1, 0, 0,
+        0, NULL,
+        &ndisc_get_info
+};
+
+/*
+ *     One-time neighbour discovery start-up: create the internal
+ *     ICMPv6 control socket used to emit ND messages, clear the
+ *     neighbour hash table, and arm the state-machine and garbage
+ *     collection timers.  When modular, also hooks ethernet address
+ *     resolution and registers the /proc entry.
+ *
+ *     NOTE(review): on ops->create() failure only a debug message is
+ *     printed and initialization continues; the subsequent
+ *     ndisc_socket.data dereference then uses whatever create left
+ *     behind — confirm this cannot be reached in practice.
+ */
+void ndisc_init(struct proto_ops *ops)
+{
+       struct sock *sk;
+        int i = 0;
+        int err;
+
+       /*
+        *      Init ndisc_socket
+        */
+       ndisc_socket.type=SOCK_RAW;
+       ndisc_socket.ops=ops;
+
+       if((err=ops->create(&ndisc_socket, IPPROTO_ICMPV6))<0)
+               printk(KERN_DEBUG 
+                      "Failed to create the NDISC control socket.\n");
+
+       /* Presumably undoes the module use count taken by the socket
+        * create above, since this internal socket must not pin the
+        * module — TODO confirm.
+        */
+       MOD_DEC_USE_COUNT;
+
+       sk = ndisc_socket.data;
+       sk->allocation = GFP_ATOMIC;    /* may send from bh context */
+       /* ND messages are sent with hop limit 255 and high priority. */
+       sk->net_pinfo.af_inet6.hop_limit = 255;
+       sk->net_pinfo.af_inet6.priority  = 15;
+       sk->num = 256;                          /* Don't receive any data */
+
+        /*
+         * Initialize the neighbours hash buckets.
+         */
+
+        for (; i < NCACHE_NUM_BUCKETS; i++)
+                neighbours[i] = NULL;
+        /* General ND state machine timer. */
+       init_timer(&ndisc_timer);
+       ndisc_timer.function = ndisc_timer_handler;
+       ndisc_timer.data = 0L;
+       ndisc_timer.expires = 0L;
+
+        /* ND GC timer */
+        init_timer(&ndisc_gc_timer);
+        ndisc_gc_timer.function = ndisc_garbage_collect;
+        ndisc_gc_timer.data = 0L;
+        ndisc_gc_timer.expires = jiffies + nd_gc_interval;
+
+       add_timer(&ndisc_gc_timer);
+
+#ifdef CONFIG_IPV6_MODULE
+       ndisc_eth_hook = ndisc_eth_resolv;
+        proc_register_dynamic(&proc_net, &ndisc_proc_entry);
+#endif
+}
+
+#ifdef CONFIG_IPV6_MODULE
+/*
+ *     Module unload: undo ndisc_init() — drop the resolver hook,
+ *     remove the /proc entry and stop both ND timers.
+ */
+void ndisc_cleanup(void)
+{
+       ndisc_eth_hook = NULL;
+        proc_unregister(&proc_net, ndisc_proc_entry.low_ino);
+       del_timer(&ndisc_gc_timer);
+       del_timer(&ndisc_timer);
+}
+#endif
+
+/*
+ * Local variables:
+ *  compile-command: "gcc -D__KERNEL__ -I/usr/src/linux/include -Wall -Wstrict-prototypes -O2 -fomit-frame-pointer -fno-strength-reduce -pipe -m486 -DCPU=486 -DMODULE -DMODVERSIONS -include /usr/src/linux/include/linux/modversions.h  -c -o ndisc.o ndisc.c"
+ * c-file-style: "Linux"
+ * End:
+ */
diff --git a/net/ipv6/protocol.c b/net/ipv6/protocol.c
new file mode 100644 (file)
index 0000000..7ba6f5b
--- /dev/null
@@ -0,0 +1,112 @@
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/socket.h>
+#include <linux/sockios.h>
+#include <linux/sched.h>
+#include <linux/net.h>
+#include <linux/in6.h>
+#include <linux/netdevice.h>
+#include <linux/if_arp.h>
+
+#include <net/sock.h>
+#include <net/snmp.h>
+
+#include <net/ipv6.h>
+#include <net/protocol.h>
+
+/* Registry of IPv6 transport protocol handlers: a hash table of
+ * singly-linked chains keyed by (protocol & (MAX_INET_PROTOS - 1)).
+ * Only slot 0 is spelled out; the rest are implicitly NULL.
+ */
+struct inet6_protocol *inet6_protocol_base = NULL;
+struct inet6_protocol *inet6_protos[MAX_INET_PROTOS] = 
+{
+       NULL
+};
+
+
+/*
+ *     Look up the handler registered for next-header value 'prot'.
+ *     Walks the hash chain and returns the first exact protocol
+ *     match, or NULL if none is registered.
+ */
+struct inet6_protocol *inet6_get_protocol(unsigned char prot)
+{
+       unsigned char hash;
+       struct inet6_protocol *p;
+
+       hash = prot & (MAX_INET_PROTOS - 1);
+       for (p = inet6_protos[hash] ; p != NULL; p=p->next) 
+       {
+               if (p->protocol == prot) 
+                       return((struct inet6_protocol *) p);
+       }
+       return(NULL);
+}
+
+/*
+ *     Register a protocol handler: push it on the front of its hash
+ *     chain, then set prot->copy if another handler for the same
+ *     protocol number already exists further down the chain (so
+ *     delivery knows the skb must be duplicated for each handler).
+ */
+void inet6_add_protocol(struct inet6_protocol *prot)
+{
+       unsigned char hash;
+       struct inet6_protocol *p2;
+
+       hash = prot->protocol & (MAX_INET_PROTOS - 1);
+       prot ->next = inet6_protos[hash];
+       inet6_protos[hash] = prot;
+       prot->copy = 0;
+
+       /*
+        *      Set the copy bit if we need to. 
+        */
+        
+       p2 = (struct inet6_protocol *) prot->next;
+       while(p2 != NULL) 
+       {
+               if (p2->protocol == prot->protocol) 
+               {
+                       prot->copy = 1;
+                       break;
+               }
+               p2 = (struct inet6_protocol *) p2->next;
+       }
+}
+
+/*
+ *     Remove a protocol from the hash tables.
+ */
+/*
+ *     Remove a protocol from the hash tables.
+ *
+ *     Returns 0 on success, -1 if 'prot' was not found.  When the
+ *     removed entry was the last with its protocol number, the copy
+ *     bit of the previous same-protocol entry is cleared again.
+ *
+ *     NOTE(review): the fast path that unlinks the chain head returns
+ *     without doing the copy-bit bookkeeping the general path does —
+ *     confirm a head entry can never be the duplicate that keeps an
+ *     earlier entry's copy bit set.
+ */
+int inet6_del_protocol(struct inet6_protocol *prot)
+{
+       struct inet6_protocol *p;
+       struct inet6_protocol *lp = NULL;
+       unsigned char hash;
+
+       hash = prot->protocol & (MAX_INET_PROTOS - 1);
+       if (prot == inet6_protos[hash]) 
+       {
+               inet6_protos[hash] = (struct inet6_protocol *) inet6_protos[hash]->next;
+               return(0);
+       }
+
+       p = (struct inet6_protocol *) inet6_protos[hash];
+       while(p != NULL) 
+       {
+               /*
+                * We have to worry if the protocol being deleted is
+                * the last one on the list, then we may need to reset
+                * someone's copied bit.
+                */
+               if (p->next != NULL && p->next == prot) 
+               {
+                       /*
+                        * if we are the last one with this protocol and
+                        * there is a previous one, reset its copy bit.
+                        */
+                       if (p->copy == 0 && lp != NULL) 
+                               lp->copy = 0;
+                       p->next = prot->next;
+                       return(0);
+               }
+               /* remember the most recent earlier entry with the same
+                * protocol number — candidate for copy-bit reset above.
+                */
+               if (p->next != NULL && p->next->protocol == prot->protocol) 
+                       lp = p;
+
+               p = (struct inet6_protocol *) p->next;
+       }
+       return(-1);
+}
+
+/*
+ * Local variables:
+ *  compile-command: "gcc -D__KERNEL__ -I/usr/src/linux/include -Wall -Wstrict-prototypes -O2 -fomit-frame-pointer -fno-strength-reduce -pipe -m486 -DCPU=486 -DMODULE -DMODVERSIONS -include /usr/src/linux/include/linux/modversions.h  -c -o protocol.o protocol.c"
+ * End:
+ */
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
new file mode 100644 (file)
index 0000000..cab6219
--- /dev/null
@@ -0,0 +1,458 @@
+/*
+ *     RAW sockets for IPv6
+ *     Linux INET6 implementation 
+ *
+ *     Authors:
+ *     Pedro Roque             <roque@di.fc.ul.pt>     
+ *
+ *     Adapted from linux/net/ipv4/raw.c
+ *
+ *     $Id: raw.c,v 1.5 1996/10/29 22:45:53 roque Exp $
+ *
+ *     This program is free software; you can redistribute it and/or
+ *      modify it under the terms of the GNU General Public License
+ *      as published by the Free Software Foundation; either version
+ *      2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/socket.h>
+#include <linux/sockios.h>
+#include <linux/sched.h>
+#include <linux/net.h>
+#include <linux/in6.h>
+#include <linux/netdevice.h>
+#include <linux/if_arp.h>
+#include <linux/icmpv6.h>
+
+#include <net/sock.h>
+#include <net/snmp.h>
+
+#include <net/ip.h>
+#include <net/udp.h>
+
+#include <net/ipv6.h>
+#include <net/ndisc.h>
+#include <net/protocol.h>
+#include <net/ipv6_route.h>
+#include <net/addrconf.h>
+#include <net/transp_v6.h>
+
+#include <asm/uaccess.h>
+
+/*
+ *     ICMPv6 error handler for raw sockets.  Currently a stub: only
+ *     guards against a NULL socket and otherwise ignores the error
+ *     (no sk->err reporting yet).
+ */
+void rawv6_err(struct sock *sk, int type, int code, unsigned char *buff,
+              struct in6_addr *saddr, struct in6_addr *daddr)
+{
+       if (sk == NULL) 
+               return;
+
+}
+
+/*
+ *     Queue one skb on a raw socket's receive queue, freeing it if
+ *     the socket's buffer accounting rejects it.  Always returns 0;
+ *     a drop is not reported to the caller.
+ */
+static inline int rawv6_rcv_skb(struct sock * sk, struct sk_buff * skb)
+{
+       /* Charge it to the socket. */
+       
+       if (sock_queue_rcv_skb(sk,skb)<0)
+       {
+               /* ip_statistics.IpInDiscards++; */
+               skb->sk=NULL;
+               kfree_skb(skb, FREE_READ);
+               return 0;
+       }
+
+       /* ip_statistics.IpInDelivers++; */
+       return 0;
+}
+
+/*
+ *     This is next to useless... 
+ *     if we demultiplex in network layer we don't need the extra call
+ *     just to queue the skb... 
+ *     maybe we could have the network decide uppon an hint if it 
+ *     should call raw_rcv for demultiplexing
+ */
+/*
+ *     Raw-socket receive entry point.  Assumes the network layer has
+ *     already demultiplexed and set skb->sk (NOTE(review): dereferenced
+ *     without a NULL check — confirm every caller guarantees it).
+ *     With IP_HDRINCL the raw header pointer is rewound so the user
+ *     sees the IPv6 header too.  If the socket is busy (in use by a
+ *     process), the skb is parked on the backlog instead of being
+ *     queued directly.
+ */
+int rawv6_rcv(struct sk_buff *skb, struct device *dev,
+             struct in6_addr *saddr, struct in6_addr *daddr,
+             struct ipv6_options *opt, unsigned short len)
+{
+       struct sock *sk;
+
+       sk = skb->sk;
+
+       if (sk->ip_hdrincl)
+       {
+               skb->h.raw = (unsigned char *) skb->ipv6_hdr;
+       }
+
+       if (sk->users) {
+               __skb_queue_tail(&sk->back_log, skb);
+               return 0;
+       }
+
+       rawv6_rcv_skb(sk, skb);
+       return 0;
+}
+
+
+/*
+ *     This should be easy, if there is something there
+ *     we return it, otherwise we block.
+ */
+
+/*
+ *     recvmsg for raw IPv6 sockets: block (unless noblock) for a
+ *     datagram, copy at most 'len' bytes to the user iovec, fill in
+ *     the source address and optional ancillary data, and return the
+ *     number of bytes copied (or a negative error).
+ *
+ *     NOTE(review): *addr_len is written before msg_name is known to
+ *     be non-NULL, and excess bytes beyond 'len' are silently
+ *     truncated without MSG_TRUNC signalling — confirm intended.
+ */
+int rawv6_recvmsg(struct sock *sk, struct msghdr *msg, int len,
+                 int noblock, int flags,int *addr_len)
+{
+       struct sockaddr_in6 *sin6=(struct sockaddr_in6 *)msg->msg_name;
+       struct sk_buff *skb;
+       int copied=0;
+       int err;
+
+
+       if (flags & MSG_OOB)
+               return -EOPNOTSUPP;
+               
+       if (sk->shutdown & RCV_SHUTDOWN) 
+               return(0);
+
+       if (addr_len) 
+               *addr_len=sizeof(*sin6);
+
+       skb=skb_recv_datagram(sk, flags, noblock, &err);
+       if(skb==NULL)
+               return err;
+       
+       /* h.raw marks the start of user-visible data (may include the
+        * IPv6 header under IP_HDRINCL, see rawv6_rcv).
+        */
+       copied = min(len, skb->tail - skb->h.raw);
+       
+       skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
+       sk->stamp=skb->stamp;
+
+       /* Copy the address. */
+       if (sin6) 
+       {
+               sin6->sin6_family = AF_INET6;
+               memcpy(&sin6->sin6_addr, &skb->ipv6_hdr->saddr, 
+                      sizeof(struct in6_addr));
+
+               *addr_len = sizeof(struct sockaddr_in6);
+       }
+
+       if (msg->msg_control)
+       {
+               int err;
+
+               /* Ancillary data (pktinfo etc.); an error here replaces
+                * the byte count with the error code.
+                */
+               err = datagram_recv_ctl(sk, msg, skb);
+
+               if (err < 0)
+               {
+                       copied = err;
+               }
+       }
+
+       skb_free_datagram(sk, skb);
+       return (copied);
+}
+
+/*
+ *     Sending...
+ */
+
+/*
+ *     Per-send context threaded through ipv6_build_xmit() into
+ *     rawv6_frag_cksum(): the user iovec, owning socket, total
+ *     payload length, running checksum, next-header value, and the
+ *     final destination (set when a routing header is in use, NULL
+ *     otherwise — see rawv6_sendmsg).
+ */
+struct rawv6_fakehdr {
+       struct iovec    *iov;
+       struct sock     *sk;
+       __u32           len;
+       __u32           cksum;
+       __u32           proto;
+       struct in6_addr *daddr;
+};
+
+/*
+ *     Fragment fill callback for the no-checksum path: 'data' is the
+ *     user iovec; copy 'len' bytes starting at 'offset' straight into
+ *     the fragment buffer.
+ */
+static void rawv6_getfrag(const void *data, struct in6_addr *saddr, 
+                         char *buff, unsigned int offset, unsigned int len)
+{
+       struct iovec *iov = (struct iovec *) data;
+
+       memcpy_fromiovecend(buff, iov, offset, len);
+}
+
+/*
+ *     Fragment fill callback for the checksumming path: copy user
+ *     data while accumulating a partial checksum in hdr->cksum.  On
+ *     the first fragment (offset 0) the IPv6 pseudo-header is folded
+ *     in and the finished checksum is stored back into the packet at
+ *     the user-configured offset (RAW_CHECKSUM sockopt).
+ */
+static void rawv6_frag_cksum(const void *data, struct in6_addr *addr,
+                            char *buff, unsigned int offset, 
+                            unsigned int len)
+{
+       struct rawv6_fakehdr *hdr = (struct rawv6_fakehdr *) data;
+       
+       hdr->cksum = csum_partial_copy_fromiovecend(buff, hdr->iov, offset, 
+                                                   len, hdr->cksum);
+       
+       if (offset == 0)
+       {
+               struct sock *sk;
+               struct raw6_opt *opt;
+               struct in6_addr *daddr;
+               
+               sk = hdr->sk;
+               opt = &sk->tp_pinfo.tp_raw;
+
+               /* With a routing header the checksum must cover the
+                * final destination, passed in hdr->daddr; otherwise
+                * presumably the address following 'addr' in the packet
+                * header is used — TODO confirm the addr+1 layout.
+                */
+               if (hdr->daddr)
+               {
+                       daddr = hdr->daddr;
+               }
+               else
+               {
+                       daddr = addr + 1;
+               }
+               
+               hdr->cksum = csum_ipv6_magic(addr, daddr, hdr->len,
+                                            hdr->proto, hdr->cksum);
+               
+               if (opt->offset < len)
+               {
+                       __u16 *csum;
+
+                       csum = (__u16 *) (buff + opt->offset);
+                       *csum = hdr->cksum;
+               }
+               else
+               {
+                       /* 
+                        *  FIXME 
+                        *  signal an error to user via sk->err
+                        */
+                       printk(KERN_DEBUG "icmp: cksum offset too big\n");
+               }
+       }       
+}
+
+
+/*
+ *     sendmsg for raw IPv6 sockets.  Validates flags and destination
+ *     (explicit sockaddr_in6 or the connected address), picks the
+ *     next-header value from sin6_port or sk->num, parses ancillary
+ *     data for source address / device / options, then hands off to
+ *     ipv6_build_xmit() — via the checksumming fill callback when
+ *     RAW_CHECKSUM is enabled, plain copy otherwise.  Returns 'len'
+ *     on success or a negative error.
+ */
+static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, int len, 
+                        int noblock, int flags)
+{
+       struct ipv6_options opt_space;
+       struct sockaddr_in6 * sin6 = (struct sockaddr_in6 *) msg->msg_name;
+       struct ipv6_pinfo *np = &sk->net_pinfo.af_inet6;
+       struct ipv6_options *opt = NULL;
+       struct device *dev = NULL;
+       struct in6_addr *saddr = NULL;
+       int addr_len = msg->msg_namelen;
+       struct in6_addr *daddr;
+       struct raw6_opt *raw_opt;
+       u16 proto;
+       int err;
+       
+
+       /* Mirror BSD error message compatibility */
+       if (flags & MSG_OOB)            
+               return -EOPNOTSUPP;
+                        
+       if (flags & ~MSG_DONTROUTE)
+               return(-EINVAL);
+       /*
+        *      Get and verify the address. 
+        */
+
+       if (sin6) 
+       {
+               if (addr_len < sizeof(struct sockaddr_in6)) 
+                       return(-EINVAL);
+
+               if (sin6->sin6_family && sin6->sin6_family != AF_INET6) 
+                       return(-EINVAL);
+               
+               /* port is the proto value [0..255] carried in nexthdr */
+               proto = ntohs(sin6->sin6_port);
+
+               if (!proto)
+                       proto = sk->num;
+
+               if (proto > 255)
+                       return(-EINVAL);
+
+               daddr = &sin6->sin6_addr;
+               
+               /* Sending somewhere other than the connected peer
+                * invalidates the cached destination entry.
+                */
+               if (np->dest && ipv6_addr_cmp(daddr, &np->daddr))
+               {
+                       ipv6_dst_unlock(np->dest);
+                       np->dest = NULL;
+               }               
+       }
+       else 
+       {
+               /* No address given: only valid on a connected socket. */
+               if (sk->state != TCP_ESTABLISHED) 
+                       return(-EINVAL);
+               
+               proto = sk->num;
+               daddr = &(sk->net_pinfo.af_inet6.daddr);
+       }
+
+       if (ipv6_addr_any(daddr))
+       {
+               /* 
+                * unspecfied destination address 
+                * treated as error... is this correct ?
+                */
+               return(-EINVAL);
+       }
+
+       /*
+        *      We don't allow > 64K sends yet.          
+        */
+       if (len + (sk->ip_hdrincl ? 0 : sizeof(struct ipv6hdr)) > 65535)
+               return -EMSGSIZE;
+
+       if (msg->msg_control)
+       {
+               /* Ancillary data may override source address, device
+                * and per-packet options.
+                */
+               opt = &opt_space;
+               memset(opt, 0, sizeof(struct ipv6_options));
+
+               err = datagram_send_ctl(msg, &dev, &saddr, opt);
+               if (err < 0)
+               {
+                       printk(KERN_DEBUG "invalid msg_control\n");
+                       return err;
+               }               
+       }
+
+       raw_opt = &sk->tp_pinfo.tp_raw;
+
+       
+       if (raw_opt->checksum)
+       {
+               struct rawv6_fakehdr hdr;
+               
+               hdr.iov = msg->msg_iov;
+               hdr.sk  = sk;
+               hdr.len = len;
+               hdr.cksum = 0;
+               hdr.proto = proto;
+
+               /* With a routing header the pseudo-header checksum must
+                * use the final destination (see rawv6_frag_cksum).
+                */
+               if (opt && opt->srcrt)
+               {
+                       hdr.daddr = daddr;
+               }
+               else
+               {
+                       hdr.daddr = NULL;
+               }
+
+               err = ipv6_build_xmit(sk, rawv6_frag_cksum, &hdr, daddr, len,
+                                     saddr, dev, opt, proto, noblock);
+       }
+       else
+       {
+               err = ipv6_build_xmit(sk, rawv6_getfrag, msg->msg_iov, daddr,
+                                     len, saddr, dev, opt, proto,
+                                     noblock);
+       }
+
+       return err<0?err:len;
+}
+
+/*
+ *     SOL_ICMPV6 setsockopt: install the per-socket ICMPv6 type
+ *     filter.  Returns 0 or -ENOPROTOOPT.
+ *
+ *     NOTE(review): copy_from_user()'s return value is ignored and
+ *     'optlen' is never checked against sizeof(struct icmp6_filter) —
+ *     a short or faulting user buffer goes undetected.
+ */
+static int rawv6_seticmpfilter(struct sock *sk, int level, int optname, 
+                              char *optval, int optlen)
+{
+       struct raw6_opt *opt = &sk->tp_pinfo.tp_raw;
+       int err = 0;
+
+       switch (optname) {
+               case ICMPV6_FILTER:
+                       copy_from_user(&opt->filter, optval,
+                                      sizeof(struct icmp6_filter));
+                       break;
+               default:
+                       err = -ENOPROTOOPT;
+       };
+
+       return err;
+}
+
+/*
+ *     setsockopt dispatch for raw IPv6 sockets.  SOL_RAW options are
+ *     handled here (RAW_CHECKSUM: enable kernel checksumming with the
+ *     checksum field at byte offset 'val'); SOL_ICMPV6 is forwarded
+ *     to the ICMP filter handler (ICMPv6 sockets only); everything
+ *     else falls through to the generic ipv6_setsockopt().
+ *
+ *     NOTE(review): there is no way to clear RAW_CHECKSUM — it only
+ *     ever sets opt->checksum = 1; confirm intended.
+ */
+static int rawv6_setsockopt(struct sock *sk, int level, int optname, 
+                           char *optval, int optlen)
+{
+       struct raw6_opt *opt = &sk->tp_pinfo.tp_raw;
+       int val, err;
+
+       switch(level)
+       {
+               case SOL_RAW:
+                       break;
+
+               case SOL_ICMPV6:
+                       if (sk->num != IPPROTO_ICMPV6)
+                               return -EOPNOTSUPP;
+                       return rawv6_seticmpfilter(sk, level, optname, optval,
+                                                  optlen);
+               default:
+                       return ipv6_setsockopt(sk, level, optname, optval,
+                                              optlen);
+       }
+
+       if (optval == NULL)
+               return(-EINVAL);
+
+       err = get_user(val, (int *)optval);
+       if(err)
+               return err;
+
+       switch (optname) 
+       {
+               case RAW_CHECKSUM:
+                       opt->checksum = 1;
+                       opt->offset = val;
+
+                       return 0;
+                       break;
+
+               default:
+                       return(-ENOPROTOOPT);
+       }
+}
+
+/*
+ *     Close a raw IPv6 socket: mark it closed, drop the cached
+ *     destination cache reference (if any) and destroy the sock.
+ */
+static void rawv6_close(struct sock *sk, unsigned long timeout)
+{
+       struct ipv6_pinfo *np = &sk->net_pinfo.af_inet6;
+
+       sk->state = TCP_CLOSE;
+
+       if (np->dest)
+       {
+               ipv6_dst_unlock(np->dest);
+       }
+
+       destroy_sock(sk);
+}
+
+/* Per-socket init hook for struct proto; nothing to set up. */
+static int rawv6_init_sk(struct sock *sk)
+{
+       return(0);
+}
+
+/*
+ *     struct proto for IPv6 raw sockets (positional initializer —
+ *     slot meanings inferred from the handlers used; confirm order
+ *     against struct proto in net/sock.h of this kernel).
+ */
+struct proto rawv6_prot = {
+       rawv6_close,
+       udpv6_connect,          /* datagram-style connect, shared with UDPv6 */
+       NULL,
+       NULL,
+       NULL,
+       NULL,
+       datagram_select,
+       NULL,
+       rawv6_init_sk,
+       NULL,
+       NULL,
+       rawv6_setsockopt,
+       ipv6_getsockopt,                /* FIXME */
+       rawv6_sendmsg,
+       rawv6_recvmsg,
+       NULL,           /* No special bind */
+       rawv6_rcv_skb,          /* backlog delivery (see rawv6_rcv) */
+       128,
+       0,
+       "RAW",
+       0, 0,
+       NULL
+};
+
+/*
+ * Local variables:
+ *  compile-command: "gcc -D__KERNEL__ -I/usr/src/linux/include -Wall -Wstrict-prototypes -O2 -fomit-frame-pointer -fno-strength-reduce -pipe -m486 -DCPU=486 -DMODULE -DMODVERSIONS -include /usr/src/linux/include/linux/modversions.h  -c -o rawv6.o rawv6.c"
+ *  c-file-style: "Linux"
+ * End:
+ */
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
new file mode 100644 (file)
index 0000000..e76dcc1
--- /dev/null
@@ -0,0 +1,354 @@
+/*
+ *     IPv6 fragment reassembly
+ *     Linux INET6 implementation 
+ *
+ *     Authors:
+ *     Pedro Roque             <roque@di.fc.ul.pt>     
+ *
+ *     Based on: net/ipv4/ip_fragment.c
+ *
+ *     This program is free software; you can redistribute it and/or
+ *      modify it under the terms of the GNU General Public License
+ *      as published by the Free Software Foundation; either version
+ *      2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/socket.h>
+#include <linux/sockios.h>
+#include <linux/sched.h>
+#include <linux/net.h>
+#include <linux/netdevice.h>
+#include <linux/in6.h>
+#include <linux/ipv6.h>
+#include <linux/icmpv6.h>
+
+#include <net/sock.h>
+#include <net/snmp.h>
+
+#include <net/ipv6.h>
+#include <net/protocol.h>
+#include <net/transp_v6.h>
+#include <net/rawv6.h>
+#include <net/ndisc.h>
+#include <net/ipv6_route.h>
+#include <net/addrconf.h>
+
+
+/*
+ *     Sentinel head of the circular doubly-linked list of in-progress
+ *     reassembly queues: next and prev point back at itself while the
+ *     list is empty (iteration in ipv6_reassembly() stops when it
+ *     comes back around to this head).
+ */
+static struct frag_queue ipv6_frag_queue = {
+       &ipv6_frag_queue, &ipv6_frag_queue,
+       0, {0}, NULL, NULL,
+       0
+};
+
+static void                    create_frag_entry(struct sk_buff *skb, 
+                                                 struct device *dev,
+                                                 __u8 *nhptr,
+                                                 struct frag_hdr *fhdr);
+static int                     reasm_frag_1(struct frag_queue *fq, 
+                                            struct sk_buff **skb_in);
+
+static void                    reasm_queue(struct frag_queue *fq, 
+                                           struct sk_buff *skb, 
+                                           struct frag_hdr *fhdr);
+
+/*
+ *     Add one fragment to an existing reassembly queue.  The expiry
+ *     timer is parked while we work and re-armed with its remaining
+ *     time unless reassembly completed.  A clear M-flag (low bit of
+ *     frag_off, network order) marks the last fragment.  Returns the
+ *     reassembled packet's next-header value (non-zero) when *skb now
+ *     holds the complete packet, 0 while fragments are still missing.
+ */
+static int reasm_frag(struct frag_queue *fq, struct sk_buff **skb, 
+                     __u8 *nhptr,
+                     struct frag_hdr *fhdr)
+{
+       __u32   expires;
+       int nh;
+
+       /* del_timer() returns the remaining expiry, saved for re-arm. */
+       expires = del_timer(&fq->timer);
+
+       /*
+        *      We queue the packet even if it's the last.
+        *      It's a trade off. This allows the reassembly 
+        *      code to be simpler (=faster) and of the
+        *      steps we do for queueing the only unnecessary 
+        *      one it's the kmalloc for a struct ipv6_frag.
+        *      Feel free to try other alternatives...
+        */
+       reasm_queue(fq, *skb, fhdr);
+
+       if ((fhdr->frag_off & __constant_htons(0x0001)) == 0)
+       {
+               fq->last_in = 1;
+               fq->nhptr = nhptr;
+       }
+
+       if (fq->last_in)
+       {
+               if ((nh = reasm_frag_1(fq, skb)))
+                       return nh;
+       }
+               
+       fq->timer.expires = expires;
+       add_timer(&fq->timer);
+       
+       return 0;
+}
+
+/*
+ *     Fragment-header input routine.  Matches the fragment against an
+ *     existing queue by identification value, or starts a new queue.
+ *     Returns what reasm_frag() returns (next-header when complete),
+ *     or 0 when the packet was consumed into a new/partial queue.
+ *
+ *     NOTE(review): lookup is by identification only, not also by
+ *     source/destination address — fragments from different peers
+ *     with colliding IDs would be merged.  TODO confirm.
+ */
+int ipv6_reassembly(struct sk_buff **skb, struct device *dev, __u8 *nhptr,
+                   struct ipv6_options *opt)
+{
+       struct frag_hdr *fhdr = (struct frag_hdr *) ((*skb)->h.raw);
+       struct frag_queue *fq;
+       
+       for (fq = ipv6_frag_queue.next; fq != &ipv6_frag_queue; fq = fq->next)
+       {
+               if (fq->id == fhdr->identification)
+               {                       
+                       return reasm_frag(fq, skb, nhptr,fhdr);
+               }
+       }
+       
+       create_frag_entry(*skb, dev, nhptr, fhdr);
+
+
+       return 0;
+}
+
+
+/*
+ *     Tear down a reassembly queue: free every queued fragment (skb
+ *     plus its ipv6_frag wrapper), unlink the queue from the circular
+ *     list and free it.  Caller must have stopped fq->timer.
+ */
+static void fq_free(struct frag_queue *fq)
+{
+       struct ipv6_frag *fp, *back;
+
+       for(fp = fq->fragments; fp; )
+       {
+               kfree_skb(fp->skb, FREE_READ);          
+               back = fp;
+               fp=fp->next;
+               kfree(back);
+       }
+
+       fq->prev->next = fq->next;
+       fq->next->prev = fq->prev;
+
+       fq->prev = fq->next = NULL;
+       
+       kfree(fq);
+
+}
+
+/*
+ *     Reassembly timeout handler (fq->timer).  Sends an ICMPv6 "time
+ *     exceeded / fragment reassembly time exceeded" back using the
+ *     first queued fragment, then discards the whole queue.
+ */
+static void frag_expire(unsigned long data)
+{
+       struct frag_queue *fq;
+       struct ipv6_frag *frag;
+
+       fq = (struct frag_queue *) data;
+
+       del_timer(&fq->timer);
+
+       frag = fq->fragments;
+
+       /* A queue is only created with at least one fragment queued
+        * (see create_frag_entry), so this should never trigger.
+        */
+       if (frag == NULL)
+       {
+               printk(KERN_DEBUG "invalid fragment queue\n");
+               return;
+       }
+
+       icmpv6_send(frag->skb, ICMPV6_TIME_EXCEEDED, ICMPV6_EXC_FRAGTIME, 0,
+                   frag->skb->dev);
+       
+       fq_free(fq);
+}
+
+
+/*
+ *     Start a new reassembly queue for the first fragment seen with a
+ *     given identification: allocate and zero the queue, arm the
+ *     expiry timer, record the next-header value, queue the fragment
+ *     and link the queue onto the tail of the global circular list.
+ *     On allocation failure the skb is simply dropped.
+ */
+static void create_frag_entry(struct sk_buff *skb, struct device *dev, 
+                             __u8 *nhptr,
+                             struct frag_hdr *fhdr)
+{
+       struct frag_queue *fq;
+
+       fq = (struct frag_queue *) kmalloc(sizeof(struct frag_queue), 
+                                          GFP_ATOMIC);
+
+       if (fq == NULL)
+       {
+               kfree_skb(skb, FREE_READ);
+               return;
+       }
+
+       memset(fq, 0, sizeof(struct frag_queue));
+
+       fq->id = fhdr->identification;
+
+       fq->dev = dev;
+
+       /* init_timer has been done by the memset */
+       fq->timer.function = frag_expire;
+       fq->timer.data = (long) fq;
+       fq->timer.expires = jiffies + IPV6_FRAG_TIMEOUT;
+
+       fq->nexthdr = fhdr->nexthdr;
+
+
+       /* Clear M-flag: this first fragment is also the last one. */
+       if ((fhdr->frag_off & __constant_htons(0x0001)) == 0)
+       {
+               fq->last_in = 1;
+               fq->nhptr = nhptr;
+       }
+       reasm_queue(fq, skb, fhdr);
+
+       /* Insert at the tail of the circular queue list. */
+       fq->prev = ipv6_frag_queue.prev;
+       fq->next = &ipv6_frag_queue;
+       fq->prev->next = fq;
+       ipv6_frag_queue.prev = fq;
+       
+       add_timer(&fq->timer);
+}
+
+
+/*
+ *     Insert one fragment into fq's list, kept sorted by fragment
+ *     offset.  The payload length is derived from the IPv6 payload
+ *     length minus the extension headers preceding the fragment data.
+ *     Exact-duplicate offsets are discarded; allocation failure drops
+ *     the skb silently.
+ *
+ *     NOTE(review): overlapping (non-identical-offset) fragments are
+ *     not detected here; reasm_frag_1 only checks for gaps — confirm
+ *     overlap handling is intentional.
+ */
+static void reasm_queue(struct frag_queue *fq, struct sk_buff *skb, 
+                       struct frag_hdr *fhdr)
+{
+       struct ipv6_frag *nfp, *fp, **bptr;
+
+       nfp = (struct ipv6_frag *) kmalloc(sizeof(struct ipv6_frag), 
+                                          GFP_ATOMIC);
+
+       if (nfp == NULL)
+       {
+               kfree_skb(skb, FREE_READ);
+               return;
+       }
+
+       
+       /* Offset is in the upper 13 bits, units of 8 bytes. */
+       nfp->offset = ntohs(fhdr->frag_off) & ~0x7;
+       nfp->len = (ntohs(skb->ipv6_hdr->payload_len) -
+                   ((u8 *) (fhdr + 1) - (u8 *) (skb->ipv6_hdr + 1)));
+
+
+       nfp->skb  = skb;
+       nfp->fhdr = fhdr;
+
+       nfp->next = NULL;
+
+       bptr = &fq->fragments;
+       
+
+       /* Find the insertion point, keeping the list offset-sorted. */
+       for (fp = fq->fragments; fp; fp=fp->next)
+       {
+               if (nfp->offset <= fp->offset)
+                       break;
+               bptr = &fp->next;
+       }
+       
+       if (fp && fp->offset == nfp->offset)
+       {
+               if (fp->len != nfp->len)
+               {
+                       /* this cannot happen */
+                       printk(KERN_DEBUG "reasm_queue: dup with wrong len\n");
+               }
+
+               /* duplicate. discard it. */
+               kfree_skb(skb, FREE_READ);
+               kfree(nfp);
+               return;
+       }
+       
+
+       *bptr = nfp;
+       nfp->next = fp;
+}
+
+/*
+ *     check if this fragment completes the packet
+ *     returns true on success
+ */
+/*
+ *     check if this fragment completes the packet
+ *     returns true on success
+ *
+ *     Walks the sorted fragment list looking for gaps; if none, builds
+ *     a fresh skb containing the unfragmentable part (IPv6 header plus
+ *     headers up to the fragment header, taken from the last fragment)
+ *     followed by all fragment payloads, stores it in *skb_in, frees
+ *     the queue, and returns the next-header value.  Returns 0 while
+ *     incomplete; returns 1 (and frees everything) on alloc failure.
+ */
+static int reasm_frag_1(struct frag_queue *fq, struct sk_buff *skb_in)
+{
+       struct ipv6_frag *fp;
+       struct ipv6_frag *tail = NULL;
+       struct sk_buff *skb;
+       __u32  offset = 0;
+       __u32  payload_len;
+       __u16  unfrag_len;
+       __u16  copy;
+       int    nh;
+
+
+       /* The list is offset-sorted; any gap means we are not done. */
+       for(fp = fq->fragments; fp; fp=fp->next)
+       {
+               if (offset != fp->offset)
+                       return 0;
+
+               offset += fp->len;
+               tail = fp;
+       }
+
+       /* 
+        * we know the m_flag arrived and we have a queue,
+        * starting from 0, without gaps.
+        * this means we have all fragments.
+        */
+
+       unfrag_len = (u8 *) (tail->fhdr) - (u8 *) (tail->skb->ipv6_hdr + 1);
+
+       payload_len = (unfrag_len + tail->offset + 
+                      (tail->skb->tail - (__u8 *) (tail->fhdr + 1)));
+
+       printk(KERN_DEBUG "reasm: payload len = %d\n", payload_len);
+
+       if ((skb = dev_alloc_skb(sizeof(struct ipv6hdr) + payload_len))==NULL)
+       {
+               printk(KERN_DEBUG "reasm_frag: no memory for reassembly\n");
+               fq_free(fq);
+               return 1;
+       }
+
+       copy = unfrag_len + sizeof(struct ipv6hdr);
+
+       skb->ipv6_hdr = (struct ipv6hdr *) skb->data;
+
+       skb->free = 1;
+       skb->dev = fq->dev;
+
+       
+       nh = fq->nexthdr;
+
+       /* Patch the saved next-header slot so the header chain now
+        * skips the (removed) fragment header.
+        */
+       *(fq->nhptr) = nh;
+       memcpy(skb_put(skb, copy), tail->skb->ipv6_hdr, copy);
+
+       skb->h.raw = skb->tail;
+
+       /* NOTE(review): htons() is the conventional direction when
+        * storing to the wire; ntohs() produces the same bytes but is
+        * semantically backwards here.
+        */
+       skb->ipv6_hdr->payload_len = ntohs(payload_len);
+
+       *skb_in = skb;
+
+       /*
+        *      FIXME: If we don't have a checksum we ought to be able
+        *      to defragment and checksum in this pass. [AC]
+        */
+       for(fp = fq->fragments; fp; )
+       {
+               struct ipv6_frag *back;
+
+               memcpy(skb_put(skb, fp->len), (__u8*)(fp->fhdr + 1), fp->len);
+               kfree_skb(fp->skb, FREE_READ);
+               back = fp;
+               fp=fp->next;
+               kfree(back);
+       }
+       
+       fq->prev->next = fq->next;
+       fq->next->prev = fq->prev;
+
+       fq->prev = fq->next = NULL;
+       
+       kfree(fq);
+
+       return nh;
+}
+
+
+
+/*
+ * Local variables:
+ *  compile-command: "gcc -D__KERNEL__ -I/usr/src/linux/include -Wall -Wstrict-prototypes -O2 -fomit-frame-pointer -fno-strength-reduce -pipe -m486 -DCPU=486 -DMODULE -DMODVERSIONS -include /usr/src/linux/include/linux/modversions.h  -c -o reassembly.o reassembly.c"
+ * c-file-style: "Linux"
+ * End:
+ */
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
new file mode 100644 (file)
index 0000000..9dbecfb
--- /dev/null
@@ -0,0 +1,578 @@
+/*
+ *     IPv6 over IPv4 tunnel device - Simple Internet Transition (SIT)
+ *     Linux INET6 implementation
+ *
+ *     Authors:
+ *     Pedro Roque             <roque@di.fc.ul.pt>     
+ *
+ *
+ *     This program is free software; you can redistribute it and/or
+ *      modify it under the terms of the GNU General Public License
+ *      as published by the Free Software Foundation; either version
+ *      2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/socket.h>
+#include <linux/sockios.h>
+#include <linux/sched.h>
+#include <linux/net.h>
+#include <linux/in6.h>
+#include <linux/netdevice.h>
+#include <linux/if_arp.h>
+#include <linux/icmp.h>
+
+#include <net/sock.h>
+#include <net/snmp.h>
+
+#include <net/ipv6.h>
+#include <net/protocol.h>
+#include <net/transp_v6.h>
+#include <net/ndisc.h>
+#include <net/ipv6_route.h>
+#include <net/addrconf.h>
+#include <net/ip.h>
+#include <net/udp.h>
+#include <net/sit.h>
+
+
+static int sit_init_dev(struct device *dev);
+
+static struct device sit_device = {
+       "sit0",
+       0, 0, 0, 0,
+       0x0, 0,
+       0, 0, 0, NULL, sit_init_dev
+};
+
+static unsigned long           sit_gc_last_run;
+static void                    sit_mtu_cache_gc(void);
+
+static int                     sit_xmit(struct sk_buff *skb, 
+                                        struct device *dev);
+static int                     sit_rcv(struct sk_buff *skb, 
+                                       struct device *dev, 
+                                       struct options *opt,
+                                       __u32 daddr, unsigned short len,
+                                       __u32 saddr, int redo, 
+                                       struct inet_protocol * protocol);
+
+static int                     sit_open(struct device *dev);
+static int                     sit_close(struct device *dev);
+
+static struct enet_statistics *        sit_get_stats(struct device *dev);
+
+static void                    sit_err(int type, int code, 
+                                       unsigned char *buff, __u32 info,
+                                       __u32 daddr, __u32 saddr,
+                                       struct inet_protocol *protocol);
+
+static struct inet_protocol sit_protocol = {
+       sit_rcv,
+       sit_err,
+       0,
+       IPPROTO_IPV6,
+       0,
+       NULL,
+       "IPv6"
+};
+
+#define SIT_NUM_BUCKETS        16
+
+struct sit_mtu_info *sit_mtu_cache[SIT_NUM_BUCKETS];
+
+static int             vif_num = 0;
+static struct sit_vif  *vif_list = NULL;
+
+/*
+ *     Hash an IPv4 endpoint address into one of the SIT_NUM_BUCKETS
+ *     mtu-cache buckets by folding the upper bits onto the low byte.
+ */
+static __inline__ __u32 sit_addr_hash(__u32 addr)
+{
+        
+        __u32 hash_val;
+        
+        hash_val = addr;
+
+        hash_val ^= hash_val >> 16;
+       hash_val ^= hash_val >> 8;
+        
+        return (hash_val & (SIT_NUM_BUCKETS - 1));
+}
+
+/*
+ *     Record the path MTU for an IPv4 tunnel endpoint in the hash
+ *     cache.  GFP_ATOMIC because this can run from the ICMP error path.
+ *     NOTE(review): no duplicate check — repeated inserts for the same
+ *     address pile up until the garbage collector expires them.
+ */
+static void sit_cache_insert(__u32 addr, int mtu)
+{
+       struct sit_mtu_info *minfo;
+       int hash;
+       
+       minfo = kmalloc(sizeof(struct sit_mtu_info), GFP_ATOMIC);
+       
+       if (minfo == NULL)
+               return;
+
+       minfo->addr = addr;
+       minfo->tstamp = jiffies;
+       minfo->mtu = mtu;
+
+       hash = sit_addr_hash(addr);
+
+       /* link at the head of the bucket chain */
+       minfo->next = sit_mtu_cache[hash];
+       sit_mtu_cache[hash] = minfo;
+}
+
+/*
+ *     Look up the cached path MTU entry for an IPv4 destination and
+ *     refresh its timestamp.  Returns NULL on a miss.  The cache
+ *     garbage collector is piggybacked here, running at most once per
+ *     SIT_GC_FREQUENCY jiffies.
+ */
+static struct sit_mtu_info * sit_mtu_lookup(__u32 addr)
+{
+       struct sit_mtu_info *iter;
+       int hash;
+
+       hash = sit_addr_hash(addr);
+
+       for(iter = sit_mtu_cache[hash]; iter; iter=iter->next)
+       {
+               if (iter->addr == addr)
+               {
+                       iter->tstamp = jiffies;
+                       break;
+               }
+       }
+
+       /*
+        *      run garbage collector
+        */
+
+       if (jiffies - sit_gc_last_run > SIT_GC_FREQUENCY)
+       {
+               sit_mtu_cache_gc();
+               sit_gc_last_run = jiffies;
+       }
+
+       return iter;
+}
+
+/*
+ *     Walk every bucket and free cache entries unused for more than
+ *     SIT_GC_TIMEOUT jiffies.  `back' tracks the predecessor so that
+ *     unlinking works for both head and interior nodes.
+ */
+static void sit_mtu_cache_gc(void)
+{
+       struct sit_mtu_info *iter, *back;
+       unsigned long now = jiffies;
+       int i;
+
+       for (i=0; i < SIT_NUM_BUCKETS; i++)
+       {
+               back = NULL;
+               for (iter = sit_mtu_cache[i]; iter;)
+               {
+                       if (now - iter->tstamp > SIT_GC_TIMEOUT)
+                       {
+                               struct sit_mtu_info *old;
+
+                               old = iter;
+                               iter = iter->next;
+
+                               if (back)
+                               {
+                                       back->next = iter;
+                               }
+                               else
+                               {
+                                       sit_mtu_cache[i] = iter;
+                               }
+
+                               kfree(old);
+                               continue;
+                       }
+                       back = iter;
+                       iter = iter->next;
+               }
+       }
+}
+
+/*
+ *     init routine for the static sit0 device: wire up the device ops,
+ *     allocate the statistics block and set the SIT link parameters
+ *     (ARPHRD_SIT, mtu = 1500 minus the outer IPv4 header, NOARP).
+ */
+static int sit_init_dev(struct device *dev)
+{
+       int i;
+
+       dev->open       = sit_open;
+       dev->stop       = sit_close;
+
+       dev->hard_start_xmit    = sit_xmit;
+       dev->get_stats          = sit_get_stats;
+
+       dev->priv = kmalloc(sizeof(struct enet_statistics), GFP_KERNEL);
+
+       if (dev->priv == NULL)
+               return -ENOMEM;
+
+       memset(dev->priv, 0, sizeof(struct enet_statistics));
+
+
+       for (i = 0; i < DEV_NUMBUFFS; i++)
+               skb_queue_head_init(&dev->buffs[i]);
+
+       /* no link-layer header on a tunnel device */
+       dev->hard_header        = NULL;
+       dev->rebuild_header     = NULL;
+       dev->set_mac_address    = NULL;
+       dev->header_cache_bind  = NULL;
+       dev->header_cache_update= NULL;
+
+       dev->type               = ARPHRD_SIT;
+
+       dev->hard_header_len    = MAX_HEADER;
+       dev->mtu                = 1500 - sizeof(struct iphdr);
+       dev->addr_len           = 0;
+       dev->tx_queue_len       = 2;
+
+       memset(dev->broadcast, 0, MAX_ADDR_LEN);
+       memset(dev->dev_addr,  0, MAX_ADDR_LEN);
+
+       dev->flags              = IFF_NOARP;    
+
+       dev->family             = AF_INET6;
+       dev->pa_addr            = 0;
+       dev->pa_brdaddr         = 0;
+       dev->pa_dstaddr         = 0;
+       dev->pa_mask            = 0;
+       dev->pa_alen            = 4;
+
+       return 0;
+}
+
+/*
+ *     init routine for dynamically created sitN tunnels; everything
+ *     else in the struct device was copied from sit_device by
+ *     sit_add_tunnel(), so only per-device state is set up here.
+ */
+static int sit_init_vif(struct device *dev)
+{
+       int i;
+
+       dev->flags = IFF_NOARP|IFF_POINTOPOINT|IFF_MULTICAST;
+       dev->priv = kmalloc(sizeof(struct enet_statistics), GFP_KERNEL);
+
+       if (dev->priv == NULL)
+               return -ENOMEM;
+
+       memset(dev->priv, 0, sizeof(struct enet_statistics));
+
+       for (i = 0; i < DEV_NUMBUFFS; i++)
+               skb_queue_head_init(&dev->buffs[i]);
+               
+       return 0;
+}
+
+/* bringing a sit interface up requires no device work */
+static int sit_open(struct device *dev)
+{
+       return 0;
+}
+
+/* taking a sit interface down requires no device work */
+static int sit_close(struct device *dev)
+{
+       return 0;
+}
+
+
+/*
+ *     Boot/module initialisation: register the default sit0 device,
+ *     hook the IPv6-in-IPv4 (IPPROTO_IPV6) protocol handler and reset
+ *     the MTU cache.
+ */
+int sit_init(void)
+{
+       int i;
+
+       /* register device */
+
+       if (register_netdev(&sit_device) != 0)
+       {
+               return -EIO;
+       }
+
+       inet_add_protocol(&sit_protocol);
+
+       for (i=0; i < SIT_NUM_BUCKETS; i++)
+               sit_mtu_cache[i] = NULL;
+
+       sit_gc_last_run = jiffies;
+
+       return 0;
+}
+
+/*
+ *     Create a point-to-point configured tunnel to `dstaddr', clone
+ *     the template device, register it as sitN and link it on the
+ *     vif list.  Returns the new device or NULL on allocation failure.
+ *     NOTE(review): if the struct device kmalloc fails, `vif' is
+ *     leaked; the register_netdev() return value is also ignored.
+ */
+struct device *sit_add_tunnel(__u32 dstaddr)
+{
+       struct sit_vif *vif;
+       struct device *dev;
+       
+       vif = kmalloc(sizeof(struct sit_vif), GFP_KERNEL);
+       if (vif == NULL)
+               return NULL;
+       
+       /*
+        *      Create PtoP configured tunnel
+        */
+       
+       dev = kmalloc(sizeof(struct device), GFP_KERNEL);
+       if (dev == NULL)
+               return NULL;
+
+       memcpy(dev, &sit_device, sizeof(struct device));
+       dev->init = sit_init_vif;
+       dev->pa_dstaddr = dstaddr;
+
+       dev->name = vif->name;
+       sprintf(vif->name, "sit%d", ++vif_num);
+
+       register_netdev(dev);
+
+       vif->dev = dev;
+       vif->next = vif_list;
+       vif_list = vif;
+
+       return dev;
+}
+
+/*
+ *     Tear down all configured tunnels, then the default sit0 device
+ *     and the protocol hook.
+ *     NOTE(review): `cur' is saved but never kfree()d, so the
+ *     struct sit_vif list nodes themselves leak here.
+ */
+void sit_cleanup(void)
+{
+       struct sit_vif *vif;
+       
+       for (vif = vif_list; vif;)
+       {
+               struct device *dev = vif->dev;
+               struct sit_vif *cur;
+
+               unregister_netdev(dev);
+               kfree(dev->priv);
+               kfree(dev);
+               
+               cur = vif;
+               vif = vif->next;
+       }
+
+       vif_list = NULL;
+
+       unregister_netdev(&sit_device);
+       inet_del_protocol(&sit_protocol);
+       
+}
+
+
+
+/*
+ *     receive IPv4 ICMP messages
+ */
+
+/*
+ *     Handle IPv4 ICMP errors for the tunnel.  Only dest-unreachable/
+ *     fragmentation-needed is acted on: cache the reported path MTU
+ *     (minus the outer IPv4 header) for the tunnel endpoint.
+ *     NOTE(review): the kmalloc'ed `minfo' below is never used —
+ *     sit_cache_insert() allocates its own entry — so it leaks on
+ *     every cache miss.  The printk format "%ul" also looks like it
+ *     was meant to be "%u" (the 'l' prints literally).
+ */
+static void sit_err(int type, int code, unsigned char *buff, __u32 info,
+                   __u32 daddr, __u32 saddr, struct inet_protocol *protocol)
+                   
+{
+       if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED)
+       {
+               struct sit_mtu_info *minfo;
+
+               info -= sizeof(struct iphdr);
+
+               minfo = sit_mtu_lookup(daddr);
+
+               printk(KERN_DEBUG "sit: %08lx pmtu = %ul\n", ntohl(saddr),
+                      info);
+               if (minfo == NULL)
+               {
+                       minfo = kmalloc(sizeof(struct sit_mtu_info),
+                                       GFP_ATOMIC);
+
+                       if (minfo == NULL)
+                               return;
+
+                       start_bh_atomic();
+                       sit_cache_insert(daddr, info);
+                       end_bh_atomic();
+               }
+               else
+               {
+                       minfo->mtu = info;
+               }
+       }
+}
+
+/*
+ *     Receive an IPv6-in-IPv4 packet: strip the outer header, find the
+ *     configured tunnel whose remote endpoint matches the outer source
+ *     address (falling back to the default sit0 device), bump the rx
+ *     counter and hand the inner packet to ipv6_rcv().
+ */
+static int sit_rcv(struct sk_buff *skb, struct device *idev, 
+                  struct options *opt,
+                  __u32 daddr, unsigned short len,
+                  __u32 saddr, int redo, struct inet_protocol * protocol)
+{
+       struct enet_statistics *stats;
+       struct device *dev = NULL;
+       struct sit_vif *vif;    
+       
+       /* drop the outer IPv4 header; h.raw points at the inner IPv6 header */
+       skb->h.raw = skb_pull(skb, skb->h.raw - skb->data);
+       skb->protocol = __constant_htons(ETH_P_IPV6);
+
+       for (vif = vif_list; vif; vif = vif->next)
+       {
+               if (saddr == vif->dev->pa_dstaddr)
+               {
+                       dev = vif->dev;
+                       break;
+               }
+       }
+
+       if (dev == NULL)
+       {
+               dev = &sit_device;
+       }
+
+       skb->dev = dev;
+       skb->ip_summed = CHECKSUM_NONE;
+
+       stats = (struct enet_statistics *)dev->priv;
+       stats->rx_packets++;
+
+       ipv6_rcv(skb, dev, NULL);
+       return 0;
+}
+
+/*
+ *     Transmit an IPv6 packet through the tunnel: prepend an IPv4
+ *     header (protocol IPPROTO_IPV6), apply the cached path MTU, and
+ *     queue the result on the underlying device.
+ *     NOTE(review): on the default sit0 device the IPv4-compatible
+ *     destination is validated but `daddr' is never loaded from
+ *     addr6->s6_addr32[3], so it stays 0 — looks wrong, confirm
+ *     against later versions.  Also, the on_error paths taken after
+ *     ip_rt_route() succeeds never ip_rt_put(rt), leaking a route
+ *     reference.
+ */
+static int sit_xmit(struct sk_buff *skb, struct device *dev)
+{
+       struct enet_statistics *stats;
+       struct sit_mtu_info *minfo;
+       struct in6_addr *addr6; 
+       unsigned long flags;
+       struct rtable *rt;
+       struct iphdr *iph;
+       __u32 saddr;
+       __u32 daddr;
+       __u32 raddr;
+       int addr_type;
+       int mtu;
+       int len;
+
+       /* 
+        *      Make sure we are not busy (check lock variable) 
+        */
+
+       stats = (struct enet_statistics *)dev->priv;
+       save_flags(flags);
+       cli();
+       if (dev->tbusy != 0) 
+       {
+               restore_flags(flags);
+               printk(KERN_DEBUG "sit_xmit: busy\n");
+               return(1);
+       }
+       dev->tbusy = 1;
+       restore_flags(flags);
+
+       daddr = dev->pa_dstaddr;
+       if (daddr == 0)
+       {
+               /* default device: tunnel endpoint comes from the
+                  IPv4-compatible IPv6 destination address */
+               addr6 = &skb->ipv6_hdr->daddr;
+               addr_type = ipv6_addr_type(addr6);
+
+               if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
+               {
+                       printk(KERN_DEBUG "sit_xmit: non v4 address\n");
+                       goto on_error;
+               }
+       }
+
+       len = skb->tail - (skb->data + sizeof(struct ipv6hdr));
+
+       /* orphan the skb: it now belongs to the tunnel, not the socket */
+       if (skb->sk)
+       {
+               atomic_sub(skb->truesize, &skb->sk->wmem_alloc);
+       }
+
+       skb->sk = NULL;
+               
+       iph = (struct iphdr *) skb_push(skb, sizeof(struct iphdr));
+       
+       skb->protocol = htons(ETH_P_IP);
+
+       /* get route */
+
+       rt = ip_rt_route(daddr, skb->localroute);
+       
+       if (rt == NULL)
+       {
+               printk(KERN_DEBUG "sit: no route to host\n");
+               goto on_error;
+       }
+
+       minfo = sit_mtu_lookup(daddr);
+
+       if (minfo)
+               mtu = minfo->mtu;
+       else
+               mtu = rt->rt_dev->mtu;
+
+       /* above 576 we set DF, so oversized packets bounce with TOOBIG */
+       if (mtu > 576 && len > mtu)
+       {
+               icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, dev);
+               goto on_error;
+       }
+
+       saddr = rt->rt_src;
+       skb->dev = rt->rt_dev;
+       raddr = rt->rt_gateway; 
+
+       if (raddr == 0)
+               raddr = daddr;
+
+       /* now for the device header */
+
+       skb->arp = 1;
+               
+       if (skb->dev->hard_header_len) 
+       {
+               int mac;
+
+               if (skb->data - skb->head < skb->dev->hard_header_len)
+               {
+                       printk(KERN_DEBUG "sit: space at head < dev header\n");
+                       goto on_error;
+               }
+
+               if (skb->dev->hard_header)
+               {
+                       mac = skb->dev->hard_header(skb, skb->dev, ETH_P_IP, 
+                                              NULL, NULL, len);
+
+                       if (mac < 0)
+                               skb->arp = 0;
+
+                       skb->raddr = raddr;             
+               }
+               
+       }
+
+       ip_rt_put(rt);
+
+
+       iph->version  = 4;
+       iph->ihl      = 5;
+       iph->tos      = 0;                              /* tos set to 0... */
+
+       if (mtu > 576)
+       {
+               iph->frag_off = htons(IP_DF);
+       }
+       else
+               iph->frag_off = 0;
+
+       iph->ttl      = 64;
+       iph->saddr    = saddr;
+       iph->daddr    = daddr;
+       iph->protocol = IPPROTO_IPV6;
+       skb->ip_hdr   = iph;
+
+       ip_send_check(iph);
+
+       ip_queue_xmit(NULL, skb->dev, skb, 1);
+
+       stats->tx_packets++;
+       dev->tbusy=0;
+
+       return 0;
+
+  on_error:
+       kfree_skb(skb, FREE_WRITE);
+       dev->tbusy=0;
+       stats->tx_errors++;
+       return 0;       
+}
+
+/* return the per-device statistics block allocated at init time */
+static struct enet_statistics *sit_get_stats(struct device *dev)
+{
+       return((struct enet_statistics*) dev->priv);
+}
+
+
+/*
+ * Local variables:
+ *  compile-command: "gcc -D__KERNEL__ -I/usr/src/linux/include -Wall -Wstrict-prototypes -O2 -fomit-frame-pointer -fno-strength-reduce -pipe -m486 -DCPU=486 -DMODULE -DMODVERSIONS -include /usr/src/linux/include/linux/modversions.h  -c -o sit.o sit.c"
+ * c-file-style: "Linux"
+ * End:
+ */
diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c
new file mode 100644 (file)
index 0000000..ce7bb46
--- /dev/null
@@ -0,0 +1,78 @@
+/*
+ * sysctl_net_ipv6.c: sysctl interface to net IPV6 subsystem.
+ */
+
+#include <linux/mm.h>
+#include <linux/sysctl.h>
+#include <linux/in6.h>
+#include <linux/ipv6.h>
+#include <net/ndisc.h>
+#include <net/ipv6.h>
+#include <net/addrconf.h>
+
+
+int ipv6_hop_limit = IPV6_DEFAULT_HOPLIMIT;
+
+/*
+ * sysctl handler for ipv6_forwarding: run the generic integer handler,
+ * then act on transitions (`val' holds the old setting).  Enabling
+ * notifies both ndisc and addrconf; disabling only notifies ndisc.
+ */
+int ipv6_sysctl_forwarding(ctl_table *ctl, int write, struct file * filp,
+                          void *buffer, size_t *lenp)
+{
+       int val = ipv6_forwarding;
+       int retv;
+
+       retv = proc_dointvec(ctl, write, filp, buffer, lenp);
+
+       if (write)
+       {
+               if (ipv6_forwarding && val == 0) {
+                       printk(KERN_DEBUG "sysctl: IPv6 forwarding enabled\n");
+                       ndisc_forwarding_on();
+                       addrconf_forwarding_on();                      
+               }
+
+               if (ipv6_forwarding == 0 && val) {
+                       ndisc_forwarding_off();
+               }
+       }
+       return retv;
+}
+
+/*
+ * /proc/sys/net/ipv6 entries: forwarding (with transition side effects
+ * via ipv6_sysctl_forwarding) and the default hop limit.
+ */
+ctl_table ipv6_table[] = {
+        {NET_IPV6_FORWARDING, "ipv6_forwarding",
+         &ipv6_forwarding, sizeof(int), 0644, NULL,
+         &ipv6_sysctl_forwarding},
+
+       {NET_IPV6_HOPLIMIT, "ipv6_hop_limit",
+         &ipv6_hop_limit, sizeof(int), 0644, NULL,
+         &proc_dointvec},
+
+       {0}
+};
+
+#ifdef MODULE
+/*
+ * When IPv6 is a module it must build its own net/ipv6 sysctl tree
+ * and register/unregister it at load/unload time; built-in kernels
+ * link ipv6_table into the static sysctl tree instead.
+ */
+static struct ctl_table_header *ipv6_sysctl_header;
+static struct ctl_table ipv6_root_table[];
+static struct ctl_table ipv6_net_table[];
+
+
+ctl_table ipv6_root_table[] = {
+       {CTL_NET, "net", NULL, 0, 0555, ipv6_net_table},
+        {0}
+};
+
+ctl_table ipv6_net_table[] = {
+       {NET_IPV6, "ipv6", NULL, 0, 0555, ipv6_table},
+        {0}
+};
+
+void ipv6_sysctl_register(void)
+{
+       ipv6_sysctl_header = register_sysctl_table(ipv6_root_table, 0);
+}
+
+void ipv6_sysctl_unregister(void)
+{
+       unregister_sysctl_table(ipv6_sysctl_header);
+}
+
+#endif
+
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
new file mode 100644 (file)
index 0000000..2ce4eea
--- /dev/null
@@ -0,0 +1,1225 @@
+/*
+ *     TCP over IPv6
+ *     Linux INET6 implementation 
+ *
+ *     Authors:
+ *     Pedro Roque             <roque@di.fc.ul.pt>     
+ *
+ *     $Id: tcp_ipv6.c,v 1.15 1996/10/29 22:45:53 roque Exp $
+ *
+ *     Based on: 
+ *     linux/net/ipv4/tcp.c
+ *     linux/net/ipv4/tcp_input.c
+ *     linux/net/ipv4/tcp_output.c
+ *
+ *     This program is free software; you can redistribute it and/or
+ *      modify it under the terms of the GNU General Public License
+ *      as published by the Free Software Foundation; either version
+ *      2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/socket.h>
+#include <linux/sockios.h>
+#include <linux/net.h>
+#include <linux/sched.h>
+#include <linux/in.h>
+#include <linux/in6.h>
+#include <linux/netdevice.h>
+
+#include <linux/ipv6.h>
+#include <linux/icmpv6.h>
+#include <linux/random.h>
+
+#include <net/tcp.h>
+#include <net/ndisc.h>
+#include <net/ipv6.h>
+#include <net/transp_v6.h>
+#include <net/addrconf.h>
+#include <net/ipv6_route.h>
+
+#include <asm/uaccess.h>
+
+static void tcp_v6_send_reset(struct in6_addr *saddr, struct in6_addr *daddr, 
+                             struct tcphdr *th, struct proto *prot, 
+                             struct ipv6_options *opt,
+                             struct device *dev, int pri, int hop_limit);
+
+static void tcp_v6_send_check(struct sock *sk, struct tcphdr *th, int len, 
+                             struct sk_buff *skb);
+
+static int tcp_v6_backlog_rcv(struct sock *sk, struct sk_buff *skb);
+static int tcp_v6_build_header(struct sock *sk, struct sk_buff *skb);
+
+static struct tcp_func ipv6_mapped;
+static struct tcp_func ipv6_specific;
+
+/*
+ *     Fold the IPv6 pseudo-header into a TCP checksum; `base' is the
+ *     partial sum already computed over the TCP header and payload.
+ */
+static __inline__ u16 tcp_v6_check(struct tcphdr *th, int len,
+                                  struct in6_addr *saddr, 
+                                  struct in6_addr *daddr, 
+                                  unsigned long base)
+{
+       return csum_ipv6_magic(saddr, daddr, len, IPPROTO_TCP, base);
+}
+
+/*
+ *     Pick an initial sequence number for an incoming segment.  Only
+ *     32 bits of each address feed the generator: the low word of the
+ *     IPv6 addresses, or the IPv4 addresses for non-IPv6 traffic.
+ */
+static __u32 tcp_v6_init_sequence(struct sock *sk, struct sk_buff *skb)
+{
+       __u32 si;
+       __u32 di;
+
+       if (skb->protocol == __constant_htons(ETH_P_IPV6))
+       {
+               si = skb->ipv6_hdr->saddr.s6_addr32[3];
+               di = skb->ipv6_hdr->daddr.s6_addr32[3];
+       }
+       else
+       {
+               si = skb->saddr;
+               di = skb->daddr;
+       }
+
+       return secure_tcp_sequence_number(di, si,
+                                         skb->h.th->dest,
+                                         skb->h.th->source);
+}
+
+/*
+ *     Active open.  Validates the destination (rejecting multicast,
+ *     double connects and connect-to-self), diverts IPv4-mapped
+ *     destinations to tcp_v4_connect(), resolves the route and source
+ *     address, initialises sequence numbers and windows, then builds
+ *     and transmits the SYN (with an MSS option) and arms the
+ *     retransmit timer.
+ *     NOTE(review): if ipv6_get_saddr() returns NULL the destination
+ *     cache entry obtained just above is not released before the
+ *     -ENETUNREACH return — confirm whether ipv6_dst_route() takes a
+ *     reference that needs ipv6_dst_unlock() here.
+ */
+static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, 
+                         int addr_len)
+{
+       struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
+       struct ipv6_pinfo *np = &sk->net_pinfo.af_inet6;
+       struct tcp_opt *tp = &sk->tp_pinfo.af_tcp;
+       struct dest_entry *dc;
+       struct inet6_ifaddr *ifa;
+       struct tcphdr *th;
+       __u8 *ptr;
+       struct sk_buff *buff;
+       struct sk_buff *skb1;
+       int addr_type;
+       int tmp;
+
+       if (sk->state != TCP_CLOSE) 
+               return(-EISCONN);
+
+       /*
+        *      Don't allow a double connect.
+        */
+               
+       if(!ipv6_addr_any(&np->daddr))
+               return -EINVAL;
+       
+       if (addr_len < sizeof(struct sockaddr_in6)) 
+               return(-EINVAL);
+
+       if (usin->sin6_family && usin->sin6_family != AF_INET6) 
+               return(-EAFNOSUPPORT);
+
+       /*
+        *      connect() to INADDR_ANY means loopback (BSD'ism).
+        */
+       
+       if(ipv6_addr_any(&usin->sin6_addr))
+               usin->sin6_addr.s6_addr[15] = 0x1; 
+
+       addr_type = ipv6_addr_type(&usin->sin6_addr);
+
+       if(addr_type & IPV6_ADDR_MULTICAST)
+       {
+               return -ENETUNREACH;
+       }
+
+       /*
+        *      connect to self not allowed
+        */
+
+       if (ipv6_addr_cmp(&usin->sin6_addr, &np->saddr) == 0 &&
+           usin->sin6_port == sk->dummy_th.source)
+       {
+               return (-EINVAL);
+       }
+
+       memcpy(&np->daddr, &usin->sin6_addr, sizeof(struct in6_addr));
+
+       /*
+        *      TCP over IPv4
+        */
+
+       if (addr_type == IPV6_ADDR_MAPPED)
+       {
+               struct sockaddr_in sin;
+               int err;
+
+               printk(KERN_DEBUG "connect: ipv4 mapped\n");
+
+               sin.sin_family = AF_INET;
+               sin.sin_port = usin->sin6_port;
+               sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];
+
+               /* switch the socket over to the IPv4 operations;
+                  restore the IPv6 ops if the v4 connect fails */
+               sk->tp_pinfo.af_tcp.af_specific = &ipv6_mapped;
+               sk->backlog_rcv = tcp_v4_backlog_rcv;
+
+               err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin));
+
+               if (err)
+               {
+                       sk->tp_pinfo.af_tcp.af_specific = &ipv6_specific;
+                       sk->backlog_rcv = tcp_v6_backlog_rcv;
+               }
+               
+               return err;
+       }
+
+       dc = ipv6_dst_route(&np->daddr, NULL, (sk->localroute ? RTI_GATEWAY : 0));
+       
+       if (dc == NULL)
+       {
+               return -ENETUNREACH;
+       }
+       
+       np->dest = dc;
+       np->dc_sernum = (dc->rt.fib_node ? dc->rt.fib_node->fn_sernum : 0);
+
+       ifa = ipv6_get_saddr((struct rt6_info *)dc, &np->daddr);
+       
+       if (ifa == NULL)
+       {
+               return -ENETUNREACH;
+       }
+
+       
+       /*
+        *      Init variables
+        */
+
+       lock_sock(sk);
+
+       sk->dummy_th.dest = usin->sin6_port;    
+       sk->write_seq = secure_tcp_sequence_number(np->saddr.s6_addr32[3],
+                                                  np->daddr.s6_addr32[3],
+                                                  sk->dummy_th.source,
+                                                  sk->dummy_th.dest);
+
+       tp->snd_wnd = 0;
+       tp->snd_wl1 = 0;
+       tp->snd_wl2 = sk->write_seq;
+       tp->snd_una = sk->write_seq;
+
+       tp->rcv_nxt = 0;
+
+       sk->err = 0;
+
+       release_sock(sk);
+
+       buff = sock_wmalloc(sk, MAX_SYN_SIZE, 0, GFP_KERNEL);   
+
+       if (buff == NULL) 
+       {
+               return(-ENOMEM);
+       }
+       lock_sock(sk);
+       buff->sk = sk;
+       buff->free = 0;
+       buff->localroute = sk->localroute;
+       
+       tmp = tcp_v6_build_header(sk, buff);
+
+       /* set the source address */
+                
+       memcpy(&np->saddr, &ifa->addr, sizeof(struct in6_addr));
+       memcpy(&np->rcv_saddr, &ifa->addr, sizeof(struct in6_addr));
+
+       /* build the tcp header */
+       th = (struct tcphdr *) skb_put(buff,sizeof(struct tcphdr));
+       buff->h.th = th;
+
+       memcpy(th, (void *) &(sk->dummy_th), sizeof(*th));
+       buff->seq = sk->write_seq++;
+       th->seq = htonl(buff->seq);
+       tp->snd_nxt = sk->write_seq;
+       buff->end_seq = sk->write_seq;
+       th->ack = 0;
+       th->window = 2;
+       th->syn = 1;
+       th->doff = 6;
+
+       sk->window_clamp=0;
+
+       /* prefer a discovered path MTU over the device MTU */
+       if ((dc->dc_flags & DCF_PMTU))
+               sk->mtu = dc->dc_pmtu;
+       else
+               sk->mtu = dc->rt.rt_dev->mtu;
+
+       sk->mss = sk->mtu - sizeof(struct ipv6hdr) - sizeof(struct tcphdr);
+
+       /*
+        *      Put in the TCP options to say MTU.
+        */
+
+       ptr = skb_put(buff,4);
+       ptr[0] = 2;
+       ptr[1] = 4;
+       ptr[2] = (sk->mss) >> 8;
+       ptr[3] = (sk->mss) & 0xff;
+       buff->csum = csum_partial(ptr, 4, 0);
+
+       tcp_v6_send_check(sk, th, sizeof(struct tcphdr) + 4, buff);
+       
+       tcp_set_state(sk, TCP_SYN_SENT);
+       
+       /* FIXME: should use dcache->rtt if availiable */
+       tp->rto = TCP_TIMEOUT_INIT;
+
+       tcp_init_xmit_timers(sk);
+
+       sk->retransmits = 0;
+
+       /* keep the SYN on the write queue; transmit a clone */
+       skb_queue_tail(&sk->write_queue, buff);
+       sk->packets_out++;
+       buff->when = jiffies;
+       skb1 = skb_clone(buff, GFP_KERNEL);
+       sk->wmem_alloc += skb1->truesize;
+
+       tmp = ipv6_xmit(sk, skb1, &np->saddr, &np->daddr, NULL, IPPROTO_TCP);
+
+       /* Timer for repeating the SYN until an answer  */
+
+       tcp_reset_xmit_timer(sk, TIME_RETRANS, tp->rto);
+       tcp_statistics.TcpActiveOpens++;
+       tcp_statistics.TcpOutSegs++;
+  
+       release_sock(sk);
+       
+       return(tmp);
+}
+
+/*
+ *     sendmsg for TCP/IPv6: validate flags, and if the caller supplied
+ *     an address check that it matches the connected peer (TCP cannot
+ *     redirect an established flow), then defer to tcp_do_sendmsg().
+ */
+static int tcp_v6_sendmsg(struct sock *sk, struct msghdr *msg,
+                         int len, int nonblock, int flags)
+{
+       struct ipv6_pinfo *np = &sk->net_pinfo.af_inet6;
+       int retval = -EINVAL;
+
+       /*
+        *      Do sanity checking for sendmsg/sendto/send
+        */
+
+       if (flags & ~(MSG_OOB|MSG_DONTROUTE))
+               goto out;
+       if (msg->msg_name) {
+               struct sockaddr_in6 *addr=(struct sockaddr_in6 *)msg->msg_name;
+
+               if (msg->msg_namelen < sizeof(*addr))
+                       goto out;
+
+               if (addr->sin6_family && addr->sin6_family != AF_INET6)
+                       goto out;
+               retval = -ENOTCONN;
+
+               if(sk->state == TCP_CLOSE)
+                       goto out;
+               retval = -EISCONN;
+               if (addr->sin6_port != sk->dummy_th.dest)
+                       goto out;
+               if (ipv6_addr_cmp(&addr->sin6_addr, &np->daddr))
+                       goto out;
+       }
+
+       lock_sock(sk);
+       retval = tcp_do_sendmsg(sk, msg->msg_iovlen, msg->msg_iov, 
+                               len, nonblock, flags);
+
+       release_sock(sk);
+
+out:
+       return retval;
+}
+
+/*
+ *     ICMPv6 error handler for TCP sockets.  PKT_TOOBIG revalidates
+ *     the destination cache entry and shrinks sk->mtu; other errors
+ *     are converted to errnos and either abort a connection attempt
+ *     in progress or are recorded as soft errors.
+ *     NOTE(review): the header-size subtraction below is applied
+ *     unconditionally, even when the dest entry carried no new PMTU,
+ *     so repeated TOOBIGs shrink sk->mtu without bound.
+ */
+void tcp_v6_err(int type, int code, unsigned char *header, __u32 info,
+               struct in6_addr *saddr, struct in6_addr *daddr,
+               struct inet6_protocol *protocol)
+{
+       struct tcphdr *th = (struct tcphdr *)header;
+       struct ipv6_pinfo *np;
+       struct sock *sk;
+       int err;
+       int opening;
+
+       sk = inet6_get_sock(&tcpv6_prot, daddr, saddr, th->source, th->dest);
+
+       if (sk == NULL)
+       {
+               return;
+       }
+
+       np = &sk->net_pinfo.af_inet6;
+
+       if (type == ICMPV6_PKT_TOOBIG)
+       {
+               /* icmp should have updated the destination cache entry */
+
+               np->dest = ipv6_dst_check(np->dest, &np->daddr, np->dc_sernum,
+                                         0);
+
+               np->dc_sernum = (np->dest->rt.fib_node ?
+                                np->dest->rt.fib_node->fn_sernum : 0);
+
+               if (np->dest->dc_flags & DCF_PMTU)
+                       sk->mtu = np->dest->dc_pmtu;
+
+               sk->mtu = (sk->mtu - sizeof(struct ipv6hdr) - 
+                          sizeof(struct tcphdr));
+
+               return;
+       }
+
+       opening = (sk->state == TCP_SYN_SENT || sk->state == TCP_SYN_RECV);
+       
+       if (icmpv6_err_convert(type, code, &err) || opening)
+       {
+               sk->err = err;
+               if (opening)
+               {
+                       tcp_statistics.TcpAttemptFails++;
+                       tcp_set_state(sk,TCP_CLOSE);
+                       sk->error_report(sk);
+               }
+       }
+       else
+               sk->err_soft = err;
+}
+
+
+/*
+ *     Build and transmit a SYN|ACK for a pending open request.  The
+ *     advertised MSS is derived from the route to the peer (falling
+ *     back to 516 when no route is available) minus the IPv6 and TCP
+ *     header sizes.
+ */
+static void tcp_v6_send_synack(struct sock *sk, struct open_request *req)
+{
+       struct tcp_v6_open_req *af_req = (struct tcp_v6_open_req *) req;
+       struct tcp_opt *tp = &sk->tp_pinfo.af_tcp;
+       struct sk_buff * skb;
+       struct tcphdr *th;
+       unsigned char *ptr;
+       struct dest_entry *dc;
+       int mss;
+
+       skb = sock_wmalloc(sk, MAX_SYN_SIZE, 1, GFP_ATOMIC);
+       
+       if (skb == NULL)
+       {
+               return;
+       }
+
+       skb_reserve(skb, (MAX_HEADER + 15) & ~15);
+       skb->ipv6_hdr = (struct ipv6hdr *) skb_put(skb, sizeof(struct ipv6hdr));
+
+       dc = ipv6_dst_route(&af_req->rmt_addr, af_req->dev, 0);
+
+       skb->dev = af_req->dev;
+       
+       if (dc)
+       {
+               if (dc->dc_flags & DCF_PMTU)
+                       mss = dc->dc_pmtu;
+               else
+                       mss = dc->dc_nexthop->dev->mtu;
+               mss -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
+
+               ipv6_dst_unlock(dc);
+       }
+       else
+               mss = 516;
+
+       th =(struct tcphdr *) skb_put(skb, sizeof(struct tcphdr));
+       skb->h.th = th;
+       memset(th, 0, sizeof(struct tcphdr));
+       
+       th->syn = 1;
+       th->ack = 1;
+
+       th->source = sk->dummy_th.source;
+       th->dest = req->rmt_port;
+              
+       skb->seq = req->snt_isn;
+       skb->end_seq = skb->seq + 1;
+
+       th->seq = ntohl(skb->seq);
+       th->ack_seq = htonl(req->rcv_isn + 1);
+       th->doff = sizeof(*th)/4 + 1;
+       
+       th->window = ntohs(tp->rcv_wnd);
+
+       /* append the MSS option */
+       ptr = skb_put(skb, TCPOLEN_MSS);
+       ptr[0] = TCPOPT_MSS;
+       ptr[1] = TCPOLEN_MSS;
+       ptr[2] = (mss >> 8) & 0xff;
+       ptr[3] = mss & 0xff;
+       skb->csum = csum_partial(ptr, TCPOLEN_MSS, 0);
+
+       th->check = tcp_v6_check(th, sizeof(*th) + TCPOLEN_MSS, &af_req->loc_addr, 
+                                &af_req->rmt_addr,
+                                csum_partial((char *)th, sizeof(*th), skb->csum));
+
+       ipv6_xmit(sk, skb, &af_req->loc_addr, &af_req->rmt_addr, af_req->opt,
+                 IPPROTO_TCP);
+                                
+       tcp_statistics.TcpOutSegs++;
+                                             
+}
+
+/* nothing extra to free: the open_request is a single allocation */
+static void tcp_v6_or_free(struct open_request *req)
+{
+}
+
+/* open_request operations for IPv6 listen sockets */
+static struct or_calltable or_ipv6 = {
+       tcp_v6_send_synack,
+       tcp_v6_or_free
+};
+
+/*
+ *     Handle an incoming SYN on a listen socket: allocate an
+ *     open_request, record the peer/local addresses and MSS, send a
+ *     SYN|ACK and queue the request until the final ACK arrives.
+ *     IPv4 traffic on a mapped socket is diverted to
+ *     tcp_v4_conn_request().
+ *     NOTE(review): the sk->dead path returns -ENOTCONN without
+ *     freeing the skb — confirm the caller releases it on a nonzero
+ *     return.
+ */
+static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb, void *ptr,
+                              __u32 isn)
+{
+       struct tcp_v6_open_req *af_req;
+       struct open_request *req;
+       
+       /* If the socket is dead, don't accept the connection.  */
+       if (sk->dead)
+       {
+               if(sk->debug)
+               {
+                       printk("Reset on %p: Connect on dead socket.\n",sk);
+               }
+               tcp_statistics.TcpAttemptFails++;
+               return -ENOTCONN;               
+       }
+
+       if (skb->protocol == __constant_htons(ETH_P_IP))
+       {
+               return tcp_v4_conn_request(sk, skb, ptr, isn);
+       }
+
+       /*
+        *      There are no SYN attacks on IPv6, yet...
+        */
+       if (sk->ack_backlog >= sk->max_ack_backlog)
+       {
+               printk(KERN_DEBUG "droping syn ack:%d max:%d\n",
+                      sk->ack_backlog, sk->max_ack_backlog);
+               tcp_statistics.TcpAttemptFails++;
+               goto exit;
+       }
+
+       af_req = kmalloc(sizeof(struct tcp_v6_open_req), GFP_ATOMIC);
+       
+       if (af_req == NULL)
+       {
+               tcp_statistics.TcpAttemptFails++;
+               goto exit;              
+       }
+
+       sk->ack_backlog++;
+       req = (struct open_request *) af_req;
+
+       memset(af_req, 0, sizeof(struct tcp_v6_open_req));
+
+       req->rcv_isn = skb->seq;
+       req->snt_isn = isn;
+
+       /* mss */
+       req->mss = tcp_parse_options(skb->h.th);
+
+       if (!req->mss)
+       {
+               req->mss = 536;
+       }
+
+       req->rmt_port = skb->h.th->source;
+
+       ipv6_addr_copy(&af_req->rmt_addr, &skb->ipv6_hdr->saddr);
+       ipv6_addr_copy(&af_req->loc_addr, &skb->ipv6_hdr->daddr);
+
+       /* FIXME: options */
+
+       /* keep incoming device so that link locals have meaning */
+       af_req->dev = skb->dev;
+
+       req->class = &or_ipv6;
+
+       tcp_v6_send_synack(sk, req);
+
+       req->expires = jiffies + TCP_TIMEOUT_INIT;
+       tcp_inc_slow_timer(TCP_SLT_SYNACK);
+       tcp_synq_queue(&sk->tp_pinfo.af_tcp, req);      
+
+       sk->data_ready(sk, 0);
+
+  exit:
+       /* the SYN itself is no longer needed once a SYN|ACK (or drop)
+          decision has been made */
+       kfree_skb(skb, FREE_READ);
+       return 0;
+}
+
+/*
+ *     Compute the TCP checksum of an outgoing segment using the
+ *     connection's cached address pair.  skb->csum is expected to hold
+ *     the partial checksum of the payload; the TCP header is folded in
+ *     here before the IPv6 pseudo-header magic is applied.
+ */
+static void tcp_v6_send_check(struct sock *sk, struct tcphdr *th, int len, 
+                             struct sk_buff *skb)
+{
+       struct ipv6_pinfo *np = &sk->net_pinfo.af_inet6;
+       th->check = 0;
+       
+       th->check = csum_ipv6_magic(&np->saddr, &np->daddr, len, IPPROTO_TCP, 
+                                   csum_partial((char *)th, sizeof(*th), 
+                                                skb->csum));
+}
+
+/*
+ *     tcp_v6_syn_recv_sock - create the child socket when the three-way
+ *     handshake completes for a queued open_request.
+ *
+ *     For v6-mapped IPv4 connections the work is delegated to
+ *     tcp_v4_syn_recv_sock() and the v6 address fields are synthesised
+ *     from the v4 ones.  Otherwise the listener is cloned wholesale with
+ *     memcpy() and every per-connection field is re-initialised from the
+ *     open_request below.
+ *
+ *     Returns the new sock, or NULL on allocation failure.
+ */
+static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
+                                         struct open_request *req)
+{
+       struct tcp_v6_open_req *af_req = (struct tcp_v6_open_req *) req;
+       struct ipv6_pinfo *np;
+       struct dest_entry *dc;
+       struct tcp_opt *newtp;
+       struct sock *newsk;
+       
+
+       if (skb->protocol == __constant_htons(ETH_P_IP))
+       {
+               /* 
+                *      v6 mapped 
+                */
+               
+               newsk = tcp_v4_syn_recv_sock(sk, skb, req);
+
+               if (newsk == NULL)
+                       return NULL;
+               
+               np = &newsk->net_pinfo.af_inet6;
+
+               /* Build ::ffff:a.b.c.d mapped addresses from the v4 pair. */
+               ipv6_addr_set(&np->daddr, 0, 0, __constant_htonl(0x0000FFFF),
+                             newsk->daddr);
+
+               ipv6_addr_set(&np->saddr, 0, 0, __constant_htonl(0x0000FFFF),
+                             newsk->saddr);
+
+               ipv6_addr_copy(&np->rcv_saddr, &np->saddr);
+
+               newsk->tp_pinfo.af_tcp.af_specific = &ipv6_mapped;
+               newsk->backlog_rcv = tcp_v4_backlog_rcv;
+
+               return newsk;
+       }
+
+       /*
+        *      Clone the listener, then reset everything that must be
+        *      private to the new connection.
+        */
+       newsk = (struct sock *) kmalloc(sizeof(struct sock), GFP_ATOMIC);
+       if (newsk == NULL)
+       {
+               return NULL;
+       }
+
+       memcpy(newsk, sk, sizeof(*newsk));
+       newsk->opt = NULL;
+       newsk->ip_route_cache  = NULL;
+       skb_queue_head_init(&newsk->write_queue);
+       skb_queue_head_init(&newsk->receive_queue);
+       skb_queue_head_init(&newsk->out_of_order_queue);
+       
+       /*
+        *      Unused
+        */
+
+       newsk->send_head = NULL;
+       newsk->send_tail = NULL;
+
+       newtp = &(newsk->tp_pinfo.af_tcp);
+       np = &newsk->net_pinfo.af_inet6;
+
+       newtp->send_head = NULL;
+       newtp->retrans_head = NULL;
+
+       newtp->pending = 0;
+
+       skb_queue_head_init(&newsk->back_log);
+
+       newsk->prot->init(newsk);
+
+       newsk->cong_count = 0;
+       newsk->ssthresh = 0;
+       newtp->backoff = 0;
+       newsk->blog = 0;
+       newsk->intr = 0;
+       newsk->proc = 0;
+       newsk->done = 0;
+       newsk->partial = NULL;
+       newsk->pair = NULL;
+       newsk->wmem_alloc = 0;
+       newsk->rmem_alloc = 0;
+       newsk->localroute = sk->localroute;
+
+       newsk->max_unacked = MAX_WINDOW - TCP_WINDOW_DIFF;
+
+       newsk->err = 0;
+       newsk->shutdown = 0;
+       newsk->ack_backlog = 0;
+
+       /* Sequence state seeded from the handshake recorded in the req. */
+       newsk->fin_seq = req->rcv_isn;
+       newsk->syn_seq = req->rcv_isn;
+       newsk->state = TCP_SYN_RECV;
+       newsk->timeout = 0;
+       newsk->ip_xmit_timeout = 0;
+
+       newsk->write_seq = req->snt_isn;
+
+       newtp->snd_wnd = ntohs(skb->h.th->window);
+       newsk->max_window = newtp->snd_wnd;
+       newtp->snd_wl1 = req->rcv_isn;
+       newtp->snd_wl2 = newsk->write_seq;
+       newtp->snd_una = newsk->write_seq++;
+       newtp->snd_nxt = newsk->write_seq;
+
+       newsk->urg_data = 0;
+       newsk->packets_out = 0;
+       newsk->retransmits = 0;
+       newsk->linger=0;
+       newsk->destroy = 0;
+       init_timer(&newsk->timer);
+       newsk->timer.data = (unsigned long) newsk;
+       newsk->timer.function = &net_timer;
+
+       tcp_init_xmit_timers(newsk);
+
+       newsk->dummy_th.source = sk->dummy_th.source;
+       newsk->dummy_th.dest = req->rmt_port;
+       
+       newtp->rcv_nxt = req->rcv_isn + 1;
+       newtp->rcv_wup = req->rcv_isn + 1;
+       newsk->copied_seq = req->rcv_isn + 1;
+
+       newsk->socket = NULL;
+
+       ipv6_addr_copy(&np->daddr, &af_req->rmt_addr);
+       ipv6_addr_copy(&np->saddr, &af_req->loc_addr);
+       ipv6_addr_copy(&np->rcv_saddr, &af_req->loc_addr);
+       
+       /*
+        *      options / mss: prefer the path MTU from the destination
+        *      cache, else fall back to the incoming device's MTU.
+        */
+       
+       dc = ipv6_dst_route(&af_req->rmt_addr, af_req->dev, 0);
+       np->dest = dc;
+
+       if (np->dest && (np->dest->dc_flags & DCF_PMTU))
+               newsk->mtu = np->dest->dc_pmtu;
+       else
+               newsk->mtu = af_req->dev->mtu;
+
+       newsk->mss = min(req->mss, (newsk->mtu - sizeof(struct ipv6hdr) - 
+                                   sizeof(struct tcphdr)));
+       
+       /*
+        *      The v4 address fields are not used on a pure v6 socket;
+        *      fill them with the LOOPBACK4_IPV6 sentinel.
+        */
+       newsk->daddr    = LOOPBACK4_IPV6;
+       newsk->saddr    = LOOPBACK4_IPV6;
+       newsk->rcv_saddr= LOOPBACK4_IPV6;
+       
+       inet_put_sock(newsk->num, newsk);
+
+       return newsk;
+
+}
+
+/*
+ *     tcp_v6_send_reset - answer an offending segment with a RST.
+ *
+ *     Never replies to a RST with a RST (RFC 793).  The reply swaps the
+ *     port pair; if the incoming segment carried an ACK, its ack_seq is
+ *     used as our sequence number, otherwise seq stays 0 (from the
+ *     memset) and the offending segment is ACKed instead, counting the
+ *     SYN if present.
+ */
+static void tcp_v6_send_reset(struct in6_addr *saddr, struct in6_addr *daddr, 
+                             struct tcphdr *th, struct proto *prot, 
+                             struct ipv6_options *opt,
+                             struct device *dev, int pri, int hop_limit)
+{
+       struct sk_buff *buff;
+       struct tcphdr *t1;
+
+       if(th->rst)
+               return;
+
+       /*
+        * We need to grab some memory, and put together an RST,
+        * and then put it into the queue to be sent.
+        */
+
+       buff = alloc_skb(MAX_RESET_SIZE, GFP_ATOMIC);
+       if (buff == NULL) 
+               return;
+
+       buff->sk = NULL;
+       buff->dev = dev;
+       buff->localroute = 0;
+
+       tcp_v6_build_header(NULL, buff);
+
+       t1 = (struct tcphdr *) skb_put(buff,sizeof(struct tcphdr));
+       memset(t1, 0, sizeof(*t1));
+
+       /*
+        *      Swap the send and the receive. 
+        */
+
+       t1->dest = th->source;
+       t1->source = th->dest;
+       t1->doff = sizeof(*t1)/4;
+       t1->rst = 1;
+  
+       if(th->ack)
+       {
+               t1->seq = th->ack_seq;
+       }
+       else
+       {
+               t1->ack = 1;
+               if(!th->syn)
+                       t1->ack_seq = th->seq;
+               else
+                       t1->ack_seq = htonl(ntohl(th->seq)+1);
+       }
+
+       buff->csum = csum_partial((char *)t1, sizeof(*t1), 0);
+       
+       t1->check = csum_ipv6_magic(saddr, daddr, sizeof(*t1), IPPROTO_TCP,
+                                   buff->csum);
+
+       
+       ipv6_xmit(NULL, buff, saddr, daddr, NULL, IPPROTO_TCP);
+       
+       tcp_statistics.TcpOutSegs++;
+}
+
+/*
+ *     tcp_v6_rcv - main IPv6 TCP input routine.
+ *
+ *     Called from the inet6 protocol switch for every TCP segment, and
+ *     re-entered from tcp_v6_backlog_rcv() with redo == 1.  Verifies the
+ *     checksum, finds the owning socket, matches pending open_requests
+ *     on listening sockets, and hands the segment to the state machine.
+ *     Sends a RST for segments with no matching socket.  Returns 0.
+ */
+int tcp_v6_rcv(struct sk_buff *skb, struct device *dev,
+              struct in6_addr *saddr, struct in6_addr *daddr,
+              struct ipv6_options *opt, unsigned short len,
+              int redo, struct inet6_protocol *protocol)
+{
+       struct tcphdr *th;      
+       struct sock *sk;
+
+       /*
+        * "redo" is 1 if we have already seen this skb but couldn't
+        * use it at that time (the socket was locked).  In that case
+        * we have already done a lot of the work (looked up the socket
+        * etc).
+        */
+
+       th = skb->h.th;
+
+       sk = skb->sk;
+
+       if (!redo)
+       {
+
+               if (skb->pkt_type != PACKET_HOST)
+                       goto discard_it;
+
+               /*
+                *      Pull up the IP header.
+                */
+       
+               skb_pull(skb, skb->h.raw - skb->data);
+
+               /*
+                *      Try to use the device checksum if provided.
+                *      Fall-through is deliberate: CHECKSUM_NONE computes
+                *      the sum, then verifies it like CHECKSUM_HW.
+                */
+               
+               switch (skb->ip_summed) 
+               {
+                       case CHECKSUM_NONE:
+                               skb->csum = csum_partial((char *)th, len, 0);
+                       case CHECKSUM_HW:
+                               if (tcp_v6_check(th,len,saddr,daddr,skb->csum))
+                               {
+                                       printk(KERN_DEBUG "tcp csum failed\n");
+                                       goto discard_it;
+                               }
+                       default:
+                               /* CHECKSUM_UNNECESSARY */
+               }
+
+               sk = inet6_get_sock(&tcpv6_prot, daddr, saddr, 
+                                   th->dest, th->source);
+
+               if (!sk) 
+               {
+                       printk(KERN_DEBUG "socket not found\n");
+                       goto no_tcp_socket;
+               }
+
+               /* Cache host-order sequence numbers on the skb. */
+               skb->sk = sk;
+               skb->seq = ntohl(th->seq);
+               skb->end_seq = skb->seq + th->syn + th->fin + len - th->doff*4;
+               skb->ack_seq = ntohl(th->ack_seq);
+
+               skb->acked = 0;
+               skb->used = 0;
+               skb->free = 1;
+       }               
+
+       /*
+        * We may need to add it to the backlog here. 
+        */
+
+       if (sk->users) 
+       {
+               __skb_queue_tail(&sk->back_log, skb);
+               return(0);
+       }
+
+       /*
+        *      Signal NDISC that the connection is making
+        *      "forward progress"
+        */
+       if (sk->state != TCP_LISTEN)
+       {
+               struct ipv6_pinfo *np = &sk->net_pinfo.af_inet6;
+               struct tcp_opt *tp=&(sk->tp_pinfo.af_tcp);
+
+               if (after(skb->seq, tp->rcv_nxt) ||
+                   after(skb->ack_seq, tp->snd_una))
+               {
+                       if (np->dest)
+                               ndisc_validate(np->dest->dc_nexthop);
+               }
+       }
+
+       if (!sk->prot) 
+       {
+               printk(KERN_DEBUG "tcp_rcv: sk->prot == NULL\n");
+               return(0);
+       }
+
+       atomic_add(skb->truesize, &sk->rmem_alloc);
+
+       if (sk->state == TCP_ESTABLISHED)
+       {
+               tcp_rcv_established(sk, skb, th, len);
+               return 0;
+       }
+       
+       if (sk->state == TCP_LISTEN)
+       {
+               struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
+               struct open_request *req;
+               struct tcp_v6_open_req *af_req;
+
+               /*
+                *      Walk the circular SYN queue looking for a request
+                *      matching this segment's address/port pair.  On a
+                *      match, "sk" becomes the newly created child socket.
+                */
+               req = tp->syn_wait_queue;
+               af_req = (struct tcp_v6_open_req *) req;
+               
+               if (req)
+               {
+                       do {
+                               if (!ipv6_addr_cmp(&af_req->rmt_addr, saddr) &&
+                                   !ipv6_addr_cmp(&af_req->loc_addr, daddr) &&
+                                   req->rmt_port == th->source)
+                               {
+                                       /* match */
+                                       
+                                       atomic_sub(skb->truesize, &sk->rmem_alloc);
+                                       sk = tp->af_specific->syn_recv_sock(sk, skb,
+                                                                           req);
+                                       tcp_dec_slow_timer(TCP_SLT_SYNACK);
+                                       
+                                       if (sk == NULL)
+                                       {
+                                               goto no_tcp_socket;
+                                       }
+                                       
+                                       atomic_add(skb->truesize, &sk->rmem_alloc);
+                                       req->sk = sk;
+                                       skb->sk = sk;
+                                       break;
+                               }
+
+                               req = req->dl_next;
+                       } while (req != tp->syn_wait_queue);
+               }
+
+       }
+
+       if (tcp_rcv_state_process(sk, skb, th, opt, len) == 0)
+               return 0;
+
+no_tcp_socket:
+
+       /*
+        *      No such TCB. If th->rst is 0 send a reset 
+        *      (checked in tcp_send_reset)
+        */
+
+       tcp_v6_send_reset(daddr, saddr, th, &tcpv6_prot, opt, dev, 
+                         skb->ipv6_hdr->priority, 255);
+
+discard_it:
+
+       /*
+        *      Discard frame
+        */
+
+       kfree_skb(skb, FREE_READ);
+       return 0;
+
+}
+
+/*
+ *     Revalidate (or re-acquire) the destination cache entry for this
+ *     connection and rebuild the MAC header in place before a
+ *     retransmit.  Returns 0 on success, -1 if the route to the
+ *     destination has been lost.
+ */
+static int tcp_v6_rebuild_header(struct sock *sk, struct sk_buff *skb)
+{
+       struct ipv6_pinfo *np = &sk->net_pinfo.af_inet6;
+       
+       if (np->dest)
+       {
+               np->dest = ipv6_dst_check(np->dest, &np->daddr,
+                                         np->dc_sernum, 0);
+                                         
+       }
+       else
+       {
+               np->dest = ipv6_dst_route(&np->daddr, NULL, 0);
+       }
+
+       if (!np->dest)
+       {
+               /*
+                *      lost route to destination
+                */
+               return -1;
+       }
+       
+       /* Remember the routing-table generation we validated against. */
+       np->dc_sernum = (np->dest->rt.fib_node ?
+                        np->dest->rt.fib_node->fn_sernum : 0);
+
+       ipv6_redo_mac_hdr(skb, np->dest->dc_nexthop,
+                         skb->tail - (u8*) skb->ipv6_hdr);
+       return 0;
+}
+
+/*
+ *     Process a segment that was queued on the socket backlog while the
+ *     socket was busy; simply re-enters tcp_v6_rcv() with redo == 1.
+ *     NOTE(review): sk->pair is cast to the inet6_protocol pointer here
+ *     -- confirm against the enqueue side that this field really holds
+ *     the protocol for backlogged skbs.
+ */
+static int tcp_v6_backlog_rcv(struct sock *sk, struct sk_buff *skb)
+{
+       int res;
+
+       res = tcp_v6_rcv(skb, skb->dev,
+                        &skb->ipv6_hdr->saddr, &skb->ipv6_hdr->daddr,
+                        (struct ipv6_options *) skb->proto_priv,
+                        skb->len, 1, 
+                        (struct inet6_protocol *) sk->pair);
+       return res;
+}
+
+/*
+ *     Look up the socket owning an incoming segment, keyed on the
+ *     address pair from the IPv6 header and the TCP port pair.
+ */
+static struct sock * tcp_v6_get_sock(struct sk_buff *skb, struct tcphdr *th)
+{
+       struct in6_addr *saddr;
+       struct in6_addr *daddr;
+       struct sock *sk;
+
+       saddr = &skb->ipv6_hdr->saddr;
+       daddr = &skb->ipv6_hdr->daddr;
+
+       sk = inet6_get_sock(&tcpv6_prot, daddr, saddr, th->source, th->dest);
+
+       return sk;
+}
+       
+/*
+ *     Reserve link-layer headroom on a fresh skb (rounded up to a
+ *     16-byte boundary) and claim space for the IPv6 header.
+ *     Always returns 0.
+ */
+static int tcp_v6_build_header(struct sock *sk, struct sk_buff *skb)
+{
+       skb_reserve(skb, (MAX_HEADER + 15) & ~15);
+       skb->ipv6_hdr = (struct ipv6hdr *) skb_put(skb, sizeof(struct ipv6hdr));
+
+       /*
+        *      FIXME: reserve space for option headers
+        *      length member of np->opt
+        */
+
+       return 0;
+}
+
+/*
+ *     Transmit a fully built TCP segment to the connection's peer.
+ *     Any error from ipv6_xmit() is recorded only as a soft error on
+ *     the socket (see the FIXME below).
+ */
+static void tcp_v6_xmit(struct sock *sk, struct device *dev, struct sk_buff *skb,
+                       int free)
+{
+       struct ipv6_pinfo * np = &sk->net_pinfo.af_inet6;
+       int err;
+
+       err = ipv6_xmit(sk, skb, &np->saddr, &np->daddr, NULL, IPPROTO_TCP);
+       
+       /*
+        *      FIXME: check error handling.
+        */
+
+       sk->err_soft = err;
+}
+                    
+
+
+/*
+ *     Fill a sockaddr_in6 with this connection's peer address and port
+ *     (used by getpeername-style callers through tcp_func).
+ */
+static void v6_addr2sockaddr(struct sock *sk, struct sockaddr * uaddr)
+{
+       struct ipv6_pinfo * np = &sk->net_pinfo.af_inet6;
+       struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) uaddr;
+       
+       sin6->sin6_family = AF_INET6;
+       memcpy(&sin6->sin6_addr, &np->daddr, sizeof(struct in6_addr));
+       sin6->sin6_port = sk->dummy_th.dest;
+
+}
+
+/*
+ *     Address-family operations for native IPv6 TCP sockets.
+ */
+static struct tcp_func ipv6_specific = {
+       tcp_v6_build_header,
+       tcp_v6_xmit,
+       tcp_v6_send_check,
+       tcp_v6_rebuild_header,
+       tcp_v6_conn_request,
+       tcp_v6_syn_recv_sock,
+       tcp_v6_init_sequence,
+       tcp_v6_get_sock,
+       ipv6_setsockopt,
+       ipv6_getsockopt,
+       v6_addr2sockaddr,
+       sizeof(struct sockaddr_in6)
+};
+
+/*
+ *     TCP over IPv4 via INET6 API ("v6-mapped"): v4 transmit path,
+ *     v6 connection setup and sockaddr handling.
+ */
+
+static struct tcp_func ipv6_mapped = {
+       tcp_v4_build_header,
+       ip_queue_xmit,
+       tcp_v4_send_check,
+       tcp_v4_rebuild_header,
+       tcp_v6_conn_request,
+       tcp_v6_syn_recv_sock,
+       tcp_v6_init_sequence,
+       tcp_v6_get_sock,
+       ipv6_setsockopt,
+       ipv6_getsockopt,
+       v6_addr2sockaddr,
+       sizeof(struct sockaddr_in6)
+};
+
+/*
+ *     Initialise a fresh IPv6 TCP socket: retransmit timers, RTO
+ *     defaults, initial congestion window of one segment, and a
+ *     conservative MTU/MSS of 576/516 until the path is known.
+ *     Always returns 0.
+ */
+static int tcp_v6_init_sock(struct sock *sk)
+{
+       struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
+
+       skb_queue_head_init(&sk->out_of_order_queue);
+       tcp_init_xmit_timers(sk);
+
+       tp->srtt  = 0;
+       tp->rto  = TCP_TIMEOUT_INIT;            /*TCP_WRITE_TIME*/
+       tp->mdev = TCP_TIMEOUT_INIT;
+
+       tp->ato = 0;
+       tp->iat = (HZ/5) << 3;
+
+       tp->rcv_wnd = 8192;
+
+       /* start with only sending one packet at a time. */
+       sk->cong_window = 1;
+       sk->ssthresh = 0x7fffffff;
+
+       sk->priority = 1;
+       sk->state = TCP_CLOSE;
+
+       /* this is how many unacked bytes we will accept for this socket.  */
+       sk->max_unacked = 2048; /* needs to be at most 2 full packets. */
+       sk->max_ack_backlog = SOMAXCONN;
+       
+       sk->mtu = 576;
+       sk->mss = 516;
+
+       sk->dummy_th.doff = sizeof(sk->dummy_th)/4;
+       
+
+       /*
+        *      Speed up by setting some standard state for the dummy_th
+        *      if TCP uses it (maybe move to tcp_init later)
+        */
+       
+       sk->dummy_th.ack=1;     
+       sk->dummy_th.doff=sizeof(struct tcphdr)>>2;
+
+       sk->tp_pinfo.af_tcp.af_specific = &ipv6_specific;
+
+       return 0;
+}
+
+/*
+ *     Tear down an IPv6 TCP socket: stop timers, drop any queued
+ *     write/out-of-order skbs and release the destination cache entry.
+ *     Always returns 0.
+ */
+static int tcp_v6_destroy_sock(struct sock *sk)
+{
+       struct ipv6_pinfo * np = &sk->net_pinfo.af_inet6;
+       struct sk_buff *skb;
+
+       tcp_clear_xmit_timers(sk);
+       
+       if (sk->keepopen)
+       {
+               tcp_dec_slow_timer(TCP_SLT_KEEPALIVE);
+       }
+
+       /*
+        *      Clean up the write buffer. 
+        */
+        
+       while((skb = skb_dequeue(&sk->write_queue)) != NULL) {
+               IS_SKB(skb);
+               skb->free = 1;
+               kfree_skb(skb, FREE_WRITE);
+       }
+
+       /*
+        *  Cleans up our, hopefully empty, out_of_order_queue
+        */
+
+       while((skb = skb_dequeue(&sk->out_of_order_queue)) != NULL) {
+               IS_SKB(skb);
+               kfree_skb(skb, FREE_READ);
+       }
+
+       /*
+        *      Release destination entry
+        */
+
+       if (np->dest)
+       {
+               ipv6_dst_unlock(np->dest);
+       }
+
+       return 0;
+}
+
+
+/*
+ *     struct proto operations for IPv6 TCP.  The generic tcp_* entries
+ *     are shared with IPv4; only connect, socket init/destroy, sendmsg
+ *     and backlog receive are v6-specific.
+ */
+struct proto tcpv6_prot = {
+       tcp_close,
+       tcp_v6_connect,
+       tcp_accept,
+       NULL,
+       tcp_write_wakeup,
+       tcp_read_wakeup,
+       tcp_select,
+       tcp_ioctl,
+       tcp_v6_init_sock,
+       tcp_v6_destroy_sock,
+       tcp_shutdown,
+       tcp_setsockopt,
+       tcp_getsockopt,
+       tcp_v6_sendmsg,
+       tcp_recvmsg,
+       NULL,                   /* No special bind()    */
+       tcp_v6_backlog_rcv,
+       128,
+       0,
+       "TCPv6",
+       0, 0,
+       NULL
+};
+
+/*
+ *     Registration record for the inet6 protocol switch.
+ */
+static struct inet6_protocol tcpv6_protocol = 
+{
+       tcp_v6_rcv,             /* TCP handler          */
+       tcp_v6_err,             /* TCP error control    */
+       NULL,                   /* next                 */
+       IPPROTO_TCP,            /* protocol ID          */
+       0,                      /* copy                 */
+       NULL,                   /* data                 */
+       "TCPv6"                 /* name                 */
+};
+
+
+/*
+ *     Hook TCPv6 into the inet6 protocol demultiplexer at boot.
+ */
+void tcpv6_init(void)
+{
+       /* register inet6 protocol */
+       inet6_add_protocol(&tcpv6_protocol);
+}
+
+/*
+ * Local variables:
+ *  compile-command: "gcc -D__KERNEL__ -I/usr/src/linux/include -Wall -Wstrict-prototypes -O2 -fomit-frame-pointer -fno-strength-reduce -pipe -m486 -DCPU=486 -DMODULE -DMODVERSIONS -include /usr/src/linux/include/linux/modversions.h  -c -o tcp_ipv6.o tcp_ipv6.c"
+ * c-file-style: "Linux"
+ * End:
+ */
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
new file mode 100644 (file)
index 0000000..0b7de6b
--- /dev/null
@@ -0,0 +1,623 @@
+/*
+ *     UDP over IPv6
+ *     Linux INET6 implementation 
+ *
+ *     Authors:
+ *     Pedro Roque             <roque@di.fc.ul.pt>     
+ *
+ *     Based on linux/ipv4/udp.c
+ *
+ *     $Id: udp.c,v 1.6 1996/10/16 18:34:16 roque Exp $
+ *
+ *     This program is free software; you can redistribute it and/or
+ *      modify it under the terms of the GNU General Public License
+ *      as published by the Free Software Foundation; either version
+ *      2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/socket.h>
+#include <linux/sockios.h>
+#include <linux/sched.h>
+#include <linux/net.h>
+#include <linux/in6.h>
+#include <linux/netdevice.h>
+#include <linux/if_arp.h>
+#include <linux/ipv6.h>
+#include <linux/icmpv6.h>
+
+#include <net/sock.h>
+#include <net/snmp.h>
+
+#include <net/ipv6.h>
+#include <net/ndisc.h>
+#include <net/protocol.h>
+#include <net/transp_v6.h>
+#include <net/ipv6_route.h>
+#include <net/addrconf.h>
+#include <net/ip.h>
+#include <net/udp.h>
+
+#include <net/checksum.h>
+
+struct udp_mib udp_stats_in6;
+
+/*
+ *     udpv6_connect - set the default destination of a UDP socket.
+ *
+ *     An unspecified address is turned into a connect-to-self (::1).
+ *     For a v4-mapped destination, udp_connect() does the real work and
+ *     the v6 address fields are synthesised from the v4 ones.
+ *     NOTE(review): the mapped case does not return early, so the v6
+ *     route lookup below also runs on the mapped address -- confirm
+ *     this is intended.
+ *
+ *     Returns 0 on success or a negative errno.
+ */
+
+int udpv6_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
+{
+       struct sockaddr_in6     *usin = (struct sockaddr_in6 *) uaddr;
+       struct in6_addr         *daddr;
+       struct dest_entry       *dest;
+       struct ipv6_pinfo       *np;
+       struct inet6_ifaddr     *ifa;
+       int                     addr_type;
+
+       if (addr_len < sizeof(*usin)) 
+               return(-EINVAL);
+
+       if (usin->sin6_family && usin->sin6_family != AF_INET6) 
+               return(-EAFNOSUPPORT);
+
+       addr_type = ipv6_addr_type(&usin->sin6_addr);
+       np = &sk->net_pinfo.af_inet6;
+
+       if (addr_type == IPV6_ADDR_ANY)
+       {
+               /*
+                *      connect to self
+                */
+               usin->sin6_addr.s6_addr[15] = 0x01;
+       }
+
+       daddr = &usin->sin6_addr;
+
+       if (addr_type == IPV6_ADDR_MAPPED)
+       {
+               struct sockaddr_in sin;
+               int err;
+
+               sin.sin_family = AF_INET;
+               sin.sin_addr.s_addr = daddr->s6_addr32[3];
+
+               err = udp_connect(sk, (struct sockaddr*) &sin, sizeof(sin));
+               
+               if (err < 0)
+               {
+                       return err;
+               }
+               
+               ipv6_addr_copy(&np->daddr, daddr);
+               
+               /* Synthesise mapped v6 addresses from the v4 results. */
+               if(ipv6_addr_any(&np->saddr))
+               {
+                       ipv6_addr_set(&np->saddr, 0, 0, 
+                                     __constant_htonl(0x0000ffff),
+                                     sk->saddr);
+
+               }
+
+               if(ipv6_addr_any(&np->rcv_saddr))
+               {
+                       ipv6_addr_set(&np->rcv_saddr, 0, 0, 
+                                     __constant_htonl(0x0000ffff),
+                                     sk->rcv_saddr);
+               }
+
+       }
+
+       ipv6_addr_copy(&np->daddr, daddr);
+
+       /*
+        *      Check for a route to destination an obtain the
+        *      destination cache for it.
+        */
+
+       dest = ipv6_dst_route(daddr, NULL, sk->localroute ? RTI_GATEWAY : 0);
+
+       np->dest = dest;
+
+       if (dest == NULL)
+               return -ENETUNREACH;
+
+       /* get the source address used in the appropriate device */
+
+       ifa = ipv6_get_saddr((struct rt6_info *) dest, daddr);
+
+       if(ipv6_addr_any(&np->saddr))
+       {
+               ipv6_addr_copy(&np->saddr, &ifa->addr);
+       }
+
+       if(ipv6_addr_any(&np->rcv_saddr))
+       {
+               ipv6_addr_copy(&np->rcv_saddr, &ifa->addr);
+               /* presumably marks "bound to a v6 address" for the v4
+                  field -- verify against the lookup code */
+               sk->rcv_saddr = 0xffffffff;
+       }
+
+       sk->dummy_th.dest = usin->sin6_port;
+
+       sk->state = TCP_ESTABLISHED;
+
+       return(0);
+}
+
+/*
+ *     Close a UDP socket: drop the destination cache reference and
+ *     destroy the socket.  "timeout" is unused for UDP.
+ */
+static void udpv6_close(struct sock *sk, unsigned long timeout)
+{
+       struct ipv6_pinfo *np = &sk->net_pinfo.af_inet6;
+
+       lock_sock(sk);
+       sk->state = TCP_CLOSE;
+
+       if (np->dest)
+       {
+               ipv6_dst_unlock(np->dest);
+       }
+
+       release_sock(sk);
+       destroy_sock(sk);
+}
+
+/*
+ *     This should be easy, if there is something there we
+ *     return it, otherwise we block.
+ *
+ *     Returns the number of payload bytes copied, or a negative errno.
+ *     NOTE(review): bytes beyond "len" are discarded with the skb and
+ *     no truncation indication is set here -- confirm callers expect
+ *     that.
+ */
+
+int udpv6_recvmsg(struct sock *sk, struct msghdr *msg, int len,
+                 int noblock, int flags, int *addr_len)
+{
+       int copied = 0;
+       int truesize;
+       struct sk_buff *skb;
+       int er;
+       
+
+       /*
+        *      Check any passed addresses
+        */
+        
+       if (addr_len) 
+               *addr_len=sizeof(struct sockaddr_in6);
+  
+       /*
+        *      From here the generic datagram does a lot of the work. Come
+        *      the finished NET3, it will do _ALL_ the work!
+        */
+               
+       skb = skb_recv_datagram(sk, flags, noblock, &er);
+       if(skb==NULL)
+               return er;
+  
+       truesize = skb->tail - skb->h.raw - sizeof(struct udphdr);
+       copied = min(len, truesize);
+
+       /*
+        *      FIXME : should use udp header size info value 
+        */
+        
+       skb_copy_datagram_iovec(skb,sizeof(struct udphdr),msg->msg_iov,copied);
+       sk->stamp=skb->stamp;
+
+       /* Copy the address. */
+       if (msg->msg_name) 
+       {
+               struct sockaddr_in6 *sin6;
+         
+               sin6 = (struct sockaddr_in6 *) msg->msg_name;
+               
+               sin6->sin6_family = AF_INET6;
+               sin6->sin6_port = skb->h.uh->source;
+
+               /* v4 datagram on a v6 socket: report a mapped address. */
+               if (skb->protocol == __constant_htons(ETH_P_IP))
+               {
+                       ipv6_addr_set(&sin6->sin6_addr, 0, 0,
+                                     __constant_htonl(0xffff), skb->daddr);
+               }
+               else
+               {
+                       memcpy(&sin6->sin6_addr, &skb->ipv6_hdr->saddr,
+                              sizeof(struct in6_addr));
+
+                       if (msg->msg_control)
+                       {
+                               int err;
+
+                               err = datagram_recv_ctl(sk, msg, skb);
+
+                               if (err < 0)
+                               {
+                                       copied = err;
+                               }
+                       }
+               }
+       }
+       
+       skb_free_datagram(sk, skb);
+       return(copied);
+}
+
+/*
+ *     ICMPv6 error handler for UDP: find the socket that sent the
+ *     offending datagram and convert the ICMP type/code into either a
+ *     hard error (reported to the application) or a soft one.
+ */
+void udpv6_err(int type, int code, unsigned char *buff, __u32 info,
+              struct in6_addr *saddr, struct in6_addr *daddr,
+              struct inet6_protocol *protocol)
+{
+       struct sock *sk;
+       struct udphdr *uh;
+       int err;
+       
+       uh = (struct udphdr *) buff;
+
+       sk = inet6_get_sock(&udpv6_prot, daddr, saddr, uh->source, uh->dest);
+   
+       if (sk == NULL)
+       {
+               printk(KERN_DEBUG "icmp for unkown sock\n");
+               return;
+       }
+
+       if (icmpv6_err_convert(type, code, &err))
+       {
+               /* BSD semantics: unconnected sockets ignore hard errors */
+               if(sk->bsdism && sk->state!=TCP_ESTABLISHED)
+                       return;
+               
+               sk->err = err;
+               sk->error_report(sk);
+       }
+       else
+               sk->err_soft = err;
+}
+
+/*
+ *     Deliver a datagram to the socket's receive queue, updating the
+ *     UDP MIB counters.  The Ip6InDelivers decrement compensates the
+ *     increment done unconditionally in udpv6_rcv().  Always returns 0.
+ */
+static inline int udpv6_queue_rcv_skb(struct sock * sk, struct sk_buff *skb)
+{
+
+       if (sock_queue_rcv_skb(sk,skb)<0) {
+               udp_stats_in6.UdpInErrors++;
+               ipv6_statistics.Ip6InDiscards++;
+               ipv6_statistics.Ip6InDelivers--;
+               skb->sk = NULL;
+               kfree_skb(skb, FREE_WRITE);
+               return 0;
+       }
+       udp_stats_in6.UdpInDatagrams++;
+       return 0;
+}
+
+/*
+ *     udpv6_rcv - main IPv6 UDP input routine.
+ *
+ *     Validates length and checksum (a zero checksum field is rejected:
+ *     the UDP checksum is mandatory over IPv6), delivers multicast
+ *     datagrams to every matching socket, and unicast datagrams to the
+ *     single matching socket -- or answers with an ICMPv6 port
+ *     unreachable if there is none.  Always returns 0; the skb is
+ *     consumed on every path.
+ */
+int udpv6_rcv(struct sk_buff *skb, struct device *dev,
+             struct in6_addr *saddr, struct in6_addr *daddr,
+             struct ipv6_options *opt, unsigned short len,
+             int redo, struct inet6_protocol *protocol)
+{
+       struct sock *sk;
+       struct udphdr *uh;
+       int ulen;
+
+       /*
+        *      check if the address is ours...
+        *      I believe that this is being done in IP layer
+        */
+
+       uh = (struct udphdr *) skb->h.uh;
+       
+       ipv6_statistics.Ip6InDelivers++;
+
+       ulen = ntohs(uh->len);
+       
+       if (ulen > len || len < sizeof(*uh))
+       {
+               printk(KERN_DEBUG "UDP: short packet: %d/%d\n", ulen, len);
+               udp_stats_in6.UdpInErrors++;
+               kfree_skb(skb, FREE_READ);
+               return(0);
+       }
+
+       if (uh->check == 0)
+       {
+               printk(KERN_DEBUG "IPv6: udp checksum is 0\n");
+               goto discard;
+       }
+
+       /*
+        *      Fall-through is deliberate: CHECKSUM_NONE computes the
+        *      sum, then verifies it like CHECKSUM_HW.
+        */
+       switch (skb->ip_summed) {
+       case CHECKSUM_NONE:
+               skb->csum = csum_partial((char*)uh, len, 0);
+       case CHECKSUM_HW:
+               if (csum_ipv6_magic(saddr, daddr, len, IPPROTO_UDP, skb->csum))
+               {
+                       printk(KERN_DEBUG "IPv6: udp checksum error\n");
+                       goto discard;
+               }
+       }
+       
+       len = ulen;
+
+       /* 
+        *      Multicast receive code 
+        */
+       if (ipv6_addr_type(daddr) & IPV6_ADDR_MULTICAST)
+       {
+               struct sock *sk2;
+               int lport;
+               
+               lport = ntohs(uh->dest);
+               sk = udpv6_prot.sock_array[lport & (SOCK_ARRAY_SIZE-1)];
+
+               sk = inet6_get_sock_mcast(sk, lport, uh->source,
+                                         daddr, saddr);
+
+               if (sk)
+               {
+                       sk2 = sk;
+                       
+                       /*
+                        *      Clone for every further matching socket;
+                        *      the original skb goes to the first match.
+                        *      NOTE(review): skb_clone() may return NULL
+                        *      under GFP_ATOMIC and is not checked here.
+                        */
+                       while ((sk2 = inet6_get_sock_mcast(sk2->next, lport,
+                                                          uh->source,
+                                                          daddr, saddr)))
+                       {
+                               struct sk_buff *buff;
+
+                               buff = skb_clone(skb, GFP_ATOMIC);
+
+                               if (sock_queue_rcv_skb(sk, buff) < 0) 
+                               {
+                                       buff->sk = NULL;
+                                       kfree_skb(buff, FREE_READ);
+                               }
+                       }
+               }
+               if (!sk || sock_queue_rcv_skb(sk, skb) < 0)
+               {
+                       skb->sk = NULL;
+                       kfree_skb(skb, FREE_READ);
+               }
+               return 0;
+       }
+
+       /* Unicast */
+       
+       /* 
+        * check socket cache ... must talk to Alan about his plans
+        * for sock caches... i'll skip this for now.
+        */
+
+       sk = inet6_get_sock(&udpv6_prot, daddr, saddr, uh->dest, uh->source);
+
+       if (sk == NULL)
+       {
+               udp_stats_in6.UdpNoPorts++;
+
+               icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH,
+                           0, dev);
+               
+               kfree_skb(skb, FREE_READ);
+               return(0);
+       }
+
+       /* deliver */
+
+       if (sk->users)
+       {
+               /* socket busy: defer to the backlog */
+               __skb_queue_tail(&sk->back_log, skb);
+       }
+       else
+       {
+               udpv6_queue_rcv_skb(sk, skb);
+       }
+       
+       return(0);
+
+  discard:
+       udp_stats_in6.UdpInErrors++;
+       kfree_skb(skb, FREE_READ);
+       return(0);      
+}
+
+/*
+ *     Sending
+ */
+
+struct udpv6fakehdr 
+{
+       struct udphdr   uh;
+       struct iovec    *iov;
+       __u32           wcheck;
+       __u32           pl_len;
+       struct in6_addr *daddr;
+};
+
+/*
+ *     with checksum
+ */
+
+static void udpv6_getfrag(const void *data, struct in6_addr *addr,
+                         char *buff, unsigned int offset, unsigned int len)
+{
+       struct udpv6fakehdr *udh = (struct udpv6fakehdr *) data;
+       char *dst;
+       int final = 0;
+       int clen = len;
+
+       dst = buff;
+
+       if (offset)
+       {
+               offset -= sizeof(struct udphdr);
+       }
+       else
+       {
+               dst += sizeof(struct udphdr);
+               final = 1;
+               clen -= sizeof(struct udphdr);
+       }
+
+       udh->wcheck = csum_partial_copy_fromiovecend(dst, udh->iov, offset,
+                                                    clen, udh->wcheck);
+
+       if (final)
+       {
+               struct in6_addr *daddr;
+               
+               udh->wcheck = csum_partial((char *)udh, sizeof(struct udphdr),
+                                          udh->wcheck);
+
+               if (udh->daddr)
+               {
+                       daddr = udh->daddr;
+               }
+               else
+               {
+                       /*
+                        *      use packet destination address
+                        *      this should improve cache locality
+                        */
+                       daddr = addr + 1;
+               }
+               udh->uh.check = csum_ipv6_magic(addr, daddr,
+                                               udh->pl_len, IPPROTO_UDP,
+                                               udh->wcheck);
+               if (udh->uh.check == 0)
+                       udh->uh.check = -1;
+
+               memcpy(buff, udh, sizeof(struct udphdr));
+       }
+}
+
+static int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, int ulen, 
+                        int noblock, int flags)
+{
+       
+       struct ipv6_options opt_space;
+       struct udpv6fakehdr udh;
+       struct ipv6_pinfo *np = &sk->net_pinfo.af_inet6;
+       struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) msg->msg_name;
+       struct ipv6_options *opt = NULL;
+       struct device *dev = NULL;
+       int addr_len = msg->msg_namelen;
+       struct in6_addr *daddr;
+       struct in6_addr *saddr = NULL;
+       int len = ulen + sizeof(struct udphdr);
+       int addr_type;
+       int err;
+
+       
+       if (flags & ~MSG_DONTROUTE)
+               return(-EINVAL);
+
+       if (sin6)
+       {
+               if (addr_len < sizeof(*sin6))
+                       return(-EINVAL);
+               
+               if (sin6->sin6_family && sin6->sin6_family != AF_INET6)
+                       return(-EINVAL);
+
+               if (sin6->sin6_port == 0)
+                       return(-EINVAL);
+              
+               udh.uh.dest = sin6->sin6_port;
+               daddr = &sin6->sin6_addr;
+
+               if (np->dest && ipv6_addr_cmp(daddr, &np->daddr))
+               {
+                       ipv6_dst_unlock(np->dest);
+                       np->dest = NULL;
+               }
+       }
+       else
+       {
+               if (sk->state != TCP_ESTABLISHED)
+                       return(-EINVAL);
+               
+               udh.uh.dest = sk->dummy_th.dest;
+               daddr = &sk->net_pinfo.af_inet6.daddr;
+       }
+
+       addr_type = ipv6_addr_type(daddr);
+
+       if (addr_type == IPV6_ADDR_MAPPED)
+       {
+               struct sockaddr_in sin;
+               
+               sin.sin_family = AF_INET;
+               sin.sin_addr.s_addr = daddr->s6_addr32[3];
+
+               return udp_sendmsg(sk, msg, len, noblock, flags);
+       }
+
+       udh.daddr = NULL;
+       
+       if (msg->msg_control)
+       {
+               opt = &opt_space;
+               memset(opt, 0, sizeof(struct ipv6_options));
+
+               err = datagram_send_ctl(msg, &dev, &saddr, opt);
+               if (err < 0)
+               {
+                       printk(KERN_DEBUG "invalid msg_control\n");
+                       return err;
+               }
+               
+               if (opt->srcrt)
+               {                       
+                       udh.daddr = daddr;
+               }
+       }
+       
+       udh.uh.source = sk->dummy_th.source;
+       udh.uh.len = htons(ulen);
+       udh.uh.check = 0;
+       udh.iov = msg->msg_iov;
+       udh.wcheck = 0;
+       udh.pl_len = len;
+       
+       err = ipv6_build_xmit(sk, udpv6_getfrag, &udh, daddr, len,
+                             saddr, dev, opt, IPPROTO_UDP, noblock);
+       
+       if (err < 0)
+               return err;
+
+       udp_stats_in6.UdpOutDatagrams++;
+       return ulen;
+}
+
+static struct inet6_protocol udpv6_protocol = 
+{
+       udpv6_rcv,              /* UDP handler          */
+       udpv6_err,              /* UDP error control    */
+       NULL,                   /* next                 */
+       IPPROTO_UDP,            /* protocol ID          */
+       0,                      /* copy                 */
+       NULL,                   /* data                 */
+       "UDPv6"                 /* name                 */
+};
+
+
+struct proto udpv6_prot = {
+       udpv6_close,
+       udpv6_connect,
+       NULL,
+       NULL,
+       NULL,
+       NULL,
+       datagram_select,
+       udp_ioctl,
+       NULL,
+       NULL,
+       NULL,
+       ipv6_setsockopt,
+       ipv6_getsockopt,
+       udpv6_sendmsg,
+       udpv6_recvmsg,
+       NULL,           /* No special bind function */
+       udpv6_queue_rcv_skb,
+       128,
+       0,
+       "UDP",
+       0, 0,
+       NULL
+};
+
+void udpv6_init(void)
+{
+       inet6_add_protocol(&udpv6_protocol);
+}
index a2794eb71af7257494cc6db542f78dc8bdb8281d..c496971ab975435a9aaed68561b9475802ccfc3e 100644 (file)
@@ -82,7 +82,8 @@ static int netlink_select(struct inode *inode, struct file *file, int sel_type,
  *     Write a message to the kernel side of a communication link
  */
  
-static int netlink_write(struct inode * inode, struct file * file, const char * buf, int count)
+static long netlink_write(struct inode * inode, struct file * file,
+                         const char * buf, unsigned long count)
 {
        unsigned int minor = MINOR(inode->i_rdev);
        struct sk_buff *skb;
@@ -96,7 +97,8 @@ static int netlink_write(struct inode * inode, struct file * file, const char *
  *     Read a message from the kernel side of the communication link
  */
 
-static int netlink_read(struct inode * inode, struct file * file, char * buf, int count)
+static long netlink_read(struct inode * inode, struct file * file, char * buf,
+                        unsigned long count)
 {
        unsigned int minor = MINOR(inode->i_rdev);
        struct sk_buff *skb;
@@ -124,8 +126,8 @@ static int netlink_read(struct inode * inode, struct file * file, char * buf, in
        return count;
 }
 
-static int netlink_lseek(struct inode * inode, struct file * file,
-                   off_t offset, int origin)
+static loff_t netlink_lseek(struct inode * inode, struct file * file,
+                           loff_t offset, int origin)
 {
        return -ESPIPE;
 }
index fe3c062a298c50efb533f4af8cd8fe959a044dc5..58d2111f539239219155f2edb67febd41a54a8ae 100644 (file)
 #include <net/tcp.h>
 #include <net/icmp.h>
 #include <net/route.h>
+#include <net/inet_common.h>
 #include <linux/net_alias.h>
+
+#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
+#include <linux/in6.h>
+#include <net/ndisc.h>
+#include <net/transp_v6.h>
+#endif
+
 #endif
 
 #ifdef CONFIG_NETLINK
@@ -55,9 +63,6 @@ extern void destroy_EII_client(struct datalink_proto *);
 extern void destroy_8023_client(struct datalink_proto *);
 #endif
 
-#ifdef CONFIG_DLCI_MODULE
-extern int (*dlci_ioctl_hook)(unsigned int, void *);
-#endif
 
 static struct symbol_table net_syms = {
 #include <linux/symtab_begin.h>
@@ -89,14 +94,10 @@ static struct symbol_table net_syms = {
 
 #ifdef CONFIG_INET
        /* Internet layer registration */
+       X(get_new_socknum),
        X(inet_add_protocol),
        X(inet_del_protocol),
        X(rarp_ioctl_hook),
-
-#ifdef CONFIG_DLCI_MODULE
-        X(dlci_ioctl_hook),
-#endif
-
        X(init_etherdev),
        X(ip_rt_route),
        X(icmp_send),
@@ -109,6 +110,79 @@ static struct symbol_table net_syms = {
        X(ip_forward),
 #endif
 
+#ifdef CONFIG_IPV6_MODULE
+       /* inet functions common to v4 and v6 */
+       X(inet_proto_ops),
+       X(inet_remove_sock),
+       X(inet_release),
+       X(inet_connect),
+       X(inet_accept),
+       X(inet_select),
+       X(inet_listen),
+       X(inet_shutdown),
+       X(inet_setsockopt),
+       X(inet_getsockopt),
+       X(inet_fcntl),
+       X(inet_sendmsg),
+       X(inet_recvmsg),
+       X(tcp_sock_array),
+       X(udp_sock_array),
+       X(destroy_sock),
+       X(ip_queue_xmit),
+       X(csum_partial),
+       X(ip_my_addr),
+       X(skb_copy),
+       X(dev_lockct),
+       X(ndisc_eth_hook),
+       X(memcpy_fromiovecend),
+       X(csum_partial_copy),
+       X(csum_partial_copy_fromiovecend),
+       X(__release_sock),
+       X(net_timer),
+       X(inet_put_sock),
+       /* UDP/TCP exported functions for TCPv6 */
+       X(udp_ioctl),
+       X(udp_connect),
+       X(udp_sendmsg),
+       X(tcp_cache_zap),
+       X(tcp_close),
+       X(tcp_accept),
+       X(tcp_write_wakeup),
+       X(tcp_read_wakeup),
+       X(tcp_select),
+       X(tcp_ioctl),
+       X(tcp_shutdown),
+       X(tcp_setsockopt),
+       X(tcp_getsockopt),
+       X(tcp_recvmsg),
+       X(tcp_send_synack),
+       X(sock_wfree),
+       X(sock_wmalloc),
+       X(tcp_reset_xmit_timer),
+       X(tcp_parse_options),
+       X(tcp_rcv_established),
+       X(tcp_init_xmit_timers),
+       X(tcp_clear_xmit_timers),
+       X(tcp_slt_array),
+       X(tcp_slow_timer),
+       X(tcp_statistics),
+       X(tcp_rcv_state_process),
+       X(tcp_do_sendmsg),
+       X(tcp_v4_build_header),
+       X(tcp_v4_rebuild_header),
+       X(tcp_v4_send_check),
+       X(tcp_v4_conn_request),
+       X(tcp_v4_syn_recv_sock),
+       X(tcp_v4_backlog_rcv),
+       X(tcp_v4_connect),
+       X(ip_chk_addr),
+       X(net_reset_timer),
+       X(net_delete_timer),
+       X(udp_prot),
+       X(tcp_prot),
+       X(ipv4_specific),
+#endif
+
 #if    defined(CONFIG_ULTRA)   ||      defined(CONFIG_WD80x3)          || \
        defined(CONFIG_EL2)     ||      defined(CONFIG_NE2000)          || \
        defined(CONFIG_E2100)   ||      defined(CONFIG_HPLAN_PLUS)      || \
index f10cc40612121f9f9c25dfcb056b89d5a08fb2d0..23e3d55b2e959840a0b09094074588c1ad65ceea 100644 (file)
 #ifdef CONFIG_UNIX
 #include <net/af_unix.h>
 #endif
+
 #ifdef CONFIG_INET
 #include <linux/inet.h>
+#ifdef CONFIG_IPV6
+extern void inet6_proto_init(struct net_proto *pro);
 #endif
+#endif /* INET */
+
 #if defined(CONFIG_IPX) || defined(CONFIG_IPX_MODULE)
 #include <net/ipxcall.h>
 #include <net/p8022call.h>
@@ -67,6 +72,9 @@ struct net_proto protocols[] = {
 #endif  
 #ifdef CONFIG_INET
   { "INET",    inet_proto_init },                      /* TCP/IP                       */
+#ifdef CONFIG_IPV6
+  { "INET6",   inet6_proto_init},                      /* IPv6 */
+#endif
 #endif
 #ifdef  CONFIG_IPX
   { "IPX",     ipx_proto_init },                       /* IPX                          */
index d4e0dcb60b7cdff2baa7af7f34c37aefe9957271..e065428f083f3bcaccf72d495b6634b7af2dba79 100644 (file)
@@ -127,18 +127,19 @@ static int sockets_in_use  = 0;
  *     divide and look after the messy bits.
  */
 
-#define MAX_SOCK_ADDR  128             /* 108 for Unix domain - 16 for IP, 16 for IPX, about 80 for AX.25 */
+#define MAX_SOCK_ADDR  128             /* 108 for Unix domain - 
+                                          16 for IP, 16 for IPX,
+                                          24 for IPv6,
+                                          about 80 for AX.25 */
  
 int move_addr_to_kernel(void *uaddr, int ulen, void *kaddr)
 {
-       int err;
        if(ulen<0||ulen>MAX_SOCK_ADDR)
                return -EINVAL;
        if(ulen==0)
                return 0;
-       if((err=verify_area(VERIFY_READ,uaddr,ulen))<0)
-               return err;
-       copy_from_user(kaddr,uaddr,ulen);
+       if(copy_from_user(kaddr,uaddr,ulen))
+               return -EFAULT;
        return 0;
 }
 
@@ -146,22 +147,19 @@ int move_addr_to_user(void *kaddr, int klen, void *uaddr, int *ulen)
 {
        int err;
        int len;
-
                
-       if((err=verify_area(VERIFY_WRITE,ulen,sizeof(*ulen)))<0)
+       if((err=get_user(len, ulen)))
                return err;
-       get_user(len,ulen);
        if(len>klen)
                len=klen;
        if(len<0 || len> MAX_SOCK_ADDR)
                return -EINVAL;
        if(len)
        {
-               if((err=verify_area(VERIFY_WRITE,uaddr,len))<0)
-                       return err;
-               copy_to_user(uaddr,kaddr,len);
+               if(copy_to_user(uaddr,kaddr,len))
+                       return -EFAULT;
        }
-       put_user(len,ulen);
+       put_user(len, ulen);
        return 0;
 }
 
@@ -1118,6 +1116,7 @@ asmlinkage int sys_sendmsg(int fd, struct msghdr *msg, unsigned int flags)
        char address[MAX_SOCK_ADDR];
        struct iovec iov[UIO_MAXIOV];
        struct msghdr msg_sys;
+       void * krn_msg_ctl = NULL;
        int err;
        int total_len;
        
@@ -1145,8 +1144,26 @@ asmlinkage int sys_sendmsg(int fd, struct msghdr *msg, unsigned int flags)
        if (err < 0)
                return err;
        total_len=err;
+               
+       if (msg_sys.msg_control)
+       {
+               krn_msg_ctl = kmalloc(msg_sys.msg_controllen, GFP_KERNEL);
+               err = copy_from_user(krn_msg_ctl, msg_sys.msg_control,
+                                    msg_sys.msg_controllen);
+               if (err)
+                       return -EFAULT;
+               msg_sys.msg_control = krn_msg_ctl;
+       }
+
+       err = sock->ops->sendmsg(sock, &msg_sys, total_len,
+                                (file->f_flags&O_NONBLOCK), flags);
+
+       if (msg_sys.msg_control)
+       {
+               kfree(krn_msg_ctl);
+       }
 
-       return sock->ops->sendmsg(sock, &msg_sys, total_len, (file->f_flags&O_NONBLOCK), flags);
+       return err;
 }
 
 /*
@@ -1159,6 +1176,8 @@ asmlinkage int sys_recvmsg(int fd, struct msghdr *msg, unsigned int flags)
        struct file *file;
        struct iovec iov[UIO_MAXIOV];
        struct msghdr msg_sys;
+       void *usr_msg_ctl = NULL;
+       void *krn_msg_ctl = NULL;
        int err;
        int total_len;
        int len;
@@ -1179,7 +1198,9 @@ asmlinkage int sys_recvmsg(int fd, struct msghdr *msg, unsigned int flags)
        err=verify_area(VERIFY_READ, msg,sizeof(struct msghdr));
        if(err)
                return err;
+
        copy_from_user(&msg_sys,msg,sizeof(struct msghdr));
+
        if(msg_sys.msg_iovlen>UIO_MAXIOV)
                return -EINVAL;
 
@@ -1194,6 +1215,19 @@ asmlinkage int sys_recvmsg(int fd, struct msghdr *msg, unsigned int flags)
                return err;
 
        total_len=err;
+
+       
+
+       if (msg_sys.msg_control)
+       {
+               usr_msg_ctl = msg_sys.msg_control;
+               krn_msg_ctl = kmalloc(msg_sys.msg_controllen, GFP_KERNEL);
+               err = copy_from_user(krn_msg_ctl, usr_msg_ctl,
+                                    msg_sys.msg_controllen);
+               if (err)
+                       return -EFAULT;
+               msg_sys.msg_control = krn_msg_ctl;
+       }
        
        if(sock->ops->recvmsg==NULL)
                return -EOPNOTSUPP;
@@ -1206,6 +1240,13 @@ asmlinkage int sys_recvmsg(int fd, struct msghdr *msg, unsigned int flags)
                if (err)
                        return err;
        }
+
+       if (msg_sys.msg_control)
+       {
+               copy_to_user(usr_msg_ctl, krn_msg_ctl, msg_sys.msg_controllen);
+               kfree(krn_msg_ctl);
+       }
+
        return len;
 }
 
index bb06dab376b2a2d72ab42bc5e821e0b58b6d9f50..8792f1c4047ec93171cef12b403563f8e47669cb 100644 (file)
@@ -46,6 +46,10 @@ extern ctl_table ether_table[], e802_table[];
 extern ctl_table bridge_table[];
 #endif
 
+#ifdef CONFIG_IPV6
+extern ctl_table ipv6_table[];
+#endif
+
 ctl_table net_table[] = {
        {NET_CORE,   "core",      NULL, 0, 0555, core_table},      
         {NET_UNIX,   "unix",      NULL, 0, 0555, unix_table},
@@ -70,6 +74,9 @@ ctl_table net_table[] = {
 #endif
 #ifdef CONFIG_BRIDGE
         {NET_BRIDGE, "bridge",    NULL, 0, 0555, bridge_table},
+#endif
+#ifdef CONFIG_IPV6
+       {NET_IPV6, "ipv6", NULL, 0, 0555, ipv6_table},
 #endif
        {0}
 };
index fc5ececae627cab57dba284274d0129f7c341894..e21c3e043a29509e2a4c7e91d0f9b090d43fbc2e 100644 (file)
@@ -675,31 +675,6 @@ static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int *uaddr_
        return 0;
 }
 
-/*
- *     Support routines for struct cmsghdr handling
- */
-static struct cmsghdr *unix_copyrights(void *userp, int len)
-{
-       struct cmsghdr *cm;
-
-       if(len>256|| len <=0)
-               return NULL;
-       cm=kmalloc(len, GFP_KERNEL);
-       copy_from_user(cm, userp, len);
-       return cm;
-}
-
-/*
- *     Return a header block
- */
-static void unix_returnrights(void *userp, int len, struct cmsghdr *cm)
-{
-       copy_to_user(userp, cm, len);
-       kfree(cm);
-}
-
 /*
  *     Copy file descriptors into system space.
  *     Return number copied or negative error code
@@ -724,9 +699,6 @@ static int unix_fd_copy(struct sock *sk, struct cmsghdr *cmsg, struct file **fp)
                int fd;
                
                fd = fdp[i];    
-#if 0
-               printk("testing  fd %d\n", fd);
-#endif
                if (fd < 0 || fd >= NR_OPEN)
                        return -EBADF;
                if (current->files->fd[fd]==NULL)
@@ -891,18 +863,18 @@ static int unix_sendmsg(struct socket *sock, struct msghdr *msg, int len, int no
         */
        if(msg->msg_control) 
        {
-               struct cmsghdr *cm=unix_copyrights(msg->msg_control, 
-                                               msg->msg_controllen);
+               struct cmsghdr *cm = msg->msg_control;
+
                if(cm==NULL || msg->msg_controllen<sizeof(struct cmsghdr) ||
                   cm->cmsg_type!=SCM_RIGHTS ||
                   cm->cmsg_level!=SOL_SOCKET ||
                   msg->msg_controllen!=cm->cmsg_len)
                {
-                       kfree(cm);
                        return -EINVAL;
                }
-               fpnum=unix_fd_copy(sk,cm,fp);
-               kfree(cm);
+
+               fpnum = unix_fd_copy(sk, cm, fp);
+
                if(fpnum<0) {
                        return fpnum;
                }
@@ -1064,8 +1036,8 @@ static int unix_recvmsg(struct socket *sock, struct msghdr *msg, int size, int n
 
        if(msg->msg_control) 
        {
-               cm=unix_copyrights(msg->msg_control, 
-                       msg->msg_controllen);
+               cm=msg->msg_control;
+
                if(msg->msg_controllen<sizeof(struct cmsghdr)
 #if 0 
 /*             investigate this further -- Stevens example doesn't seem to care */
@@ -1076,8 +1048,7 @@ static int unix_recvmsg(struct socket *sock, struct msghdr *msg, int size, int n
 #endif
                )
                {
-                       kfree(cm);
-/*                     printk("recvmsg: Bad msg_control\n");*/
+                       printk(KERN_DEBUG "unix_recvmsg: Bad msg_control\n");
                        return -EINVAL;
                }
        }
@@ -1106,9 +1077,9 @@ static int unix_recvmsg(struct socket *sock, struct msghdr *msg, int size, int n
                                        return copied;
                                if(noblock)
                                        return -EAGAIN;
+                               unix_data_wait(sk);
                                if(current->signal & ~current->blocked)
                                        return -ERESTARTSYS;
-                               unix_data_wait(sk);
                                down(&sk->protinfo.af_unix.readsem);
                                continue;
                        }
@@ -1149,8 +1120,7 @@ static int unix_recvmsg(struct socket *sock, struct msghdr *msg, int size, int n
        }
 out:
        up(&sk->protinfo.af_unix.readsem);
-       if(cm)
-               unix_returnrights(msg->msg_control,msg->msg_controllen,cm);
+
        return copied;
 }
 
@@ -1305,7 +1275,7 @@ static struct proc_dir_entry proc_net_unix = {
 
 void unix_proto_init(struct net_proto *pro)
 {
-       printk(KERN_INFO "NET3: Unix domain sockets 0.12 for Linux NET3.035.\n");
+       printk(KERN_INFO "NET3: Unix domain sockets 0.13 for Linux NET3.035.\n");
        sock_register(unix_proto_ops.family, &unix_proto_ops);
 #ifdef CONFIG_PROC_FS
        proc_net_register(&proc_net_unix);
index ae8fea6839be95ad849e599bf07ec736e6a7d18b..6476860b7a8a0fd708e6f79f5c54faae99107fca 100644 (file)
@@ -6,18 +6,18 @@
 #
 makedev () {
        rm -f /dev/$1
-       echo mknod /dev/$1 b $2 $3
-            mknod /dev/$1 b $2 $3
+       echo mknod /dev/$1 $2 $3 $4
+            mknod /dev/$1 $2 $3 $4
        chown root:disk /dev/$1
        chmod 660 /dev/$1
 }
 
 makedevs () {
        rm -f /dev/$1*
-       makedev $1 $2 $3
+       makedev $1 $2 $3
        for part in 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16
        do
-               makedev $1$part $2 `expr $3 + $part`
+               makedev $1$part $2 `expr $3 + $part`
        done
 }
 
@@ -30,18 +30,8 @@ makedevs hdf 33 64
 makedevs hdg 34 0
 makedevs hdh 34 64
 
-# Create the ide-tape rewinding character device.
-
-rm -f /dev/ht0
-echo mknod /dev/ht0 c 37 0
-     mknod /dev/ht0 c 37 0
-chown root:disk /dev/ht0
-chmod 660 /dev/ht0
-
-# Create the ide-tape non rewinding character device.
-
-rm -f /dev/nht0
-echo mknod /dev/nht0 c 37 128
-     mknod /dev/nht0 c 37 128
-chown root:disk /dev/nht0
-chmod 660 /dev/nht0
+for tape in 0 1 2 3 4 5 6 7
+do
+       makedev ht$tape c 37 $tape
+       makedev nht$tape c 37 `expr $tape + 128`
+done