]> git.neil.brown.name Git - history.git/commitdiff
Linux 2.4.0-test7pre1 2.4.0-test7pre1
authorLinus Torvalds <torvalds@linuxfoundation.org>
Fri, 23 Nov 2007 20:37:53 +0000 (15:37 -0500)
committerLinus Torvalds <torvalds@linuxfoundation.org>
Fri, 23 Nov 2007 20:37:53 +0000 (15:37 -0500)
    - fix PCI resource bug that crept in in test6 due to the new
      requirements to handle multiple bus regions transparently
    - ll_rw_block documentation
    - sound driver module counting bugfix and cleanup (move to named
      initializers)
    - directory rename bug fix for busy directories (oops)
    - allow "init_new_context()" to fail - it can do so on some
      architectures when out of memory.
    - networking updates - TCP retransmission and ordering logic
    - fix strsep(). Not that anybody cared.

162 files changed:
Documentation/fb/matroxfb.txt
Documentation/networking/ip-sysctl.txt
MAINTAINERS
Makefile
arch/sh/kernel/cf-enabler.c
arch/sh/kernel/io.c
arch/sh/kernel/io_generic.c
arch/sh/kernel/mach_se.c
arch/sh/kernel/mach_unknown.c
arch/sh/kernel/setup_cqreek.c
arch/sparc/config.in
arch/sparc/defconfig
arch/sparc/mm/init.c
arch/sparc64/config.in
arch/sparc64/defconfig
arch/sparc64/kernel/ioctl32.c
arch/sparc64/kernel/sparc64_ksyms.c
drivers/block/linear.c
drivers/block/ll_rw_blk.c
drivers/block/raid0.c
drivers/char/Config.in
drivers/char/drm/ffb_drv.c
drivers/char/serial.c
drivers/net/wan/Config.in
drivers/pci/pci.ids
drivers/pci/setup-res.c
drivers/scsi/aic7xxx.c
drivers/sound/aci.c
drivers/sound/ad1816.c
drivers/sound/ad1848.c
drivers/sound/ad1848.h
drivers/sound/adlib_card.c
drivers/sound/aedsp16.c
drivers/sound/audio.c
drivers/sound/awe_wave.c
drivers/sound/cs4232.c
drivers/sound/dev_table.h
drivers/sound/gus_card.c
drivers/sound/gus_midi.c
drivers/sound/gus_wave.c
drivers/sound/ics2101.c
drivers/sound/mad16.c
drivers/sound/maui.c
drivers/sound/midi_synth.c
drivers/sound/midi_synth.h
drivers/sound/midibuf.c
drivers/sound/mpu401.c
drivers/sound/mpu401.h
drivers/sound/nm256_audio.c
drivers/sound/opl3.c
drivers/sound/opl3.h
drivers/sound/opl3sa.c
drivers/sound/opl3sa2.c
drivers/sound/pas2_card.c
drivers/sound/pas2_midi.c
drivers/sound/pas2_mixer.c
drivers/sound/pas2_pcm.c
drivers/sound/pss.c
drivers/sound/sb.h
drivers/sound/sb_audio.c
drivers/sound/sb_card.c
drivers/sound/sb_common.c
drivers/sound/sb_ess.c
drivers/sound/sb_midi.c
drivers/sound/sb_mixer.c
drivers/sound/sequencer.c
drivers/sound/sgalaxy.c
drivers/sound/skeleton.c
drivers/sound/softoss.c
drivers/sound/sound_syms.c
drivers/sound/sound_timer.c
drivers/sound/soundcard.c
drivers/sound/soundmodule.h [deleted file]
drivers/sound/sscape.c
drivers/sound/sys_timer.c
drivers/sound/trix.c
drivers/sound/uart401.c
drivers/sound/uart6850.c
drivers/sound/v_midi.c
drivers/sound/vidc.c
drivers/sound/waveartist.c
drivers/sound/wavfront.c
drivers/sound/wf_midi.c
drivers/sound/ymf_sb.c
drivers/video/matrox/matroxfb_DAC1064.c
drivers/video/matrox/matroxfb_Ti3026.c
drivers/video/matrox/matroxfb_accel.c
drivers/video/matrox/matroxfb_base.c
drivers/video/matrox/matroxfb_base.h
drivers/video/matrox/matroxfb_misc.c
fs/exec.c
fs/jffs/inode-v23.c
fs/namei.c
fs/pipe.c
include/asm-alpha/bitops.h
include/asm-alpha/mmu_context.h
include/asm-arm/mmu_context.h
include/asm-i386/mmu_context.h
include/asm-ia64/mmu_context.h
include/asm-m68k/mmu_context.h
include/asm-mips/mmu_context.h
include/asm-mips64/mmu_context.h
include/asm-ppc/mmu_context.h
include/asm-s390/mmu_context.h
include/asm-sh/dma.h
include/asm-sh/machvec_init.h
include/asm-sh/mmu_context.h
include/asm-sparc/mmu_context.h
include/asm-sparc/page.h
include/asm-sparc64/mmu_context.h
include/asm-sparc64/page.h
include/linux/highmem.h
include/linux/if_packet.h
include/linux/netfilter_ipv4/ip_conntrack_tuple.h
include/linux/netfilter_ipv4/ip_queue.h
include/linux/pci_ids.h
include/linux/rtnetlink.h
include/linux/serial.h
include/linux/skbuff.h
include/linux/sysctl.h
include/net/dst.h
include/net/snmp.h
include/net/sock.h
include/net/tcp.h
include/net/tcp_ecn.h [new file with mode: 0644]
kernel/fork.c
lib/string.c
net/core/neighbour.c
net/core/sock.c
net/ipv4/Makefile
net/ipv4/af_inet.c
net/ipv4/ip_sockglue.c
net/ipv4/netfilter/Makefile
net/ipv4/netfilter/ip_conntrack_core.c
net/ipv4/netfilter/ip_conntrack_ftp.c
net/ipv4/netfilter/ip_conntrack_standalone.c
net/ipv4/netfilter/ip_nat_core.c
net/ipv4/netfilter/ip_nat_ftp.c
net/ipv4/netfilter/ip_nat_rule.c
net/ipv4/netfilter/ip_queue.c
net/ipv4/netfilter/ipt_LOG.c
net/ipv4/netfilter/ipt_MASQUERADE.c
net/ipv4/netfilter/ipt_REJECT.c
net/ipv4/netfilter/ipt_limit.c
net/ipv4/proc.c
net/ipv4/raw.c
net/ipv4/route.c
net/ipv4/sysctl_net_ipv4.c
net/ipv4/tcp.c
net/ipv4/tcp_input.c
net/ipv4/tcp_ipv4.c
net/ipv4/tcp_minisocks.c [new file with mode: 0644]
net/ipv4/tcp_output.c
net/ipv4/tcp_timer.c
net/ipv4/udp.c
net/ipv6/raw.c
net/ipv6/route.c
net/ipv6/tcp_ipv6.c
net/ipv6/udp.c
net/netsyms.c
net/packet/af_packet.c
net/unix/af_unix.c

index 13a641b9836460c5e4e5becf136c97515593aa28..0b8068dc5c9f492822c942e00c161b444bcc428e 100644 (file)
@@ -146,6 +146,27 @@ noinit   - tells driver, that devices were already initialized. You should use
           strange pattern on screen and so on. Devices not enabled by BIOS
           are still initialized. It is default.
 init     - driver initializes every device it knows about.
+memtype  - specifies memory type, implies 'init'. This is valid only for G200 
+           and G400 and has following meaning:
+             G200: 0 -> 2x128Kx32 chips, 2MB onboard, probably sgram
+                   1 -> 2x128Kx32 chips, 4MB onboard, probably sgram
+                   2 -> 2x256Kx32 chips, 4MB onboard, probably sgram
+                   3 -> 2x256Kx32 chips, 8MB onboard, probably sgram
+                   4 -> 2x512Kx16 chips, 8/16MB onboard, probably sdram only
+                   5 -> same as above
+                   6 -> 4x128Kx32 chips, 4MB onboard, probably sgram
+                   7 -> 4x128Kx32 chips, 8MB onboard, probably sgram
+             G400: 0 -> 2x512Kx16 SDRAM, 16/32MB
+                        2x512Kx32 SGRAM, 16/32MB
+                   1 -> 2x256Kx32 SGRAM, 8/16MB
+                   2 -> 4x128Kx32 SGRAM, 8/16MB
+                   3 -> 4x512Kx32 SDRAM, 32MB
+                   4 -> 4x256Kx32 SGRAM, 16/32MB
+                   5 -> 2x1Mx32 SDRAM, 32MB
+                   6 -> reserved
+                   7 -> reserved
+           You should use sdram or sgram parameter in addition to memtype 
+           parameter.
 nomtrr   - disables write combining on frame buffer. This slows down driver but
            there is reported minor incompatibility between GUS DMA and XFree
           under high loads if write combining is enabled (sound dropouts).
index e1436a7bb7714ee48b4bbebb6ed4ef4db5bd7911..eb28764f21b8e36a2b7322e565e7a69e5d08e217 100644 (file)
@@ -194,13 +194,84 @@ tcp_timestamps - BOOLEAN
        Enable timestamps as defined in RFC1323.
 
 tcp_sack - BOOLEAN
-       Enable select acknowledgments.
+       Enable select acknowledgments (SACKS).
+
+tcp_fack - BOOLEAN
+       Enable FACK congestion avoidance and fast retransmission.
+       The value is not used, if tcp_sack is not enabled.
+
+tcp_dsack - BOOLEAN
+       Allows TCP to send "duplicate" SACKs.
+
+tcp_ecn - BOOLEAN
+       Enable Explicit Congestion Notification in TCP.
+
+tcp_reordering - INTEGER
+       Maximal reordering of packets in a TCP stream.
+       Default: 3      
 
 tcp_retrans_collapse - BOOLEAN
        Bug-to-bug compatibility with some broken printers.
        On retransmit try to send bigger packets to work around bugs in
        certain TCP stacks.
 
+tcp_wmem - vector of 3 INTEGERs: min, default, max
+       min: Amount of memory reserved for send buffers for TCP socket.
+       Each TCP socket has rights to use it due to fact of its birth.
+       Default: 4K
+
+       default: Amount of memory allowed for send buffers for TCP socket
+       by default. This value overrides net.core.wmem_default used
+       by other protocols, it is usually lower than net.core.wmem_default.
+       Default: 16K
+
+       max: Maximal amount of memory allowed for automatically selected
+       send buffers for TCP socket. This value does not override
+       net.core.wmem_max, "static" selection via SO_SNDBUF does not use this.
+       Default: 128K
+
+tcp_rmem - vector of 3 INTEGERs: min, default, max
+       min: Minimal size of receive buffer used by TCP sockets.
+       It is guaranteed to each TCP socket, even under moderate memory
+       pressure.
+       Default: 8K
+
+       default: default size of receive buffer used by TCP sockets.
+       This value overrides net.core.rmem_default used by other protocols.
+       Default: 87380 bytes. This value results in window of 65535 with
+       default setting of tcp_adv_win_scale and tcp_app_win:0 and a bit
+       less for default tcp_app_win. See below about these variables.
+
+       max: maximal size of receive buffer allowed for automatically
+       selected receiver buffers for TCP socket. This value does not override
+       net.core.rmem_max, "static" selection via SO_RCVBUF does not use this.
+       Default: 87380*2 bytes.
+
+tcp_mem - vector of 3 INTEGERs: low, pressure, high
+       low: below this number of pages TCP is not bothered about its
+       memory appetite.
+
+       pressure: when amount of memory allocated by TCP exceeds this number
+       of pages, TCP moderates its memory consumption and enters memory
+       pressure mode, which is exited when memory consumption falls
+       under "low".
+
+       high: number of pages allowed for queueing by all TCP sockets.
+
+       Defaults are calculated at boot time from amount of available
+       memory.
+
+tcp_app_win - INTEGER
+       Reserve max(window/2^tcp_app_win, mss) of window for application
+       buffer. Value 0 is special, it means that nothing is reserved.
+       Default: 31
+
+tcp_adv_win_scale - INTEGER
+       Count buffering overhead as bytes/2^tcp_adv_win_scale
+       (if tcp_adv_win_scale > 0) or bytes-bytes/2^(-tcp_adv_win_scale),
+       if it is <= 0.
+       Default: 2
+
 ip_local_port_range - 2 INTEGERS
        Defines the local port range that is used by TCP and UDP to
        choose the local port. The first number is the first, the 
@@ -305,4 +376,4 @@ kuznet@ms2.inr.ac.ru
 Updated by:
 Andi Kleen
 ak@muc.de
-$Id: ip-sysctl.txt,v 1.13 2000/01/18 08:24:09 davem Exp $
+$Id: ip-sysctl.txt,v 1.15 2000/08/09 11:59:03 davem Exp $
index 6db8aad316204d6623f0735cbcfae3ff2431fcb4..22749ec2d031389645d357e0794a0e0eed2eefb2 100644 (file)
@@ -87,6 +87,13 @@ M:   ajk@iehk.rwth-aachen.de
 L:     linux-hams@vger.rutgers.edu
 S:     Maintained
 
+8250/16?50 (AND CLONE UARTS) SERIAL DRIVER
+P:     Theodore Ts'o
+M:     tytso@mit.edu
+L:     linux-serial@vger.rutgers.edu
+W:     http://serial.sourceforge.net
+S:     Maintained
+
 8390 NETWORK DRIVERS [WD80x3/SMC-ELITE, SMC-ULTRA, NE2000, 3C503, etc.]
 P:     Paul Gortmaker
 M:     p_gortmaker@yahoo.com
index e87471c17c1c29ac7cf3a0f386deb0a94e2b313b..490ed75c84113133a1d24f006a20c54d7f2c71a6 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
 VERSION = 2
 PATCHLEVEL = 4
 SUBLEVEL = 0
-EXTRAVERSION = -test6
+EXTRAVERSION = -test7
 
 KERNELRELEASE=$(VERSION).$(PATCHLEVEL).$(SUBLEVEL)$(EXTRAVERSION)
 
index c446a115179a423f80c7fc727af838a2cf8d42c8..90701ad809cb13bb367da55a988653648b1e683c 100644 (file)
@@ -8,7 +8,6 @@
  *  Enable the CF configuration.
  */
 
-#include <linux/config.h>
 #include <linux/init.h>
 
 #include <asm/io.h>
index 27f250f9fdd70c8251af0ce6f507671c379f3887..fca718ece748cea5b8901d089773f6968ed95669 100644 (file)
@@ -7,7 +7,6 @@
  * Also definitions of machine independant IO functions.
  */
 
-#include <linux/config.h>
 #include <asm/io.h>
 
 unsigned int _inb(unsigned long port)
index 9e57934505a7d2fc605734c351ac13b0a258ad84..c3d195704cec2238fd2827e0ae176d73165034eb 100644 (file)
@@ -13,7 +13,6 @@
  *
  */
 
-#include <linux/config.h>
 #include <asm/io.h>
 #include <asm/machvec.h>
 
index ce142a3990efa7cabcef6ee3ac1b37ef0b1a1553..17b09c5c43ebc2b6a8fa835839007397c02b4bd4 100644 (file)
@@ -9,6 +9,7 @@
  * Machine vector for the Hitachi SolutionEngine
  */
 
+#include <linux/config.h>
 #include <linux/init.h>
 
 #include <asm/machvec.h>
index 895a65c1d1b2275514c67a21d27b2aec277a6703..8e91632c44123ed87c3cf194e311ffd4ad873af2 100644 (file)
@@ -9,6 +9,7 @@
  * Machine specific code for an unknown machine (internal peripherials only)
  */
 
+#include <linux/config.h>
 #include <linux/init.h>
 
 #include <asm/machvec.h>
index 3e0422154d2be2d2433f868927b0f17912338c89..ea5dd7ece2a65dfc73844a437f2c98aab90b2bd0 100644 (file)
@@ -8,6 +8,7 @@
  *
  */
 
+#include <linux/config.h>
 #include <linux/kernel.h>
 #include <linux/init.h>
 #include <linux/irq.h>
index e603d2a641085fef5643d629d15d4b0ed4c8bbdb..8267f9f39eecfae6af77e6c08e1fb3137ae418ac 100644 (file)
@@ -1,4 +1,4 @@
-# $Id: config.in,v 1.100 2000/08/07 18:06:54 anton Exp $
+# $Id: config.in,v 1.101 2000/08/09 18:25:31 anton Exp $
 # For a description of the syntax of this configuration file,
 # see Documentation/kbuild/config-language.txt.
 #
@@ -98,7 +98,7 @@ tristate 'Multiple devices driver support' CONFIG_BLK_DEV_MD
 dep_tristate '  Linear (append) mode' CONFIG_MD_LINEAR $CONFIG_BLK_DEV_MD
 dep_tristate '  RAID-0 (striping) mode' CONFIG_MD_RAID0 $CONFIG_BLK_DEV_MD
 dep_tristate '  RAID-1 (mirroring) mode' CONFIG_MD_RAID1 $CONFIG_BLK_DEV_MD
-dep_tristate '  RAID-4/RAID-5 mode' CONFIG_MD_RAID5 $CONFIG_BLK_DEV_MD
+#dep_tristate '  RAID-4/RAID-5 mode' CONFIG_MD_RAID5 $CONFIG_BLK_DEV_MD
 #if [ "$CONFIG_MD_LINEAR" = "y" -o "$CONFIG_MD_RAID0" = "y" -o "$CONFIG_MD_RAID1" = "y" -o "$CONFIG_MD_RAID5" = "y" ]; then
 #        bool '  Boot support' CONFIG_MD_BOOT
 #        bool '  Auto Detect support' CONFIG_AUTODETECT_RAID
index 8bac6f8e3bee09c93ec12e971eaee3098fc95f36..19c9feba082278f2e488742075a5c3c699824433 100644 (file)
@@ -115,7 +115,6 @@ CONFIG_BLK_DEV_MD=m
 CONFIG_MD_LINEAR=m
 CONFIG_MD_RAID0=m
 CONFIG_MD_RAID1=m
-CONFIG_MD_RAID5=m
 CONFIG_BLK_DEV_RAM=y
 CONFIG_BLK_DEV_RAM_SIZE=4096
 CONFIG_BLK_DEV_INITRD=y
index b0d45d82a60ede3908eeb8398765ce02c30923ec..b323ccacdfebe725a11441db97485d9742d06c20 100644 (file)
@@ -1,4 +1,4 @@
-/*  $Id: init.c,v 1.90 2000/08/09 00:00:15 davem Exp $
+/*  $Id: init.c,v 1.91 2000/08/09 23:10:19 anton Exp $
  *  linux/arch/sparc/mm/init.c
  *
  *  Copyright (C) 1995 David S. Miller (davem@caip.rutgers.edu)
@@ -456,8 +456,6 @@ void __init mem_init(void)
 #endif
 
        highmem_start_page = mem_map + highstart_pfn;
-       /* cache the highmem_mapnr */
-       highmem_mapnr = highstart_pfn;
 
        /* Saves us work later. */
        memset((void *)&empty_zero_page, 0, PAGE_SIZE);
index c57f14bb19131614b973c5ea9090a477abc26739..224052499572391aed2757bc0c4fb9e1fbf40d1b 100644 (file)
@@ -1,4 +1,4 @@
-# $Id: config.in,v 1.119 2000/08/02 10:45:03 davem Exp $
+# $Id: config.in,v 1.120 2000/08/09 08:45:39 anton Exp $
 # For a description of the syntax of this configuration file,
 # see the Configure script.
 #
@@ -86,24 +86,29 @@ comment 'Block devices'
 
 bool 'Normal floppy disk support' CONFIG_BLK_DEV_FD
 
-bool 'Multiple devices driver support' CONFIG_BLK_DEV_MD
-if [ "$CONFIG_BLK_DEV_MD" = "y" ]; then
-   tristate '  Linear (append) mode' CONFIG_MD_LINEAR
-   tristate '  RAID-0 (striping) mode' CONFIG_MD_STRIPED
-#   tristate '  RAID-1 (mirroring) mode' CONFIG_MD_MIRRORING
-#   tristate '  RAID-4/RAID-5 mode' CONFIG_MD_RAID5
-fi
+tristate 'Loopback device support' CONFIG_BLK_DEV_LOOP
+dep_tristate 'Network block device support' CONFIG_BLK_DEV_NBD $CONFIG_NET
+
+#tristate 'Logical volume manager (LVM) support' CONFIG_BLK_DEV_LVM N
+#if [ "$CONFIG_BLK_DEV_LVM" != "n" ]; then
+#   bool '   LVM information in proc filesystem' CONFIG_LVM_PROC_FS Y
+#fi
+
+tristate 'Multiple devices driver support' CONFIG_BLK_DEV_MD
+dep_tristate '  Linear (append) mode' CONFIG_MD_LINEAR $CONFIG_BLK_DEV_MD
+dep_tristate '  RAID-0 (striping) mode' CONFIG_MD_RAID0 $CONFIG_BLK_DEV_MD
+dep_tristate '  RAID-1 (mirroring) mode' CONFIG_MD_RAID1 $CONFIG_BLK_DEV_MD
+#dep_tristate '  RAID-4/RAID-5 mode' CONFIG_MD_RAID5 $CONFIG_BLK_DEV_MD
+#if [ "$CONFIG_MD_LINEAR" = "y" -o "$CONFIG_MD_RAID0" = "y" -o "$CONFIG_MD_RAID1" = "y" -o "$CONFIG_MD_RAID5" = "y" ]; then
+#        bool '  Boot support' CONFIG_MD_BOOT
+#        bool '  Auto Detect support' CONFIG_AUTODETECT_RAID
+#fi
 
 tristate 'RAM disk support' CONFIG_BLK_DEV_RAM
 if [ "$CONFIG_BLK_DEV_RAM" = "y" -o "$CONFIG_BLK_DEV_RAM" = "m" ]; then
        int '   Default RAM disk size' CONFIG_BLK_DEV_RAM_SIZE 4096
 fi
-if [ "$CONFIG_BLK_DEV_RAM" = "y" ]; then
-   bool '  Initial RAM disk (initrd) support' CONFIG_BLK_DEV_INITRD
-fi
-
-tristate 'Loopback device support' CONFIG_BLK_DEV_LOOP
-tristate 'Network block device support' CONFIG_BLK_DEV_NBD
+dep_bool '  Initial RAM disk (initrd) support' CONFIG_BLK_DEV_INITRD $CONFIG_BLK_DEV_RAM
 
 endmenu
 
index 168109d49b0c631805401f2ceec528b6f5626c25..bbb100e8f5bc17627dc519428c72b994247fd248 100644 (file)
@@ -83,6 +83,7 @@ CONFIG_DUMMY_CONSOLE=y
 CONFIG_FB_PM2=y
 # CONFIG_FB_PM2_FIFO_DISCONNECT is not set
 CONFIG_FB_PM2_PCI=y
+# CONFIG_FB_CYBER2000 is not set
 # CONFIG_FB_MATROX is not set
 CONFIG_FB_ATY=y
 # CONFIG_FB_ATY128 is not set
@@ -135,10 +136,14 @@ CONFIG_SPARCAUDIO_CS4231=y
 # Block devices
 #
 CONFIG_BLK_DEV_FD=y
-# CONFIG_BLK_DEV_MD is not set
-# CONFIG_BLK_DEV_RAM is not set
 CONFIG_BLK_DEV_LOOP=m
 CONFIG_BLK_DEV_NBD=m
+CONFIG_BLK_DEV_MD=m
+CONFIG_MD_LINEAR=m
+CONFIG_MD_RAID0=m
+CONFIG_MD_RAID1=m
+# CONFIG_BLK_DEV_RAM is not set
+# CONFIG_BLK_DEV_INITRD is not set
 
 #
 # Networking options
index d4f49972663be18d3516b9002de01abc9ec2d6a3..2f0a2baef03c63b48f4f2567793df5e44bff0aa9 100644 (file)
@@ -1,4 +1,4 @@
-/* $Id: ioctl32.c,v 1.96 2000/08/02 06:22:35 davem Exp $
+/* $Id: ioctl32.c,v 1.97 2000/08/09 08:45:39 anton Exp $
  * ioctl32.c: Conversion between 32bit and 64bit native ioctls.
  *
  * Copyright (C) 1997-2000  Jakub Jelinek  (jakub@redhat.com)
@@ -18,9 +18,7 @@
 #include <linux/if.h>
 #include <linux/malloc.h>
 #include <linux/hdreg.h>
-#if 0 /* New RAID code is half-merged... -DaveM */
-#include <linux/md.h>
-#endif
+#include <linux/raid/md.h>
 #include <linux/kd.h>
 #include <linux/route.h>
 #include <linux/skbuff.h>
@@ -3067,17 +3065,27 @@ COMPATIBLE_IOCTL(BLKFRASET)
 COMPATIBLE_IOCTL(BLKSECTSET)
 COMPATIBLE_IOCTL(BLKSSZGET)
 
+/* RAID */
+COMPATIBLE_IOCTL(RAID_VERSION)
+COMPATIBLE_IOCTL(GET_ARRAY_INFO)
+COMPATIBLE_IOCTL(GET_DISK_INFO)
+COMPATIBLE_IOCTL(PRINT_RAID_DEBUG)
+COMPATIBLE_IOCTL(CLEAR_ARRAY)
+COMPATIBLE_IOCTL(ADD_NEW_DISK)
+COMPATIBLE_IOCTL(HOT_REMOVE_DISK)
+COMPATIBLE_IOCTL(SET_ARRAY_INFO)
+COMPATIBLE_IOCTL(SET_DISK_INFO)
+COMPATIBLE_IOCTL(WRITE_RAID_INFO)
+COMPATIBLE_IOCTL(UNPROTECT_ARRAY)
+COMPATIBLE_IOCTL(PROTECT_ARRAY)
+COMPATIBLE_IOCTL(HOT_ADD_DISK)
+COMPATIBLE_IOCTL(SET_DISK_FAULTY)
+COMPATIBLE_IOCTL(RUN_ARRAY)
+COMPATIBLE_IOCTL(START_ARRAY)
+COMPATIBLE_IOCTL(STOP_ARRAY)
+COMPATIBLE_IOCTL(STOP_ARRAY_RO)
+COMPATIBLE_IOCTL(RESTART_ARRAY_RW)
 
-#if 0  /* New RAID code is being merged, fix up to handle
-        * new RAID ioctls when fully merged in 2.3.x -DaveM
-        */
-/* 0x09 */
-COMPATIBLE_IOCTL(REGISTER_DEV)
-COMPATIBLE_IOCTL(REGISTER_DEV_NEW)
-COMPATIBLE_IOCTL(START_MD)
-COMPATIBLE_IOCTL(STOP_MD)
-#endif
-       
 /* Big K */
 COMPATIBLE_IOCTL(PIO_FONT)
 COMPATIBLE_IOCTL(GIO_FONT)
index 5bb4c8839f27e0ffaa82a58ae113bb949a5c4528..c5559ef79d86c94d3b1d91369a73d4074f2f34c0 100644 (file)
@@ -1,4 +1,4 @@
-/* $Id: sparc64_ksyms.c,v 1.91 2000/08/05 13:30:33 davem Exp $
+/* $Id: sparc64_ksyms.c,v 1.92 2000/08/09 08:45:40 anton Exp $
  * arch/sparc64/kernel/sparc64_ksyms.c: Sparc64 specific ksyms support.
  *
  * Copyright (C) 1996 David S. Miller (davem@caip.rutgers.edu)
@@ -334,3 +334,7 @@ EXPORT_SYMBOL_NOVERS(memcmp);
 EXPORT_SYMBOL_NOVERS(memcpy);
 EXPORT_SYMBOL_NOVERS(memset);
 EXPORT_SYMBOL_NOVERS(memmove);
+
+void VISenter(void);
+/* RAID code needs this */
+EXPORT_SYMBOL(VISenter);
index fcd5b8b0691e6b97c9490522acae722e885125ef..855bc44dde82d4054c257b5e0f2dccebfaa79d27 100644 (file)
@@ -144,7 +144,7 @@ static int linear_make_request (mddev_t *mddev,
     
        if (block >= (tmp_dev->size + tmp_dev->offset)
                                || block < tmp_dev->offset) {
-               printk ("linear_make_request: Block %ld out of bounds on dev %s size %d offset %d\n", block, kdevname(tmp_dev->dev), tmp_dev->size, tmp_dev->offset);
+               printk ("linear_make_request: Block %ld out of bounds on dev %s size %ld offset %ld\n", block, kdevname(tmp_dev->dev), tmp_dev->size, tmp_dev->offset);
                return -1;
        }
        bh->b_rdev = tmp_dev->dev;
index 5c1fda2c3d6a5bc35ffa4a398bfa748925320107..f083a46a4ea9075c1735d950b37614c74c6cda2b 100644 (file)
@@ -182,8 +182,8 @@ static int __blk_cleanup_queue(struct list_head *head)
  *     Currently, its primary task it to free all the &struct request structures
  *     that were allocated to the queue.
  * Caveat:
- *     Hopefully the low level driver will have finished any outstanding requests
- *     first...
+ *     Hopefully the low level driver will have finished any outstanding
+ *     requests first...
  **/
 void blk_cleanup_queue(request_queue_t * q)
 {
@@ -204,21 +204,23 @@ void blk_cleanup_queue(request_queue_t * q)
  * @active:  A flag indication where the head of the queue is active.
  *
  * Description:
- *    The driver for a block device may choose to leave the currently active request
- *    on the request queue, removing it only when it has completed.  The queue
- *    handling routines assume this by default and will not involved the head of the
- *    request queue in any merging or reordering of requests.
+ *    The driver for a block device may choose to leave the currently active
+ *    request on the request queue, removing it only when it has completed.
+ *    The queue handling routines assume this by default for safety reasons
+ *    and will not involve the head of the request queue in any merging or
+ *    reordering of requests when the queue is unplugged (and thus may be
+ *    working on this particular request).
  *
- *    If a driver removes requests from the queue before processing them, then it may
- *    indicate that it does so, there by allowing the head of the queue to be involved
- *    in merging and reordering.  This is done be calling blk_queue_headactive() with an
- *    @active flag of %1.
+ *    If a driver removes requests from the queue before processing them, then
+ *    it may indicate that it does so, there by allowing the head of the queue
+ *    to be involved in merging and reordering.  This is done be calling
+ *    blk_queue_headactive() with an @active flag of %0.
  *
- *    If a driver processes several requests at once, it must remove them (or at least all
- *    but one of them) from the request queue.
+ *    If a driver processes several requests at once, it must remove them (or
+ *    at least all but one of them) from the request queue.
  *
- *    When a queue is plugged (see blk_queue_pluggable()) the head will be assumed to
- *    be inactive.
+ *    When a queue is plugged (see blk_queue_pluggable()) the head will be
+ *    assumed to be inactive.
  **/
  
 void blk_queue_headactive(request_queue_t * q, int active)
@@ -236,9 +238,9 @@ void blk_queue_headactive(request_queue_t * q, int active)
  *   is empty.  This allows a number of requests to be added before any are
  *   processed, thus providing an opportunity for these requests to be merged
  *   or re-ordered.
- *   The default plugging function (generic_plug_device()) sets the "plugged" flag
- *   for the queue and adds a task the the $tq_disk task queue to unplug the
- *   queue and call the request function at a later time.
+ *   The default plugging function (generic_plug_device()) sets the "plugged"
+ *   flag for the queue and adds a task to the $tq_disk task queue to unplug
+ *   the queue and call the request function at a later time.
  *
  *   A device driver may provide an alternate plugging function by passing it to
  *   blk_queue_pluggable().   This function should set the "plugged" flag if it
@@ -254,20 +256,20 @@ void blk_queue_pluggable (request_queue_t * q, plug_device_fn *plug)
 
 
 /**
- * blk_queue_make_request - define an alternate make_request function for a device
+ * blk_queue_make_request - define an alternate make_request function for a
+ * device
  * @q:  the request queue for the device to be affected
  * @mfn: the alternate make_request function
  *
  * Description:
- *    The normal way for &struct buffer_heads to be passes to a device driver it to
- *    collect into requests on a request queue, and allow the device driver to select
- *    requests off that queue when it is ready.  This works well for many block devices.
- *    However some block devices (typically virtual devices such as md or lvm) do not benefit
- *    from the processes on the request queue, and are served best by having the requests passed
- *    directly to them.  This can be achived by providing a function to blk_queue_make_request().
- *    If this is done, then the rest of the &request_queue_t structure is unused (unless the alternate
- *    make_request function explicitly uses it).  In particular, there is no need to call
- *    blk_init_queue() if blk_queue_make_request() has been called.
+ *    The normal way for &struct buffer_heads to be passed to a device driver
+ *    it to collect into requests on a request queue, and allow the device
+ *    driver to select requests off that queue when it is ready.  This works
+ *    well for many block devices. However some block devices (typically
+ *    virtual devices such as md or lvm) do not benefit from the processes on
+ *    the request queue, and are served best by having the requests passed
+ *    directly to them.  This can be achieved by providing a function to
+ *    blk_queue_make_request().
  **/
 
 void blk_queue_make_request(request_queue_t * q, make_request_fn * mfn)
@@ -390,32 +392,30 @@ static void blk_init_free_list(request_queue_t *q)
  *        placed on the queue.
  *
  * Description:
- *    If a block device wishes to use the stand request handling procedures,
- *    which sorts requests and coalesces adjactent requests, then it must
+ *    If a block device wishes to use the standard request handling procedures,
+ *    which sorts requests and coalesces adjacent requests, then it must
  *    call blk_init_queue().  The function @rfn will be called when there
  *    are requests on the queue that need to be processed.  If the device
- *    supports plugging, then @rfn may not be called immediately that requests
+ *    supports plugging, then @rfn may not be called immediately when requests
  *    are available on the queue, but may be called at some time later instead.
+ *    Plugged queues are generally unplugged when a buffer belonging to one
+ *    of the requests on the queue is needed, or due to memory pressure.
  *
- *    @rfn is not required, or even expected, to remove all requests off the queue, but
- *    only as many as it can handle at a time.  If it does leave requests on the queue,
- *    it is responsible for arranging that the requests get dealt with eventually.
+ *    @rfn is not required, or even expected, to remove all requests off the
+ *    queue, but only as many as it can handle at a time.  If it does leave
+ *    requests on the queue, it is responsible for arranging that the requests
+ *    get dealt with eventually.
  *
- *    A global spin lock $io_spin_lock must held while manipulating the requests
- *    on the request queue.
+ *    A global spin lock $io_request_lock must be held while manipulating the
+ *    requests on the request queue.
  *
- *    The request on the head of the queue is by default assumed to be potentially active,
- *    and it is not considered for re-ordering or merging.  This behaviour can
- *    be changed with blk_queue_headactive().
+ *    The request on the head of the queue is by default assumed to be
+ *    potentially active, and it is not considered for re-ordering or merging
+ *    whenever the given queue is unplugged. This behaviour can be changed with
+ *    blk_queue_headactive().
  *
  * Note:
- *    blk_init_queue() does not need to be called if
- *    blk_queue_make_request() has been called to register an alternate
- *    request handler.  Ofcourse, it may be called if the handler wants
- *    to still use the fields on &request_queue_t, but in a non-standard
- *    way.
- *
- *    blk_init_queue() should be paired with a blk_cleanup-queue() call
+ *    blk_init_queue() must be paired with a blk_cleanup-queue() call
  *    when the block device is deactivated (such as at module unload).
  **/
 static int __make_request(request_queue_t * q, int rw,  struct buffer_head * bh);
@@ -1023,6 +1023,9 @@ int __init blk_dev_init(void)
                                           sizeof(struct request),
                                           0, SLAB_HWCACHE_ALIGN, NULL, NULL);
 
+       if (!request_cachep)
+               panic("Can't create request pool slab cache\n");
+
        for (dev = blk_dev + MAX_BLKDEV; dev-- != blk_dev;)
                dev->queue = NULL;
 
index acbf07be4dc0e8623141aff4670703b0843bcaab..09f3f854762f783b946f9431cfaa8810108d6786 100644 (file)
@@ -103,7 +103,7 @@ static int create_strip_zones (mddev_t *mddev)
 
                zone->nb_dev = c;
                zone->size = (smallest->size - current_offset) * c;
-               printk(" zone->nb_dev: %d, size: %d\n",zone->nb_dev,zone->size);
+               printk(" zone->nb_dev: %d, size: %ld\n",zone->nb_dev,zone->size);
 
                if (!conf->smallest || (zone->size < conf->smallest->size))
                        conf->smallest = zone;
@@ -112,7 +112,7 @@ static int create_strip_zones (mddev_t *mddev)
                curr_zone_offset += zone->size;
 
                current_offset = smallest->size;
-               printk("current zone offset: %d\n", current_offset);
+               printk("current zone offset: %ld\n", current_offset);
        }
        printk("done.\n");
        return 0;
@@ -139,7 +139,7 @@ static int raid0_run (mddev_t *mddev)
                goto out_free_conf;
 
        printk("raid0 : md_size is %d blocks.\n", md_size[mdidx(mddev)]);
-       printk("raid0 : conf->smallest->size is %d blocks.\n", conf->smallest->size);
+       printk("raid0 : conf->smallest->size is %ld blocks.\n", conf->smallest->size);
        nb_zone = md_size[mdidx(mddev)]/conf->smallest->size +
                        (md_size[mdidx(mddev)] % conf->smallest->size ? 1 : 0);
        printk("raid0 : nb_zone is %ld.\n", nb_zone);
index 08b50aaa10fe5e8b2d253fe360095aca9fdbaf09..630a642c63be19775d80a52b04a98cc3232d2116 100644 (file)
@@ -8,7 +8,7 @@ bool 'Virtual terminal' CONFIG_VT
 if [ "$CONFIG_VT" = "y" ]; then
    bool '  Support for console on virtual terminal' CONFIG_VT_CONSOLE
 fi
-tristate 'Standard/generic (dumb) serial support' CONFIG_SERIAL
+tristate 'Standard/generic (8250/16550 and compatible UARTs) serial support' CONFIG_SERIAL
 if [ "$CONFIG_SERIAL" = "y" ]; then
    bool '  Support for console on serial port' CONFIG_SERIAL_CONSOLE
    if [ "$CONFIG_ARCH_ACORN" = "y" ]; then
index d81685230a3ad182052b894a28bd39b41e7988d5..84c7f064467418518e4f140023ce2b244102eb00 100644 (file)
@@ -1,4 +1,4 @@
-/* $Id: ffb_drv.c,v 1.5 2000/07/26 01:03:57 davem Exp $
+/* $Id: ffb_drv.c,v 1.6 2000/08/10 05:26:23 davem Exp $
  * ffb_drv.c: Creator/Creator3D direct rendering driver.
  *
  * Copyright (C) 2000 David S. Miller (davem@redhat.com)
@@ -754,6 +754,7 @@ static void align_shm_mapping(struct vm_area_struct *vma, unsigned long kvirt)
 
 extern struct vm_operations_struct drm_vm_ops;
 extern struct vm_operations_struct drm_vm_shm_ops;
+extern struct vm_operations_struct drm_vm_shm_lock_ops;
 
 static int ffb_mmap(struct file *filp, struct vm_area_struct *vma)
 {
@@ -766,7 +767,6 @@ static int ffb_mmap(struct file *filp, struct vm_area_struct *vma)
        DRM_DEBUG("start = 0x%lx, end = 0x%lx, offset = 0x%lx\n",
                  vma->vm_start, vma->vm_end, VM_OFFSET(vma));
 
-       lock_kernel();
        minor = MINOR(filp->f_dentry->d_inode->i_rdev);
        ffb_priv = NULL;
        for (i = 0; i < ffb_dev_table_size; i++) {
@@ -774,15 +774,13 @@ static int ffb_mmap(struct file *filp, struct vm_area_struct *vma)
                if (ffb_priv->miscdev.minor == minor)
                        break;
        }
-       if (i >= ffb_dev_table_size) {
-               unlock_kernel();
+       if (i >= ffb_dev_table_size)
                return -EINVAL;
-       }
+
        /* We don't support/need dma mappings, so... */
-       if (!VM_OFFSET(vma)) {
-               unlock_kernel();
+       if (!VM_OFFSET(vma))
                return -EINVAL;
-       }
+
        for (i = 0; i < dev->map_count; i++) {
                unsigned long off;
 
@@ -794,19 +792,16 @@ static int ffb_mmap(struct file *filp, struct vm_area_struct *vma)
                        break;
        }
 
-       if (i >= dev->map_count) {
-               unlock_kernel();
+       if (i >= dev->map_count)
                return -EINVAL;
-       }
+
        if (!map ||
-           ((map->flags & _DRM_RESTRICTED) && !capable(CAP_SYS_ADMIN))) {
-               unlock_kernel();
+           ((map->flags & _DRM_RESTRICTED) && !capable(CAP_SYS_ADMIN)))
                return -EPERM;
-       }
-       if (map->size != (vma->vm_end - vma->vm_start)) {
-               unlock_kernel();
+
+       if (map->size != (vma->vm_end - vma->vm_start))
                return -EINVAL;
-       }
+
        /* Set read-only attribute before mappings are created
         * so it works for fb/reg maps too.
         */
@@ -829,15 +824,19 @@ static int ffb_mmap(struct file *filp, struct vm_area_struct *vma)
                if (io_remap_page_range(vma->vm_start,
                                        ffb_priv->card_phys_base + VM_OFFSET(vma),
                                        vma->vm_end - vma->vm_start,
-                                       vma->vm_page_prot, 0)) {
-                       unlock_kernel();
+                                       vma->vm_page_prot, 0))
                        return -EAGAIN;
-               }
+
                vma->vm_ops = &drm_vm_ops;
                break;
        case _DRM_SHM:
                align_shm_mapping(vma, (unsigned long)dev->lock.hw_lock);
-               vma->vm_ops = &drm_vm_shm_ops;
+               if (map->flags & _DRM_CONTAINS_LOCK)
+                       vma->vm_ops = &drm_vm_shm_lock_ops;
+               else {
+                       vma->vm_ops = &drm_vm_shm_ops;
+                       vma->vm_private_data = (void *) map;
+               }
 
                /* Don't let this area swap.  Change when
                 * DRM_KERNEL advisory is supported.
@@ -845,10 +844,8 @@ static int ffb_mmap(struct file *filp, struct vm_area_struct *vma)
                vma->vm_flags |= VM_LOCKED;
                break;
        default:
-               unlock_kernel();
                return -EINVAL; /* This should never happen. */
        };
-       unlock_kernel();
 
        vma->vm_flags |= VM_LOCKED | VM_SHM; /* Don't swap */
 
index d116e2a9103f91fb1bafd0fc4e6815185051f81e..8fb32fb5a595da96703fbd726c4d51f553b77a33 100644 (file)
  *
  *  7/00: Support Timedia/Sunix/Exsys PCI cards
  *
+ *  7/00: fix some returns on failure not using MOD_DEC_USE_COUNT.
+ *       Arnaldo Carvalho de Melo <acme@conectiva.com.br>
+ *
  * This module exports the following rs232 io functions:
  *
  *     int rs_init(void);
  */
 
-static char *serial_version = "5.01";
-static char *serial_revdate = "2000-05-29";
+static char *serial_version = "5.02";
+static char *serial_revdate = "2000-08-09";
 
 /*
  * Serial driver configuration section.  Here are the various options:
@@ -142,6 +145,10 @@ static char *serial_revdate = "2000-05-29";
 #endif
 #endif
 
+#ifdef MODULE
+#undef CONFIG_SERIAL_CONSOLE
+#endif
+
 #define CONFIG_SERIAL_RSA
 
 #define RS_STROBE_TIME (10*HZ)
@@ -260,8 +267,9 @@ static struct rs_multiport_struct rs_multiport[NR_IRQS];
 static int IRQ_timeout[NR_IRQS];
 #ifdef CONFIG_SERIAL_CONSOLE
 static struct console sercons;
+static int lsr_break_flag = 0;
 #endif
-#if defined(CONFIG_SERIAL_CONSOLE) && defined(CONFIG_MAGIC_SYSRQ) && !defined(MODULE)
+#if defined(CONFIG_SERIAL_CONSOLE) && defined(CONFIG_MAGIC_SYSRQ)
 static unsigned long break_pressed; /* break, really ... */
 #endif
 
@@ -281,7 +289,7 @@ static struct serial_uart_config uart_config[] = {
        { "16550", 1, 0 }, 
        { "16550A", 16, UART_CLEAR_FIFO | UART_USE_FIFO }, 
        { "cirrus", 1, 0 },     /* usurped by cyclades.c */
-       { "ST16650", 1, UART_CLEAR_FIFO |UART_STARTECH }, 
+       { "ST16650", 1, UART_CLEAR_FIFO | UART_STARTECH }, 
        { "ST16650V2", 32, UART_CLEAR_FIFO | UART_USE_FIFO |
                  UART_STARTECH }, 
        { "TI16750", 64, UART_CLEAR_FIFO | UART_USE_FIFO},
@@ -295,14 +303,18 @@ static struct serial_uart_config uart_config[] = {
        { 0, 0}
 };
 
-#if defined(CONFIG_SERIAL_RSA) && defined(MODULE)
+#ifdef CONFIG_SERIAL_RSA
 
 #define PORT_RSA_MAX 4
 static int probe_rsa[PORT_RSA_MAX];
 static int force_rsa[PORT_RSA_MAX];
 
+#ifdef MODULE
 MODULE_PARM(probe_rsa, "1-" __MODULE_STRING(PORT_RSA_MAX) "i");
+MODULE_PARM_DESC(probe_rsa, "Probe I/O ports for RSA");
 MODULE_PARM(force_rsa, "1-" __MODULE_STRING(PORT_RSA_MAX) "i");
+MODULE_PARM_DESC(force_rsa, "Force I/O ports for RSA");
+#endif
 #endif /* CONFIG_SERIAL_RSA  */
 
 static struct serial_state rs_table[RS_TABLE_SIZE] = {
@@ -313,7 +325,7 @@ static struct serial_state rs_table[RS_TABLE_SIZE] = {
 
 #if (defined(ENABLE_SERIAL_PCI) || defined(ENABLE_SERIAL_PNP))
 #define NR_PCI_BOARDS  8
-/* We don't unregister PCI boards right now */
+
 static struct pci_board_inst   serial_pci_board[NR_PCI_BOARDS];
 static int serial_pci_board_idx = 0;
 
@@ -573,6 +585,23 @@ static _INLINE_ void receive_chars(struct async_struct *info,
                        if (*status & UART_LSR_BI) {
                                *status &= ~(UART_LSR_FE | UART_LSR_PE);
                                icount->brk++;
+                               /*
+                                * We do the SysRQ and SAK checking
+                                * here because otherwise the break
+                                * may get masked by ignore_status_mask
+                                * or read_status_mask.
+                                */
+#if defined(CONFIG_SERIAL_CONSOLE) && defined(CONFIG_MAGIC_SYSRQ)
+                               if (info->line == sercons.index) {
+                                       if (!break_pressed) {
+                                               break_pressed = jiffies;
+                                               goto ignore_char;
+                                       }
+                                       break_pressed = 0;
+                               }
+#endif
+                               if (info->flags & ASYNC_SAK)
+                                       do_SAK(tty);
                        } else if (*status & UART_LSR_PE)
                                icount->parity++;
                        else if (*status & UART_LSR_FE)
@@ -591,23 +620,19 @@ static _INLINE_ void receive_chars(struct async_struct *info,
                                goto ignore_char;
                        }
                        *status &= info->read_status_mask;
-               
+
+#ifdef CONFIG_SERIAL_CONSOLE
+                       if (info->line == sercons.index) {
+                               /* Recover the break flag from console xmit */
+                               *status |= lsr_break_flag;
+                               lsr_break_flag = 0;
+                       }
+#endif
                        if (*status & (UART_LSR_BI)) {
 #ifdef SERIAL_DEBUG_INTR
                                printk("handling break....");
-#endif
-#if defined(CONFIG_SERIAL_CONSOLE) && defined(CONFIG_MAGIC_SYSRQ) && !defined(MODULE)
-                               if (info->line == sercons.index) {
-                                       if (!break_pressed) {
-                                               break_pressed = jiffies;
-                                               goto ignore_char;
-                                       }
-                                       break_pressed = 0;
-                               }
 #endif
                                *tty->flip.flag_buf_ptr = TTY_BREAK;
-                               if (info->flags & ASYNC_SAK)
-                                       do_SAK(tty);
                        } else if (*status & UART_LSR_PE)
                                *tty->flip.flag_buf_ptr = TTY_PARITY;
                        else if (*status & UART_LSR_FE)
@@ -626,7 +651,7 @@ static _INLINE_ void receive_chars(struct async_struct *info,
                                        goto ignore_char;
                        }
                }
-#if defined(CONFIG_SERIAL_CONSOLE) && defined(CONFIG_MAGIC_SYSRQ) && !defined(MODULE)
+#if defined(CONFIG_SERIAL_CONSOLE) && defined(CONFIG_MAGIC_SYSRQ)
                if (break_pressed && info->line == sercons.index) {
                        if (ch != 0 &&
                            time_before(jiffies, break_pressed + HZ*5)) {
@@ -1076,7 +1101,7 @@ static void rs_timer(unsigned long dummy)
 #endif
                restore_flags(flags);
 
-               mod_timer(&serial_timer, jiffies + IRQ_timeout[0] - 2);
+               mod_timer(&serial_timer, jiffies + IRQ_timeout[0]);
        }
 }
 
@@ -1111,7 +1136,7 @@ static void figure_IRQ_timeout(int irq)
        }
        if (!irq)
                timeout = timeout / 2;
-       IRQ_timeout[irq] = timeout ? timeout : 1;
+       IRQ_timeout[irq] = (timeout > 3) ? timeout-2 : 1;
 }
 
 #ifdef CONFIG_SERIAL_RSA
@@ -2077,7 +2102,7 @@ static int set_serial_info(struct async_struct * info,
 
        new_serial.irq = irq_cannonicalize(new_serial.irq);
 
-       if ((new_serial.irq >= NR_IRQS) || 
+       if ((new_serial.irq >= NR_IRQS) || (new_serial.irq < 0) || 
            (new_serial.baud_base < 9600)|| (new_serial.type < PORT_UNKNOWN) ||
            (new_serial.type > PORT_MAX) || (new_serial.type == PORT_CIRRUS) ||
            (new_serial.type == PORT_STARTECH)) {
@@ -2310,7 +2335,7 @@ static int set_modem_info(struct async_struct * info, unsigned int cmd,
 
 static int do_autoconfig(struct async_struct * info)
 {
-       int                     retval;
+       int irq, retval;
        
        if (!capable(CAP_SYS_ADMIN))
                return -EPERM;
@@ -2323,8 +2348,11 @@ static int do_autoconfig(struct async_struct * info)
        autoconfig(info->state);
        if ((info->state->flags & ASYNC_AUTO_IRQ) &&
            (info->state->port != 0) &&
-           (info->state->type != PORT_UNKNOWN))
-               info->state->irq = detect_uart_irq(info->state);
+           (info->state->type != PORT_UNKNOWN)) {
+               irq = detect_uart_irq(info->state);
+               if (irq > 0)
+                       info->state->irq = irq;
+       }
 
        retval = startup(info);
        if (retval)
@@ -3111,8 +3139,10 @@ static int rs_open(struct tty_struct *tty, struct file * filp)
        }
        tty->driver_data = info;
        info->tty = tty;
-       if (serial_paranoia_check(info, tty->device, "rs_open"))
+       if (serial_paranoia_check(info, tty->device, "rs_open")) {
+               MOD_DEC_USE_COUNT;              
                return -ENODEV;
+       }
 
 #ifdef SERIAL_DEBUG_OPEN
        printk("rs_open %s%d, count = %d\n", tty->driver.name, info->line,
@@ -3125,6 +3155,7 @@ static int rs_open(struct tty_struct *tty, struct file * filp)
        if (!tmp_buf) {
                page = get_zeroed_page(GFP_KERNEL);
                if (!page) {
+                       MOD_DEC_USE_COUNT;
                        return -ENOMEM;
                }
                if (tmp_buf)
@@ -3140,6 +3171,7 @@ static int rs_open(struct tty_struct *tty, struct file * filp)
            (info->flags & ASYNC_CLOSING)) {
                if (info->flags & ASYNC_CLOSING)
                        interruptible_sleep_on(&info->close_wait);
+               MOD_DEC_USE_COUNT;
 #ifdef SERIAL_DO_RESTART
                return ((info->flags & ASYNC_HUP_NOTIFY) ?
                        -EAGAIN : -ERESTARTSYS);
@@ -3153,6 +3185,7 @@ static int rs_open(struct tty_struct *tty, struct file * filp)
         */
        retval = startup(info);
        if (retval) {
+               MOD_DEC_USE_COUNT;
                return retval;
        }
 
@@ -3162,6 +3195,7 @@ static int rs_open(struct tty_struct *tty, struct file * filp)
                printk("rs_open returning after block_til_ready with %d\n",
                       retval);
 #endif
+               MOD_DEC_USE_COUNT;
                return retval;
        }
 
@@ -3486,6 +3520,7 @@ static void autoconfig_startech_uarts(struct async_struct *info,
                 * (Exoray@isys.ca) claims that it's needed for 952
                 * dual UART's (which are not recommended for new designs).
                 */
+               info->ACR = 0;
                serial_out(info, UART_LCR, 0xBF);
                serial_out(info, UART_EFR, 0x10);
                serial_out(info, UART_LCR, 0x00);
@@ -3804,10 +3839,11 @@ static _INLINE_ int get_pci_port(struct pci_dev *dev,
                if (idx >= max_port)
                        return 1;
        }
-
+                       
        offset = board->first_uart_offset;
 
        /* Timedia/SUNIX uses a mixture of BARs and offsets */
+       /* Ugh, this is ugly as all hell --- TYT */
        if(dev->vendor == PCI_VENDOR_ID_TIMEDIA )  /* 0x1409 */
                switch(idx) {
                        case 0: base_idx=0;
@@ -4090,6 +4126,62 @@ pci_inteli960ni_fn(struct pci_dev *dev,
        return(0);
 }
 
+/*
+ * Timedia has an explosion of boards, and to avoid the PCI table from
+ * growing *huge*, we use this function to collapse some 70 entries
+ * in the PCI table into one, for sanity's and compactness's sake.
+ */
+static unsigned short timedia_single_port[] = {
+       0x4025, 0x4027, 0x4028, 0x5025, 0x5027, 0 };
+static unsigned short timedia_dual_port[] = {
+       0x0002, 0x4036, 0x4037, 0x4038, 0x4078, 0x4079, 0x4085,
+       0x4088, 0x4089, 0x5037, 0x5078, 0x5079, 0x5085, 0x6079, 
+       0x7079, 0x8079, 0x8137, 0x8138, 0x8237, 0x8238, 0x9079, 
+       0x9137, 0x9138, 0x9237, 0x9238, 0xA079, 0xB079, 0xC079,
+       0xD079, 0 };
+static unsigned short timedia_quad_port[] = {
+       0x4055, 0x4056, 0x4095, 0x4096, 0x5056, 0x8156, 0x8157, 
+       0x8256, 0x8257, 0x9056, 0x9156, 0x9157, 0x9158, 0x9159, 
+       0x9256, 0x9257, 0xA056, 0xA157, 0xA158, 0xA159, 0xB056,
+       0xB157, 0 };
+static unsigned short timedia_eight_port[] = {
+       0x4065, 0x4066, 0x5065, 0x5066, 0x8166, 0x9066, 0x9166, 
+       0x9167, 0x9168, 0xA066, 0xA167, 0xA168, 0 };
+static struct timedia_struct {
+       int num;
+       unsigned short *ids;
+} timedia_data[] = {
+       { 1, timedia_single_port },
+       { 2, timedia_dual_port },
+       { 4, timedia_quad_port },
+       { 8, timedia_eight_port },
+       { 0, 0 }
+};
+
+static int
+#ifndef MODULE
+__init
+#endif
+pci_timedia_fn(struct pci_dev *dev, struct pci_board *board, int enable)
+{
+       int     i, j;
+       unsigned short *ids;
+
+       if (!enable)
+               return 0;
+
+       for (i=0; timedia_data[i].num; i++) {
+               ids = timedia_data[i].ids;
+               for (j=0; ids[j]; j++) {
+                       if (pci_get_subvendor(dev) == ids[j]) {
+                               board->num_ports = timedia_data[i].num;
+                               return 0;
+                       }
+               }
+       }
+       return 0;
+}
+
 
 /*
  * This is the configuration table for all of the PCI serial boards
@@ -4116,58 +4208,50 @@ static struct pci_board pci_boards[] __initdata = {
                PCI_SUBVENDOR_ID_CONNECT_TECH,
                PCI_SUBDEVICE_ID_CONNECT_TECH_BH2_232,
                SPCI_FL_BASE1, 2, 1382400 },
-       {       PCI_VENDOR_ID_V3, PCI_DEVICE_ID_V3_V960V2,
+       {       PCI_VENDOR_ID_V3, PCI_DEVICE_ID_V3_V351,
                PCI_SUBVENDOR_ID_CONNECT_TECH,
                PCI_SUBDEVICE_ID_CONNECT_TECH_BH8_232,
                SPCI_FL_BASE1, 8, 1382400 },
-       {       PCI_VENDOR_ID_V3, PCI_DEVICE_ID_V3_V960V2,
+       {       PCI_VENDOR_ID_V3, PCI_DEVICE_ID_V3_V351,
                PCI_SUBVENDOR_ID_CONNECT_TECH,
                PCI_SUBDEVICE_ID_CONNECT_TECH_BH4_232,
                SPCI_FL_BASE1, 4, 1382400 },
-       {       PCI_VENDOR_ID_V3, PCI_DEVICE_ID_V3_V960V2,
+       {       PCI_VENDOR_ID_V3, PCI_DEVICE_ID_V3_V351,
                PCI_SUBVENDOR_ID_CONNECT_TECH,
                PCI_SUBDEVICE_ID_CONNECT_TECH_BH2_232,
                SPCI_FL_BASE1, 2, 1382400 },
-       {       PCI_VENDOR_ID_V3, PCI_DEVICE_ID_V3_V960,
+       {       PCI_VENDOR_ID_V3, PCI_DEVICE_ID_V3_V351,
                PCI_SUBVENDOR_ID_CONNECT_TECH,
                PCI_SUBDEVICE_ID_CONNECT_TECH_BH8_485,
                SPCI_FL_BASE1, 8, 921600 },
-       {       PCI_VENDOR_ID_V3, PCI_DEVICE_ID_V3_V960,
+       {       PCI_VENDOR_ID_V3, PCI_DEVICE_ID_V3_V351,
                PCI_SUBVENDOR_ID_CONNECT_TECH,
                PCI_SUBDEVICE_ID_CONNECT_TECH_BH8_485_4_4,
                SPCI_FL_BASE1, 8, 921600 },
-       {       PCI_VENDOR_ID_V3, PCI_DEVICE_ID_V3_V960,
+       {       PCI_VENDOR_ID_V3, PCI_DEVICE_ID_V3_V351,
                PCI_SUBVENDOR_ID_CONNECT_TECH,
                PCI_SUBDEVICE_ID_CONNECT_TECH_BH4_485,
                SPCI_FL_BASE1, 4, 921600 },
-       {       PCI_VENDOR_ID_V3, PCI_DEVICE_ID_V3_V960,
+       {       PCI_VENDOR_ID_V3, PCI_DEVICE_ID_V3_V351,
                PCI_SUBVENDOR_ID_CONNECT_TECH,
                PCI_SUBDEVICE_ID_CONNECT_TECH_BH4_485_2_2,
                SPCI_FL_BASE1, 4, 921600 },
-       {       PCI_VENDOR_ID_V3, PCI_DEVICE_ID_V3_V960,
+       {       PCI_VENDOR_ID_V3, PCI_DEVICE_ID_V3_V351,
                PCI_SUBVENDOR_ID_CONNECT_TECH,
                PCI_SUBDEVICE_ID_CONNECT_TECH_BH2_485,
                SPCI_FL_BASE1, 2, 921600 },
-       {       PCI_VENDOR_ID_V3, PCI_DEVICE_ID_V3_V960V2,
+       {       PCI_VENDOR_ID_V3, PCI_DEVICE_ID_V3_V351,
                PCI_SUBVENDOR_ID_CONNECT_TECH,
-               PCI_SUBDEVICE_ID_CONNECT_TECH_BH8_485,
+               PCI_SUBDEVICE_ID_CONNECT_TECH_BH8_485_2_6,
                SPCI_FL_BASE1, 8, 921600 },
-       {       PCI_VENDOR_ID_V3, PCI_DEVICE_ID_V3_V960V2,
+       {       PCI_VENDOR_ID_V3, PCI_DEVICE_ID_V3_V351,
                PCI_SUBVENDOR_ID_CONNECT_TECH,
-               PCI_SUBDEVICE_ID_CONNECT_TECH_BH8_485_4_4,
+               PCI_SUBDEVICE_ID_CONNECT_TECH_BH081101V1,
                SPCI_FL_BASE1, 8, 921600 },
-       {       PCI_VENDOR_ID_V3, PCI_DEVICE_ID_V3_V960V2,
+       {       PCI_VENDOR_ID_V3, PCI_DEVICE_ID_V3_V351,
                PCI_SUBVENDOR_ID_CONNECT_TECH,
-               PCI_SUBDEVICE_ID_CONNECT_TECH_BH4_485,
+               PCI_SUBDEVICE_ID_CONNECT_TECH_BH041101V1,
                SPCI_FL_BASE1, 4, 921600 },
-       {       PCI_VENDOR_ID_V3, PCI_DEVICE_ID_V3_V960V2,
-               PCI_SUBVENDOR_ID_CONNECT_TECH,
-               PCI_SUBDEVICE_ID_CONNECT_TECH_BH4_485_2_2,
-               SPCI_FL_BASE1, 4, 921600 },
-       {       PCI_VENDOR_ID_V3, PCI_DEVICE_ID_V3_V960V2,
-               PCI_SUBVENDOR_ID_CONNECT_TECH,
-               PCI_SUBDEVICE_ID_CONNECT_TECH_BH2_485,
-               SPCI_FL_BASE1, 2, 921600 },
        {       PCI_VENDOR_ID_SEALEVEL, PCI_DEVICE_ID_SEALEVEL_U530,
                PCI_ANY_ID, PCI_ANY_ID,
                SPCI_FL_BASE2 | SPCI_FL_BASE_TABLE, 1, 115200 },
@@ -4196,6 +4280,9 @@ static struct pci_board pci_boards[] __initdata = {
        {       PCI_VENDOR_ID_PLX, PCI_DEVICE_ID_PLX_SPCOM800, 
                PCI_ANY_ID, PCI_ANY_ID,
                SPCI_FL_BASE2, 8, 921600 },
+       {       PCI_VENDOR_ID_PLX, PCI_DEVICE_ID_PLX_1077,
+               PCI_ANY_ID, PCI_ANY_ID,
+               SPCI_FL_BASE2, 4, 921600 },
        {       PCI_VENDOR_ID_PLX, PCI_DEVICE_ID_PLX_9050,
                PCI_SUBVENDOR_ID_KEYSPAN,
                PCI_SUBDEVICE_ID_KEYSPAN_SX2,
@@ -4265,75 +4352,10 @@ static struct pci_board pci_boards[] __initdata = {
        {       PCI_VENDOR_ID_OXSEMI, PCI_DEVICE_ID_OXSEMI_16PCI95N,
                PCI_ANY_ID, PCI_ANY_ID,
                SPCI_FL_BASE0 | SPCI_FL_REGION_SZ_CAP, 32, 115200 },
-       /*      PCI_VENDOR_ID_TIMEDIA/Sunix, PCI_DEVICE_ID_TIMEDIA_1889, */
-       {       0x1409, 0x7168, 0x1409, 0x0002, SPCI_FL_BASE_TABLE, 2, 921600 }, /*4036A*/
-       {       0x1409, 0x7168, 0x1409, 0x4025, SPCI_FL_BASE_TABLE, 1, 921600 }, /*4025A*/
-       {       0x1409, 0x7168, 0x1409, 0x4027, SPCI_FL_BASE_TABLE, 1, 921600 }, /*4027A*/
-       {       0x1409, 0x7168, 0x1409, 0x4028, SPCI_FL_BASE_TABLE, 1, 921600 }, /*4028D*/
-       {       0x1409, 0x7168, 0x1409, 0x4036, SPCI_FL_BASE_TABLE, 2, 921600 }, /*4036D*/
-       {       0x1409, 0x7168, 0x1409, 0x4037, SPCI_FL_BASE_TABLE, 2, 921600 }, /*4037A*/
-       {       0x1409, 0x7168, 0x1409, 0x4038, SPCI_FL_BASE_TABLE, 2, 921600 }, /*4038D*/
-       {       0x1409, 0x7168, 0x1409, 0x4055, SPCI_FL_BASE_TABLE, 4, 921600 }, /*4055A*/
-       {       0x1409, 0x7168, 0x1409, 0x4056, SPCI_FL_BASE_TABLE, 4, 921600 }, /*4056A*/
-       {       0x1409, 0x7168, 0x1409, 0x4065, SPCI_FL_BASE_TABLE, 8, 921600 }, /*4065A*/
-       {       0x1409, 0x7168, 0x1409, 0x4066, SPCI_FL_BASE_TABLE, 8, 921600 }, /*4066A*/
-       {       0x1409, 0x7168, 0x1409, 0x4078, SPCI_FL_BASE_TABLE, 2, 921600 }, /*4078A*/
-       {       0x1409, 0x7168, 0x1409, 0x4079, SPCI_FL_BASE_TABLE, 2, 921600 }, /*4079H*/
-       {       0x1409, 0x7168, 0x1409, 0x4085, SPCI_FL_BASE_TABLE, 2, 921600 }, /*4085H*/
-       {       0x1409, 0x7168, 0x1409, 0x4088, SPCI_FL_BASE_TABLE, 2, 921600 }, /*4088A*/
-       {       0x1409, 0x7168, 0x1409, 0x4089, SPCI_FL_BASE_TABLE, 2, 921600 }, /*4089A*/
-       {       0x1409, 0x7168, 0x1409, 0x4095, SPCI_FL_BASE_TABLE, 4, 921600 }, /*4095A*/
-       {       0x1409, 0x7168, 0x1409, 0x4096, SPCI_FL_BASE_TABLE, 4, 921600 }, /*4096A*/
-       {       0x1409, 0x7168, 0x1409, 0x5025, SPCI_FL_BASE_TABLE, 1, 921600 }, /*4025D*/
-       {       0x1409, 0x7168, 0x1409, 0x5027, SPCI_FL_BASE_TABLE, 1, 921600 }, /*4027D*/
-       {       0x1409, 0x7168, 0x1409, 0x5037, SPCI_FL_BASE_TABLE, 2, 921600 }, /*4037D*/
-       {       0x1409, 0x7168, 0x1409, 0x5056, SPCI_FL_BASE_TABLE, 4, 921600 }, /*4056R*/
-       {       0x1409, 0x7168, 0x1409, 0x5065, SPCI_FL_BASE_TABLE, 8, 921600 }, /*4065R*/
-       {       0x1409, 0x7168, 0x1409, 0x5066, SPCI_FL_BASE_TABLE, 8, 921600 }, /*4066R*/
-       {       0x1409, 0x7168, 0x1409, 0x5078, SPCI_FL_BASE_TABLE, 2, 921600 }, /*4078U*/
-       {       0x1409, 0x7168, 0x1409, 0x5079, SPCI_FL_BASE_TABLE, 2, 921600 }, /*4079A*/
-       {       0x1409, 0x7168, 0x1409, 0x5085, SPCI_FL_BASE_TABLE, 2, 921600 }, /*4085U*/
-       {       0x1409, 0x7168, 0x1409, 0x6079, SPCI_FL_BASE_TABLE, 2, 921600 }, /*4079R*/
-       {       0x1409, 0x7168, 0x1409, 0x7079, SPCI_FL_BASE_TABLE, 2, 921600 }, /*4079S*/
-       {       0x1409, 0x7168, 0x1409, 0x8079, SPCI_FL_BASE_TABLE, 2, 921600 }, /*4079D*/
-       {       0x1409, 0x7168, 0x1409, 0x8137, SPCI_FL_BASE_TABLE, 2, 921600 }, /*8137*/
-       {       0x1409, 0x7168, 0x1409, 0x8138, SPCI_FL_BASE_TABLE, 2, 921600 }, /*8138*/
-       {       0x1409, 0x7168, 0x1409, 0x8156, SPCI_FL_BASE_TABLE, 4, 921600 }, /*8156*/
-       {       0x1409, 0x7168, 0x1409, 0x8157, SPCI_FL_BASE_TABLE, 4, 921600 }, /*8157*/
-       {       0x1409, 0x7168, 0x1409, 0x8166, SPCI_FL_BASE_TABLE, 8, 921600 }, /*8166*/
-       {       0x1409, 0x7168, 0x1409, 0x8237, SPCI_FL_BASE_TABLE, 2, 921600 }, /*8237*/
-       {       0x1409, 0x7168, 0x1409, 0x8238, SPCI_FL_BASE_TABLE, 2, 921600 }, /*8238*/
-       {       0x1409, 0x7168, 0x1409, 0x8256, SPCI_FL_BASE_TABLE, 4, 921600 }, /*8256*/
-       {       0x1409, 0x7168, 0x1409, 0x8257, SPCI_FL_BASE_TABLE, 4, 921600 }, /*8257*/
-       {       0x1409, 0x7168, 0x1409, 0x9056, SPCI_FL_BASE_TABLE, 4, 921600 }, /*9056A*/
-       {       0x1409, 0x7168, 0x1409, 0x9066, SPCI_FL_BASE_TABLE, 8, 921600 }, /*9066A*/
-       {       0x1409, 0x7168, 0x1409, 0x9079, SPCI_FL_BASE_TABLE, 2, 921600 }, /*4079E*/
-       {       0x1409, 0x7168, 0x1409, 0x9137, SPCI_FL_BASE_TABLE, 2, 921600 }, /*8137S*/
-       {       0x1409, 0x7168, 0x1409, 0x9138, SPCI_FL_BASE_TABLE, 2, 921600 }, /*8138S*/
-       {       0x1409, 0x7168, 0x1409, 0x9156, SPCI_FL_BASE_TABLE, 4, 921600 }, /*8156S*/
-       {       0x1409, 0x7168, 0x1409, 0x9157, SPCI_FL_BASE_TABLE, 4, 921600 }, /*8157S*/
-       {       0x1409, 0x7168, 0x1409, 0x9158, SPCI_FL_BASE_TABLE, 4, 921600 }, /*9158*/
-       {       0x1409, 0x7168, 0x1409, 0x9159, SPCI_FL_BASE_TABLE, 4, 921600 }, /*9159*/
-       {       0x1409, 0x7168, 0x1409, 0x9166, SPCI_FL_BASE_TABLE, 8, 921600 }, /*8166S*/
-       {       0x1409, 0x7168, 0x1409, 0x9167, SPCI_FL_BASE_TABLE, 8, 921600 }, /*9167*/
-       {       0x1409, 0x7168, 0x1409, 0x9168, SPCI_FL_BASE_TABLE, 8, 921600 }, /*9168*/
-       {       0x1409, 0x7168, 0x1409, 0x9237, SPCI_FL_BASE_TABLE, 2, 921600 }, /*8237S*/
-       {       0x1409, 0x7168, 0x1409, 0x9238, SPCI_FL_BASE_TABLE, 2, 921600 }, /*8238S*/
-       {       0x1409, 0x7168, 0x1409, 0x9256, SPCI_FL_BASE_TABLE, 4, 921600 }, /*8256S*/
-       {       0x1409, 0x7168, 0x1409, 0x9257, SPCI_FL_BASE_TABLE, 4, 921600 }, /*8257S*/
-       {       0x1409, 0x7168, 0x1409, 0xA056, SPCI_FL_BASE_TABLE, 4, 921600 }, /*9056B*/
-       {       0x1409, 0x7168, 0x1409, 0xA066, SPCI_FL_BASE_TABLE, 8, 921600 }, /*9066B*/
-       {       0x1409, 0x7168, 0x1409, 0xA079, SPCI_FL_BASE_TABLE, 2, 921600 }, /*4079F*/
-       {       0x1409, 0x7168, 0x1409, 0xA157, SPCI_FL_BASE_TABLE, 4, 921600 }, /*9157*/
-       {       0x1409, 0x7168, 0x1409, 0xA158, SPCI_FL_BASE_TABLE, 4, 921600 }, /*9158S*/
-       {       0x1409, 0x7168, 0x1409, 0xA159, SPCI_FL_BASE_TABLE, 4, 921600 }, /*9159S*/
-       {       0x1409, 0x7168, 0x1409, 0xA167, SPCI_FL_BASE_TABLE, 8, 921600 }, /*9167S*/
-       {       0x1409, 0x7168, 0x1409, 0xA168, SPCI_FL_BASE_TABLE, 8, 921600 }, /*9168S*/
-       {       0x1409, 0x7168, 0x1409, 0xB056, SPCI_FL_BASE_TABLE, 4, 921600 }, /*9056C*/
-       {       0x1409, 0x7168, 0x1409, 0xB079, SPCI_FL_BASE_TABLE, 2, 921600 }, /*9079A*/
-       {       0x1409, 0x7168, 0x1409, 0xB157, SPCI_FL_BASE_TABLE, 4, 921600 }, /*9157S*/
-       {       0x1409, 0x7168, 0x1409, 0xC079, SPCI_FL_BASE_TABLE, 2, 921600 }, /*9079B*/
-       {       0x1409, 0x7168, 0x1409, 0xD079, SPCI_FL_BASE_TABLE, 2, 921600 }, /*9079C*/
+       {       PCI_VENDOR_ID_TIMEDIA, PCI_DEVICE_ID_TIMEDIA_1889,
+               PCI_VENDOR_ID_TIMEDIA, PCI_ANY_ID,
+               SPCI_FL_BASE_TABLE, 1, 921600,
+               0, 0, pci_timedia_fn },
        {       PCI_VENDOR_ID_LAVA, PCI_DEVICE_ID_LAVA_DSERIAL,
                PCI_ANY_ID, PCI_ANY_ID,
                SPCI_FL_BASE0 | SPCI_FL_BASE_TABLE, 2, 115200 },
@@ -4540,7 +4562,7 @@ static struct pci_board pci_boards[] __initdata = {
        {       PCI_VENDOR_ID_ROCKWELL, 0x1004,
                0x1048, 0x1500, 
                SPCI_FL_BASE1, 1, 115200 },
-#ifdef CONFIG_DDB5074
+#if CONFIG_DDB5074
        /*
         * NEC Vrc-5074 (Nile 4) builtin UART.
         * Conditionally compiled in since this is a motherboard device.
@@ -4580,7 +4602,7 @@ static int _INLINE_ serial_pci_guess_board(struct pci_dev *dev,
 
        for (i=0; i < 6; i++) {
                if (IS_PCI_REGION_IOPORT(dev, i)) {
-                       num_port = 0;
+                       num_port++;
                        if (first_port == -1)
                                first_port = i;
                } else {
@@ -5077,16 +5099,6 @@ static int __init rs_init(void)
        int i;
        struct serial_state * state;
 
-       if (serial_timer.function) {
-               printk("RS_TIMER already set, another serial driver "
-                      "already loaded?\n");
-#ifdef MODULE
-               printk("Can't load serial driver module over built-in "
-                      "serial driver\n");
-#endif
-               return -EBUSY;
-       }
-
        init_bh(SERIAL_BH, do_serial_bh);
        init_timer(&serial_timer);
        serial_timer.function = rs_timer;
@@ -5419,6 +5431,8 @@ static void __exit rs_fini(void)
 
 module_init(rs_init);
 module_exit(rs_fini);
+MODULE_DESCRIPTION("Standard/generic (dumb) serial driver");
+MODULE_AUTHOR("Theodore Ts'o <tytso@mit.edu>");
 
 
 /*
@@ -5437,10 +5451,17 @@ static struct async_struct async_sercons;
  */
 static inline void wait_for_xmitr(struct async_struct *info)
 {
-       unsigned int tmout = 1000000;
+       unsigned int status, tmout = 1000000;
+
+       do {
+               status = serial_in(info, UART_LSR);
 
-       while (--tmout &&
-              ((serial_in(info, UART_LSR) & BOTH_EMPTY) != BOTH_EMPTY));
+               if (status & UART_LSR_BI)
+                       lsr_break_flag = UART_LSR_BI;
+               
+               if (--tmout == 0)
+                       break;
+       } while((status & BOTH_EMPTY) != BOTH_EMPTY);
 }
 
 
index 4621d3981b96557d8243bfa70c47f4890e2998c3..c6066f67f0130e0413636d09a34e67f7edf4c624 100644 (file)
@@ -80,8 +80,10 @@ if [ "$CONFIG_WAN" = "y" ]; then
 
        # X.25 network drivers
 
-       dep_tristate 'LAPB over Ethernet driver' CONFIG_LAPBETHER $CONFIG_LAPB $CONFIG_X25
-       dep_tristate 'X.25 async driver' CONFIG_X25_ASY $CONFIG_LAPB $CONFIG_X25
+       if [ "$CONFIG_EXPERIMENTAL" = "y" ]; then
+          dep_tristate 'LAPB over Ethernet driver' CONFIG_LAPBETHER $CONFIG_LAPB $CONFIG_X25
+          dep_tristate 'X.25 async driver' CONFIG_X25_ASY $CONFIG_LAPB $CONFIG_X25
+       fi
 
        if [ "$CONFIG_X86" = "y" ]; then
           tristate 'SBNI12-xx support' CONFIG_SBNI
index 0c3a2f53364932cc75805ba4d52d10680bc54fa3..62e06d38f88b29cfa72ed1f7132aa565e4f32f1f 100644 (file)
                1106 0686  VT82C686/A PCI to ISA Bridge
        0691  VT82C693A/694x [Apollo PRO133x]
                1458 0691  VT82C691 Apollo Pro System Controller
-       0698  VT82C693A [Ppollo Pro133 AGP]
+       0698  VT82C693A [Apollo Pro133 AGP]
        0693  VT82C693 [Apollo Pro Plus]
        0926  VT82C926 [Amazon]
        1000  VT82C570MV
index 9fac0e1ce4a3d14b8b4af529996c4e2825d7c228..1a95544d8f31dfb9c1fc8084954b951176539a31 100644 (file)
@@ -60,7 +60,8 @@ static int pci_assign_bus_resource(const struct pci_bus *bus,
        struct resource *res,
        unsigned long size,
        unsigned long min,
-       unsigned int type_mask)
+       unsigned int type_mask,
+       int resno)
 {
        int i;
 
@@ -83,7 +84,7 @@ static int pci_assign_bus_resource(const struct pci_bus *bus,
                        continue;
 
                /* Update PCI config space.  */
-               pcibios_update_resource(dev, r, res, i);
+               pcibios_update_resource(dev, r, res, resno);
                return 0;
        }
        return -EBUSY;
@@ -100,14 +101,14 @@ pci_assign_resource(struct pci_dev *dev, int i)
        min = (res->flags & IORESOURCE_IO) ? PCIBIOS_MIN_IO : PCIBIOS_MIN_MEM;
 
        /* First, try exact prefetching match.. */
-       if (pci_assign_bus_resource(bus, dev, res, size, min, IORESOURCE_PREFETCH) < 0) {
+       if (pci_assign_bus_resource(bus, dev, res, size, min, IORESOURCE_PREFETCH, i) < 0) {
                /*
                 * That failed.
                 *
                 * But a prefetching area can handle a non-prefetching
                 * window (it will just not perform as well).
                 */
-               if (!(res->flags & IORESOURCE_PREFETCH) || pci_assign_bus_resource(bus, dev, res, size, min, 0) < 0) {
+               if (!(res->flags & IORESOURCE_PREFETCH) || pci_assign_bus_resource(bus, dev, res, size, min, 0, i) < 0) {
                        printk(KERN_ERR "PCI: Failed to allocate resource %d for %s\n", i, dev->name);
                        return -EBUSY;
                }
index 7cbbc3f1f4e6a1ac08d2e6b7d0193341d2e34704..655107348649398f73d002de85eb4dd1ef7606a1 100644 (file)
 #  define FALSE 0
 #endif
 
-#if defined(__powerpc__) || defined(__i386)
+#if defined(__powerpc__) || defined(__i386__)
 #  define MMAPIO
 #endif
 
index 6970bec0d8bcd584b60299198e8b0748659b38af..5c3045f2a8017196b338dbb69d209b5e06cbf42b 100644 (file)
@@ -537,10 +537,10 @@ aci_mixer_ioctl (int dev, unsigned int cmd, caddr_t arg)
 
 static struct mixer_operations aci_mixer_operations =
 {
-       "ACI",
-       "ACI mixer",
-       aci_mixer_ioctl,
-       NULL
+       owner:  THIS_MODULE,
+       id:     "ACI",
+       name:   "ACI mixer",
+       ioctl:  aci_mixer_ioctl
 };
 
 static unsigned char
index bdab8a85b86e8100a46e40d7bccdb377b4f34e68..311cdab9bfb57a8ce235ae30aa7c81c8a79aa34d 100644 (file)
@@ -36,7 +36,6 @@
 #include <linux/isapnp.h>
 #include <linux/stddef.h>
 
-#include "soundmodule.h"
 #include "sound_config.h"
 
 #define DEBUGNOISE(x)
@@ -248,13 +247,6 @@ static void ad1816_start_input (int dev, unsigned long buf, int count,
        restore_flags (flags);
 }
 
-
-static int ad1816_ioctl (int dev, unsigned int cmd, caddr_t arg)
-{
-       return -(EINVAL);
-}
-
-
 static int ad1816_prepare_for_input (int dev, int bsize, int bcount)
 {
        unsigned long flags;
@@ -535,24 +527,20 @@ static void ad1816_close (int dev) /* close device */
 
 static struct audio_driver ad1816_audio_driver =
 {
-       ad1816_open,
-       ad1816_close,
-       ad1816_output_block,
-       ad1816_start_input,
-       ad1816_ioctl,
-       ad1816_prepare_for_input,
-       ad1816_prepare_for_output,
-       ad1816_halt,
-       NULL,
-       NULL,
-       ad1816_halt_input,
-       ad1816_halt_output,
-       ad1816_trigger,
-       ad1816_set_speed,
-       ad1816_set_bits,
-       ad1816_set_channels,
-       NULL,
-       NULL
+       owner:          THIS_MODULE,
+       open:           ad1816_open,
+       close:          ad1816_close,
+       output_block:   ad1816_output_block,
+       start_input:    ad1816_start_input,
+       prepare_for_input:      ad1816_prepare_for_input,
+       prepare_for_output:     ad1816_prepare_for_output,
+       halt_io:                ad1816_halt,
+       halt_input:     ad1816_halt_input,
+       halt_output:    ad1816_halt_output,
+       trigger:        ad1816_trigger,
+       set_speed:      ad1816_set_speed,
+       set_bits:       ad1816_set_bits,
+       set_channels:   ad1816_set_channels,
 };
 
 
@@ -992,9 +980,10 @@ ad1816_mixer_ioctl (int dev, unsigned int cmd, caddr_t arg)
 /* Mixer structure */
 
 static struct mixer_operations ad1816_mixer_operations = {
-       "AD1816",
-       "AD1816 Mixer",
-       ad1816_mixer_ioctl
+       owner:  THIS_MODULE,
+       id:     "AD1816",
+       name:   "AD1816 Mixer",
+       ioctl:  ad1816_mixer_ioctl
 };
 
 
@@ -1424,7 +1413,6 @@ static int __init init_ad1816(void)
        }
 
        attach_ad1816(&cfg);
-       SOUND_LOCK;
 
        return 0;
 }
@@ -1441,7 +1429,6 @@ static void __exit cleanup_ad1816 (void)
        }     
        nr_ad1816_devs=0;
 
-       SOUND_LOCK_END;
 #if defined CONFIG_ISAPNP || defined CONFIG_ISAPNP_MODULE
        if(activated)
                if(ad1816_dev)
index 91485b6e0c4a6af73f2104a2690205cf4bd9cbaa..3b867899bcaaa02a85eb0789017654c0c56414ca 100644 (file)
@@ -39,8 +39,6 @@
 #include <linux/stddef.h>
 #include <linux/pm.h>
 
-#include "soundmodule.h"
-
 #define DEB(x)
 #define DEB1(x)
 #include "sound_config.h"
@@ -892,29 +890,28 @@ static unsigned int ad1848_set_bits(int dev, unsigned int arg)
 
 static struct audio_driver ad1848_audio_driver =
 {
-       ad1848_open,
-       ad1848_close,
-       ad1848_output_block,
-       ad1848_start_input,
-       NULL,
-       ad1848_prepare_for_input,
-       ad1848_prepare_for_output,
-       ad1848_halt,
-       NULL,
-       NULL,
-       ad1848_halt_input,
-       ad1848_halt_output,
-       ad1848_trigger,
-       ad1848_set_speed,
-       ad1848_set_bits,
-       ad1848_set_channels
+       owner:          THIS_MODULE,
+       open:           ad1848_open,
+       close:          ad1848_close,
+       output_block:   ad1848_output_block,
+       start_input:    ad1848_start_input,
+       prepare_for_input:      ad1848_prepare_for_input,
+       prepare_for_output:     ad1848_prepare_for_output,
+       halt_io:        ad1848_halt,
+       halt_input:     ad1848_halt_input,
+       halt_output:    ad1848_halt_output,
+       trigger:        ad1848_trigger,
+       set_speed:      ad1848_set_speed,
+       set_bits:       ad1848_set_bits,
+       set_channels:   ad1848_set_channels
 };
 
 static struct mixer_operations ad1848_mixer_operations =
 {
-       "SOUNDPORT",
-       "AD1848/CS4248/CS4231",
-       ad1848_mixer_ioctl
+       owner:  THIS_MODULE,
+       id:     "SOUNDPORT",
+       name:   "AD1848/CS4248/CS4231",
+       ioctl:  ad1848_mixer_ioctl
 };
 
 static int ad1848_open(int dev, int mode)
@@ -1849,7 +1846,8 @@ int ad1848_detect(int io_base, int *ad_flags, int *osp)
        return 1;
 }
 
-int ad1848_init(char *name, int io_base, int irq, int dma_playback, int dma_capture, int share_dma, int *osp)
+int ad1848_init (char *name, int io_base, int irq, int dma_playback,
+               int dma_capture, int share_dma, int *osp, struct module *owner)
 {
        /*
         * NOTE! If irq < 0, there is another driver which has allocated the IRQ
@@ -1901,7 +1899,10 @@ int ad1848_init(char *name, int io_base, int irq, int dma_playback, int dma_capt
        portc = (ad1848_port_info *) kmalloc(sizeof(ad1848_port_info), GFP_KERNEL);
        if(portc==NULL)
                return -1;
-               
+
+       if (owner)
+               ad1848_audio_driver.owner = owner;
+       
        if ((my_dev = sound_install_audiodrv(AUDIO_DRIVER_VERSION,
                                             dev_name,
                                             &ad1848_audio_driver,
@@ -2498,7 +2499,7 @@ int probe_ms_sound(struct address_info *hw_config)
        return ad1848_detect(hw_config->io_base + 4, NULL, hw_config->osp);
 }
 
-void attach_ms_sound(struct address_info *hw_config)
+void attach_ms_sound(struct address_info *hw_config, struct module *owner)
 {
        static signed char interrupt_bits[12] =
        {
@@ -2523,7 +2524,8 @@ void attach_ms_sound(struct address_info *hw_config)
                                                    hw_config->irq,
                                                    hw_config->dma,
                                                    hw_config->dma2, 0, 
-                                                   hw_config->osp);
+                                                   hw_config->osp,
+                                                   owner);
                request_region(hw_config->io_base, 4, "WSS config");
                return;
        }
@@ -2581,7 +2583,8 @@ void attach_ms_sound(struct address_info *hw_config)
        hw_config->slots[0] = ad1848_init("MS Sound System", hw_config->io_base + 4,
                                          hw_config->irq,
                                          dma, dma2, 0,
-                                         hw_config->osp);
+                                         hw_config->osp,
+                                         THIS_MODULE);
        request_region(hw_config->io_base, 4, "WSS config");
 }
 
@@ -2829,17 +2832,15 @@ static int __init init_ad1848(void)
 
                if(!probe_ms_sound(&cfg))
                        return -ENODEV;
-               attach_ms_sound(&cfg);
+               attach_ms_sound(&cfg, THIS_MODULE);
                loaded = 1;
        }
        
-       SOUND_LOCK;
        return 0;
 }
 
 static void __exit cleanup_ad1848(void)
 {
-       SOUND_LOCK_END;
        if(loaded)
                unload_ms_sound(&cfg);
 }
index 2f2225546d71970e99d72946b09e120af86ece28..234bbb8235b9286828346f159fcf496d2f5f5cd0 100644 (file)
                
 
 int ad1848_init(char *name, int io_base, int irq, int dma_playback,
-       int dma_capture, int share_dma, int *osp);
+       int dma_capture, int share_dma, int *osp, struct module *owner);
 void ad1848_unload (int io_base, int irq, int dma_playback, int dma_capture, int share_dma);
 
 int ad1848_detect (int io_base, int *flags, int *osp);
 int ad1848_control(int cmd, int arg);
 
 void adintr(int irq, void *dev_id, struct pt_regs * dummy);
-void attach_ms_sound(struct address_info * hw_config);
+void attach_ms_sound(struct address_info * hw_config, struct module * owner);
 
 int probe_ms_sound(struct address_info *hw_config);
 void unload_ms_sound(struct address_info *hw_info);
index 95974d444342070f095e9549d21daa9f9dc4a8ad..1bc2f66535db79afdaeea8b2703fc43173e39e54 100644 (file)
 #include <linux/init.h>
 
 #include "sound_config.h"
-#include "soundmodule.h"
 
 #include "opl3.h"
 
 static void __init attach_adlib_card(struct address_info *hw_config)
 {
-       hw_config->slots[0] = opl3_init(hw_config->io_base, hw_config->osp);
+       hw_config->slots[0] = opl3_init(hw_config->io_base, hw_config->osp, THIS_MODULE);
        request_region(hw_config->io_base, 4, "OPL3/OPL2");
 }
 
@@ -50,7 +49,7 @@ static int __init init_adlib(void)
        if (probe_adlib(&cfg) == 0)
                return -ENODEV;
        attach_adlib_card(&cfg);
-       SOUND_LOCK;
+
        return 0;
 }
 
@@ -59,7 +58,6 @@ static void __exit cleanup_adlib(void)
        release_region(cfg.io_base, 4);
        sound_unload_synthdev(cfg.slots[0]);
        
-       SOUND_LOCK_END;
 }
 
 module_init(init_adlib);
index 6ea812a385ca05076516a20c44146616091a5a06..166dbb52baf9b397d191e41e5824be76db792d0c 100644 (file)
@@ -28,7 +28,6 @@
 #include <linux/module.h>
 #include <linux/init.h>
 #include "sound_config.h"
-#include "soundmodule.h"
 
 /*
  * Sanity checks
@@ -1357,13 +1356,11 @@ static int __init do_init_aedsp16(void) {
                 */
                return -EINVAL;
        }
-       SOUND_LOCK;
        return 0;
 }
 
 static void __exit cleanup_aedsp16(void) {
        uninit_aedsp16();
-       SOUND_LOCK_END;
 }
 
 module_init(do_init_aedsp16);
index ffb88a7b4921d3e59f48b4f547a3591bc7ada901..09418df79bc59957a1848a4c587f901143058755 100644 (file)
@@ -82,14 +82,15 @@ int audio_open(int dev, struct file *file)
        if (dev < 0 || dev >= num_audiodevs)
                return -ENXIO;
 
+       if (audio_devs[dev]->d->owner)
+               __MOD_INC_USE_COUNT (audio_devs[dev]->d->owner);
+
        if ((ret = DMAbuf_open(dev, mode)) < 0)
                return ret;
 
-       if (audio_devs[dev]->coproc)
-       {
+       if (audio_devs[dev]->coproc) {
                if ((ret = audio_devs[dev]->coproc->
-                       open(audio_devs[dev]->coproc->devc, COPR_PCM)) < 0)
-               {
+                       open(audio_devs[dev]->coproc->devc, COPR_PCM)) < 0) {
                        audio_release(dev, file);
                        printk(KERN_WARNING "Sound: Can't access coprocessor device\n");
                        return ret;
@@ -178,6 +179,9 @@ void audio_release(int dev, struct file *file)
        if (audio_devs[dev]->coproc)
                audio_devs[dev]->coproc->close(audio_devs[dev]->coproc->devc, COPR_PCM);
        DMAbuf_release(dev, mode);
+
+       if (audio_devs[dev]->d->owner)
+               __MOD_DEC_USE_COUNT (audio_devs[dev]->d->owner);
 }
 
 static void translate_bytes(const unsigned char *table, unsigned char *buff, int n)
index 4fee7b72f5b1dfd125a4455b3de27df952590222..a87c10ce3457b58e0427fd31fec0ddd292f70c3f 100644 (file)
@@ -31,7 +31,6 @@
 #endif
 
 #include "sound_config.h"
-#include "soundmodule.h"
 
 #include "awe_wave.h"
 #include "awe_hw.h"
@@ -496,27 +495,28 @@ static int ctrls[AWE_MD_END];
 
 static struct synth_operations awe_operations =
 {
-       "EMU8K",
-       &awe_info,
-       0,
-       SYNTH_TYPE_SAMPLE,
-       SAMPLE_TYPE_AWE32,
-       awe_open,
-       awe_close,
-       awe_ioctl,
-       awe_kill_note,
-       awe_start_note,
-       awe_set_instr_2,
-       awe_reset,
-       awe_hw_control,
-       awe_load_patch,
-       awe_aftertouch,
-       awe_controller,
-       awe_panning,
-       awe_volume_method,
-       awe_bender,
-       awe_alloc,
-       awe_setup_voice
+       owner:          THIS_MODULE,
+       id:             "EMU8K",
+       info:           &awe_info,
+       midi_dev:       0,
+       synth_type:     SYNTH_TYPE_SAMPLE,
+       synth_subtype:  SAMPLE_TYPE_AWE32,
+       open:           awe_open,
+       close:          awe_close,
+       ioctl:          awe_ioctl,
+       kill_note:      awe_kill_note,
+       start_note:     awe_start_note,
+       set_instr:      awe_set_instr_2,
+       reset:          awe_reset,
+       hw_control:     awe_hw_control,
+       load_patch:     awe_load_patch,
+       aftertouch:     awe_aftertouch,
+       controller:     awe_controller,
+       panning:        awe_panning,
+       volume_method:  awe_volume_method,
+       bender:         awe_bender,
+       alloc_voice:    awe_alloc,
+       setup_voice:    awe_setup_voice
 };
 
 
@@ -575,8 +575,6 @@ static int __init _attach_awe(void)
 
        awe_present = TRUE;
 
-       SOUND_LOCK;
-
        return 1;
 }
 
@@ -608,7 +606,6 @@ static void __exit _unload_awe(void)
 #endif
                sound_unload_synthdev(my_dev);
                awe_present = FALSE;
-               SOUND_LOCK_END;
        }
 }
 
@@ -4293,8 +4290,10 @@ static int awe_mixer_ioctl(int dev, unsigned int cmd, caddr_t arg);
 static int my_mixerdev = -1;
 
 static struct mixer_operations awe_mixer_operations = {
-       "AWE32 Equalizer",
-       awe_mixer_ioctl,
+       owner:  THIS_MODULE,
+       id:     "AWE",
+       name:   "AWE32 Equalizer",
+       ioctl:  awe_mixer_ioctl,
 };
 
 static void __init attach_mixer(void)
@@ -5225,17 +5224,13 @@ static int xg_control_change(MidiStatus *st, int cmd, int val);
 
 static struct midi_operations awe_midi_operations =
 {
-       {"AWE Midi Emu", 0, 0, SNDCARD_SB},
-       NULL /*&std_midi_synth*/,
-       {0}, /* input_info */
-       awe_midi_open, /*open*/
-       awe_midi_close, /*close*/
-       awe_midi_ioctl, /*ioctl*/
-       awe_midi_outputc, /*outputc*/
-       NULL /*start_read*/,
-       NULL /*end_read*/,
-       NULL, /* kick */
-       NULL, /* command */
+       owner:          THIS_MODULE,
+       info:           {"AWE Midi Emu", 0, 0, SNDCARD_SB},
+       in_info:        {0},
+       open:           awe_midi_open, /*open*/
+       close:          awe_midi_close, /*close*/
+       ioctl:          awe_midi_ioctl, /*ioctl*/
+       outputc:        awe_midi_outputc, /*outputc*/
 };
 
 static int my_mididev = -1;
index 8c7f907774b921afe6b4eed45911145dbe6360ca..32f230069a3ddb60fd0139ffb34928f459cc6e3a 100644 (file)
@@ -46,7 +46,6 @@
 #include <linux/init.h>
 
 #include "sound_config.h"
-#include "soundmodule.h"
 
 #include "cs4232.h"
 #include "ad1848.h"
@@ -229,7 +228,8 @@ void attach_cs4232(struct address_info *hw_config)
                                          dma1,         /* Playback DMA */
                                          dma2,         /* Capture DMA */
                                          0,
-                                         hw_config->osp);
+                                         hw_config->osp,
+                                         THIS_MODULE);
 
        if (hw_config->slots[0] != -1 &&
                audio_devs[hw_config->slots[0]]->mixer_dev!=-1)
@@ -258,7 +258,7 @@ void attach_cs4232(struct address_info *hw_config)
                if (probe_uart401(&hw_config2))
                {
                        mpu_detected = 1;
-                       attach_uart401(&hw_config2);
+                       attach_uart401(&hw_config2, THIS_MODULE);
                }
                else
                {
@@ -266,7 +266,6 @@ void attach_cs4232(struct address_info *hw_config)
                }
                hw_config->slots[1] = hw_config2.slots[1];
        }
-       SOUND_LOCK;
 }
 
 void unload_cs4232(struct address_info *hw_config)
@@ -376,7 +375,6 @@ static int __init init_cs4232(void)
 static void __exit cleanup_cs4232(void)
 {
         unload_cs4232(&cfg); /* unloads MPU as well, if needed */
-       SOUND_LOCK_END;
 }
 
 module_init(init_cs4232);
index 4c71c1265155379a8a324a711649a4855f34764e..56c1b33fa325969f32b8017350f8bf178cbf56ed 100644 (file)
@@ -160,6 +160,7 @@ typedef struct coproc_operations
 
 struct audio_driver 
 {
+       struct module *owner;
        int (*open) (int dev, int mode);
        void (*close) (int dev);
        void (*output_block) (int dev, unsigned long buf, 
@@ -239,6 +240,7 @@ int *load_mixer_volumes(char *name, int *levels, int present);
 
 struct mixer_operations 
 {
+       struct module *owner;
        char id[16];
        char name[64];
        int (*ioctl) (int dev, unsigned int cmd, caddr_t arg);
@@ -249,6 +251,7 @@ struct mixer_operations
 
 struct synth_operations 
 {
+       struct module *owner;
        char *id;       /* Unique identifier (ASCII) max 29 char */
        struct synth_info *info;
        int midi_dev;
@@ -301,6 +304,7 @@ struct midi_input_info
 
 struct midi_operations 
 {
+       struct module *owner;
        struct midi_info info;
        struct synth_operations *converter;
        struct midi_input_info in_info;
@@ -332,6 +336,7 @@ struct sound_lowlev_timer
 
 struct sound_timer_operations 
 {
+       struct module *owner;
        struct sound_timer_info info;
        int priority;
        int devlink;
index 1029f179aa071549bcbe518d56f84764893aa226..30d02d226e5350a378aece9d0f70b57f8206b712 100644 (file)
@@ -20,7 +20,6 @@
 #include <linux/module.h>
 
 #include "sound_config.h"
-#include "soundmodule.h"
 
 #include "gus.h"
 #include "gus_hw.h"
@@ -184,7 +183,8 @@ static void __init attach_gus_db16(struct address_info *hw_config)
                                          hw_config->irq,
                                          hw_config->dma,
                                          hw_config->dma, 0,
-                                         hw_config->osp);
+                                         hw_config->osp,
+                                         THIS_MODULE);
 }
 
 static void __exit unload_gus_db16(struct address_info *hw_config)
@@ -259,7 +259,7 @@ static int __init init_gus(void)
        if (!probe_gus(&cfg))
                return -ENODEV;
        attach_gus(&cfg);
-       SOUND_LOCK;
+
        return 0;
 }
 
@@ -270,7 +270,6 @@ static void __exit cleanup_gus(void)
                unload_gus_db16(&cfg);
 #endif
        unload_gus(&cfg);
-       SOUND_LOCK_END;
 }
 
 module_init(init_gus);
index 1594afb1a01186f7753ef04e5df229a292ab5c30..1705d0df26b351d7ebc77c8bc3bcacdc1008f4ce 100644 (file)
@@ -186,23 +186,17 @@ static int gus_midi_buffer_status(int dev)
 
 static struct midi_operations gus_midi_operations =
 {
-       {
-               "Gravis UltraSound Midi", 0, 0, SNDCARD_GUS
-       },
-       &std_midi_synth,
-       {0},
-       gus_midi_open,
-       gus_midi_close,
-       NULL, /* ioctl */
-       gus_midi_out,
-       gus_midi_start_read,
-       gus_midi_end_read,
-       gus_midi_kick,
-       NULL,                   /*
-                                * command
-                                */
-       gus_midi_buffer_status,
-       NULL
+       owner:          THIS_MODULE,
+       info:           {"Gravis UltraSound Midi", 0, 0, SNDCARD_GUS},
+       converter:      &std_midi_synth,
+       in_info:        {0},
+       open:           gus_midi_open,
+       close:          gus_midi_close,
+       outputc:        gus_midi_out,
+       start_read:     gus_midi_start_read,
+       end_read:       gus_midi_end_read,
+       kick:           gus_midi_kick,
+       buffer_status:  gus_midi_buffer_status,
 };
 
 void gus_midi_init(struct address_info *hw_config)
index 30ba0ede38577476a6e45b0dd4af94155a9c63a6..26853486e842be77f90991852c1058c541dedca3 100644 (file)
@@ -2613,16 +2613,16 @@ static int gus_local_qlen(int dev)
 
 static struct audio_driver gus_audio_driver =
 {
-       gus_audio_open,
-       gus_audio_close,
-       gus_audio_output_block,
-       gus_audio_start_input,
-       gus_audio_ioctl,
-       gus_audio_prepare_for_input,
-       gus_audio_prepare_for_output,
-       gus_audio_reset,
-       gus_local_qlen,
-       NULL
+       owner:          THIS_MODULE,
+       open:           gus_audio_open,
+       close:          gus_audio_close,
+       output_block:   gus_audio_output_block,
+       start_input:    gus_audio_start_input,
+       ioctl:          gus_audio_ioctl,
+       prepare_for_input:      gus_audio_prepare_for_input,
+       prepare_for_output:     gus_audio_prepare_for_output,
+       halt_io:        gus_audio_reset,
+       local_qlen:     gus_local_qlen,
 };
 
 static void guswave_setup_voice(int dev, int voice, int chn)
@@ -2702,27 +2702,28 @@ static int guswave_alloc(int dev, int chn, int note, struct voice_alloc_info *al
 
 static struct synth_operations guswave_operations =
 {
-       "GUS",
-       &gus_info,
-       0,
-       SYNTH_TYPE_SAMPLE,
-       SAMPLE_TYPE_GUS,
-       guswave_open,
-       guswave_close,
-       guswave_ioctl,
-       guswave_kill_note,
-       guswave_start_note,
-       guswave_set_instr,
-       guswave_reset,
-       guswave_hw_control,
-       guswave_load_patch,
-       guswave_aftertouch,
-       guswave_controller,
-       guswave_panning,
-       guswave_volume_method,
-       guswave_bender,
-       guswave_alloc,
-       guswave_setup_voice
+       owner:          THIS_MODULE,
+       id:             "GUS",
+       info:           &gus_info,
+       midi_dev:       0,
+       synth_type:     SYNTH_TYPE_SAMPLE,
+       synth_subtype:  SAMPLE_TYPE_GUS,
+       open:           guswave_open,
+       close:          guswave_close,
+       ioctl:          guswave_ioctl,
+       kill_note:      guswave_kill_note,
+       start_note:     guswave_start_note,
+       set_instr:      guswave_set_instr,
+       reset:          guswave_reset,
+       hw_control:     guswave_hw_control,
+       load_patch:     guswave_load_patch,
+       aftertouch:     guswave_aftertouch,
+       controller:     guswave_controller,
+       panning:        guswave_panning,
+       volume_method:  guswave_volume_method,
+       bender:         guswave_bender,
+       alloc_voice:    guswave_alloc,
+       setup_voice:    guswave_setup_voice
 };
 
 static void set_input_volumes(void)
@@ -2894,9 +2895,10 @@ int gus_default_mixer_ioctl(int dev, unsigned int cmd, caddr_t arg)
 
 static struct mixer_operations gus_mixer_operations =
 {
-       "GUS",
-       "Gravis Ultrasound",
-       gus_default_mixer_ioctl
+       owner:  THIS_MODULE,
+       id:     "GUS",
+       name:   "Gravis Ultrasound",
+       ioctl:  gus_default_mixer_ioctl
 };
 
 static int gus_default_mixer_init(void)
@@ -3050,7 +3052,8 @@ void gus_wave_init(struct address_info *hw_config)
                                                        -irq, gus_dma2, /* Playback DMA */
                                                        gus_dma,        /* Capture DMA */
                                                        1,              /* Share DMA channels with GF1 */
-                                                       hw_config->osp);
+                                                       hw_config->osp,
+                                                       THIS_MODULE);
 
                                if (num_mixers > old_num_mixers)
                                {
index 3b97a157b637aeb906741418eb829d004bc7cafb..84ca968dfcab512f6229188f941050f9b95c6803 100644 (file)
@@ -206,9 +206,10 @@ static int ics2101_mixer_ioctl(int dev, unsigned int cmd, caddr_t arg)
 
 static struct mixer_operations ics2101_mixer_operations =
 {
-       "ICS2101",
-       "ICS2101 Multimedia Mixer",
-       ics2101_mixer_ioctl
+       owner:  THIS_MODULE,
+       id:     "ICS2101",
+       name:   "ICS2101 Multimedia Mixer",
+       ioctl:  ics2101_mixer_ioctl
 };
 
 int
index 5290b42ed373834b1532116be32d54cc65d22bca..7d071ee69220723c7e33f22f8fee7344a71154b9 100644 (file)
@@ -73,7 +73,6 @@
 #include <linux/module.h>
 
 #include "sound_config.h"
-#include "soundmodule.h"
 
 #include "ad1848.h"
 #include "sb.h"
@@ -710,7 +709,8 @@ static void __init attach_mad16(struct address_info *hw_config)
                                          hw_config->irq,
                                          dma,
                                          dma2, 0,
-                                         hw_config->osp);
+                                         hw_config->osp,
+                                         THIS_MODULE);
        request_region(hw_config->io_base, 4, "MAD16 WSS config");
 }
 
@@ -724,7 +724,7 @@ static void __init attach_mad16_mpu(struct address_info *hw_config)
                hw_config->io_base = 0x220;
 
        hw_config->name = "Mad16/Mozart";
-       sb_dsp_init(hw_config);
+       sb_dsp_init(hw_config, THIS_MODULE);
        return;
 #endif
 
@@ -733,7 +733,7 @@ static void __init attach_mad16_mpu(struct address_info *hw_config)
 
        hw_config->driver_use_1 = SB_MIDI_ONLY;
        hw_config->name = "Mad16/Mozart";
-       attach_uart401(hw_config);
+       attach_uart401(hw_config, THIS_MODULE);
 }
 
 static int __init probe_mad16_mpu(struct address_info *hw_config)
@@ -1094,7 +1094,6 @@ static int __init init_mad16(void)
        if (found_mpu)
                attach_mad16_mpu(&cfg_mpu);
 
-       SOUND_LOCK;
        return 0;
 }
 
@@ -1103,7 +1102,6 @@ static void __exit cleanup_mad16(void)
        if (found_mpu)
                unload_mad16_mpu(&cfg_mpu);
        unload_mad16(&cfg);
-       SOUND_LOCK_END;
 }
 
 module_init(init_mad16);
index 797150997a2ad095c6efcaba2b8d60fd729a0119..d9226d406f532a74d60a8540fc2b1131dad88a77 100644 (file)
@@ -30,7 +30,6 @@
 #define USE_SIMPLE_MACROS
 
 #include "sound_config.h"
-#include "soundmodule.h"
 #include "sound_firmware.h"
 
 #include "mpu401.h"
@@ -375,7 +374,7 @@ static void __init attach_maui(struct address_info *hw_config)
 
        hw_config->irq *= -1;
        hw_config->name = "Maui";
-       attach_mpu401(hw_config);
+       attach_mpu401(hw_config, THIS_MODULE);
 
        if (hw_config->slots[1] != -1)  /* The MPU401 driver installed itself */ {
                struct synth_operations *synth;
@@ -443,7 +442,7 @@ static int __init init_maui(void)
        if (probe_maui(&cfg) == 0)
                return -ENODEV;
        attach_maui(&cfg);
-       SOUND_LOCK;
+
        return 0;
 }
 
@@ -452,7 +451,6 @@ static void __exit cleanup_maui(void)
        if (fw_load && maui_os)
                vfree(maui_os);
        unload_maui(&cfg);
-       SOUND_LOCK_END;
 }
 
 module_init(init_maui);
index 9f1b9ef59b1389c4fdbda22ea74dd9f9d89c8e87..b0e96c519df417628d989017547459e080285c94 100644 (file)
@@ -431,9 +431,6 @@ midi_synth_open(int dev, int mode)
        if ((err = midi_devs[orig_dev]->open(orig_dev, mode,
                               midi_synth_input, midi_synth_output)) < 0)
                return err;
-#ifdef MODULE
-       MOD_INC_USE_COUNT;
-#endif
        inc = &midi_devs[orig_dev]->in_info;
 
        save_flags(flags);
@@ -461,9 +458,6 @@ midi_synth_close(int dev)
        midi_devs[orig_dev]->outputc(orig_dev, 0xfe);
 
        midi_devs[orig_dev]->close(orig_dev);
-#ifdef MODULE
-       MOD_DEC_USE_COUNT;
-#endif
 }
 
 void
index 1509b2255a34ecf3506f33a1e03ad9218f739cda..40c8f705f9c1e67a9a938e4c1de7fa18b2b626d0 100644 (file)
@@ -22,27 +22,26 @@ static struct synth_info std_synth_info =
 
 static struct synth_operations std_midi_synth =
 {
-  "MIDI",
-  &std_synth_info,
-  0,
-  SYNTH_TYPE_MIDI,
-  0,
-  midi_synth_open,
-  midi_synth_close,
-  midi_synth_ioctl,
-  midi_synth_kill_note,
-  midi_synth_start_note,
-  midi_synth_set_instr,
-  midi_synth_reset,
-  midi_synth_hw_control,
-  midi_synth_load_patch,
-  midi_synth_aftertouch,
-  midi_synth_controller,
-  midi_synth_panning,
-  NULL,
-  midi_synth_bender,
-  NULL,        /* alloc_voice */
-  midi_synth_setup_voice,
-  midi_synth_send_sysex
+       owner:          THIS_MODULE,
+       id:             "MIDI",
+       info:           &std_synth_info,
+       midi_dev:       0,
+       synth_type:     SYNTH_TYPE_MIDI,
+       synth_subtype:  0,
+       open:           midi_synth_open,
+       close:          midi_synth_close,
+       ioctl:          midi_synth_ioctl,
+       kill_note:      midi_synth_kill_note,
+       start_note:     midi_synth_start_note,
+       set_instr:      midi_synth_set_instr,
+       reset:          midi_synth_reset,
+       hw_control:     midi_synth_hw_control,
+       load_patch:     midi_synth_load_patch,
+       aftertouch:     midi_synth_aftertouch,
+       controller:     midi_synth_controller,
+       panning:        midi_synth_panning,
+       bender:         midi_synth_bender,
+       setup_voice:    midi_synth_setup_voice,
+       send_sysex:     midi_synth_send_sysex
 };
 #endif
index 913493a4a7a47b622f23bec73534885adc589d30..d0db7d5d213d1b765a9b06928553eee6cb1088df 100644 (file)
@@ -172,6 +172,9 @@ int MIDIbuf_open(int dev, struct file *file)
         *    Interrupts disabled. Be careful
         */
 
+       if (midi_devs[dev]->owner)
+               __MOD_INC_USE_COUNT (midi_devs[dev]->owner);
+
        if ((err = midi_devs[dev]->open(dev, mode,
                                 midi_input_intr, midi_output_intr)) < 0)
                return err;
@@ -257,6 +260,9 @@ void MIDIbuf_release(int dev, struct file *file)
        if (open_devs < 2)
                del_timer(&poll_timer);;
        open_devs--;
+
+       if (midi_devs[dev]->owner)
+               __MOD_DEC_USE_COUNT (midi_devs[dev]->owner);
 }
 
 int MIDIbuf_write(int dev, struct file *file, const char *buf, int count)
index a329f0e72198c4ac804d7e6b8dc57ce08be6cdbe..de587668ad1611dd2357d9c0a243ecfad889a647 100644 (file)
  * Alan Cox            modularisation, use normal request_irq, use dev_id
  */
 
-#include <linux/init.h>
 #include <linux/module.h>
+#include <linux/init.h>
 
 #define USE_SEQ_MACROS
 #define USE_SIMPLE_MACROS
 
 #include "sound_config.h"
-#include "soundmodule.h"
 
 #include "coproc.h"
 #include "mpu401.h"
@@ -866,47 +865,45 @@ static void mpu_synth_close(int dev)
 
 static struct synth_operations mpu401_synth_proto =
 {
-       "MPU401",
-       NULL,
-       0,
-       SYNTH_TYPE_MIDI,
-       0,
-       mpu_synth_open,
-       mpu_synth_close,
-       mpu_synth_ioctl,
-       midi_synth_kill_note,
-       midi_synth_start_note,
-       midi_synth_set_instr,
-       midi_synth_reset,
-       midi_synth_hw_control,
-       midi_synth_load_patch,
-       midi_synth_aftertouch,
-       midi_synth_controller,
-       midi_synth_panning,
-       NULL,
-       midi_synth_bender,
-       NULL,                   /* alloc */
-       midi_synth_setup_voice,
-       midi_synth_send_sysex
+       owner:          THIS_MODULE,
+       id:             "MPU401",
+       info:           NULL,
+       midi_dev:       0,
+       synth_type:     SYNTH_TYPE_MIDI,
+       synth_subtype:  0,
+       open:           mpu_synth_open,
+       close:          mpu_synth_close,
+       ioctl:          mpu_synth_ioctl,
+       kill_note:      midi_synth_kill_note,
+       start_note:     midi_synth_start_note,
+       set_instr:      midi_synth_set_instr,
+       reset:          midi_synth_reset,
+       hw_control:     midi_synth_hw_control,
+       load_patch:     midi_synth_load_patch,
+       aftertouch:     midi_synth_aftertouch,
+       controller:     midi_synth_controller,
+       panning:        midi_synth_panning,
+       bender:         midi_synth_bender,
+       setup_voice:    midi_synth_setup_voice,
+       send_sysex:     midi_synth_send_sysex
 };
 
 static struct synth_operations *mpu401_synth_operations[MAX_MIDI_DEV];
 
 static struct midi_operations mpu401_midi_proto =
 {
-       {"MPU-401 Midi", 0, MIDI_CAP_MPU401, SNDCARD_MPU401},
-       NULL,
-       {0},
-       mpu401_open,
-       mpu401_close,
-       mpu401_ioctl,
-       mpu401_out,
-       mpu401_start_read,
-       mpu401_end_read,
-       mpu401_kick,
-       NULL,
-       mpu401_buffer_status,
-       mpu401_prefix_cmd
+       owner:          THIS_MODULE,
+       info:           {"MPU-401 Midi", 0, MIDI_CAP_MPU401, SNDCARD_MPU401},
+       in_info:        {0},
+       open:           mpu401_open,
+       close:          mpu401_close,
+       ioctl:          mpu401_ioctl,
+       outputc:        mpu401_out,
+       start_read:     mpu401_start_read,
+       end_read:       mpu401_end_read,
+       kick:           mpu401_kick,
+       buffer_status:  mpu401_buffer_status,
+       prefix_cmd:     mpu401_prefix_cmd
 };
 
 static struct midi_operations mpu401_midi_operations[MAX_MIDI_DEV];
@@ -942,7 +939,7 @@ static void __init mpu401_chk_version(int n, struct mpu_config *devc)
        restore_flags(flags);
 }
 
-void __init attach_mpu401(struct address_info *hw_config)
+void __init attach_mpu401(struct address_info *hw_config, struct module *owner)
 {
        unsigned long flags;
        char revision_char;
@@ -1089,6 +1086,10 @@ void __init attach_mpu401(struct address_info *hw_config)
                hw_config->slots[2] = mpu_timer_init(m);
 
        midi_devs[m] = &mpu401_midi_operations[devc->devno];
+       
+       if (owner)
+               midi_devs[m]->owner = owner;
+
        hw_config->slots[1] = m;
        sequencer_init();
 }
@@ -1574,15 +1575,16 @@ static void mpu_timer_arm(int dev, long time)
 
 static struct sound_timer_operations mpu_timer =
 {
-       {"MPU-401 Timer", 0},
-       10,                     /* Priority */
-       0,                      /* Local device link */
-       mpu_timer_open,
-       mpu_timer_close,
-       mpu_timer_event,
-       mpu_timer_get_time,
-       mpu_timer_ioctl,
-       mpu_timer_arm
+       owner:          THIS_MODULE,
+       info:           {"MPU-401 Timer", 0},
+       priority:       10,     /* Priority */
+       devlink:        0,      /* Local device link */
+       open:           mpu_timer_open,
+       close:          mpu_timer_close,
+       event:          mpu_timer_event,
+       get_time:       mpu_timer_get_time,
+       ioctl:          mpu_timer_ioctl,
+       arm_timer:      mpu_timer_arm
 };
 
 static void mpu_timer_interrupt(void)
@@ -1731,10 +1733,9 @@ int init_mpu401(void)
                cfg.io_base = io;
                if (probe_mpu401(&cfg) == 0)
                        return -ENODEV;
-               attach_mpu401(&cfg);
+               attach_mpu401(&cfg, THIS_MODULE);
        }
        
-       SOUND_LOCK;
        return 0;
 }
 
@@ -1744,7 +1745,6 @@ void cleanup_mpu401(void)
                /* Check for use by, for example, sscape driver */
                unload_mpu401(&cfg);
        }
-       SOUND_LOCK_END;
 }
 
 module_init(init_mpu401);
index a6a41cbde1ba62a71bd82e65528e7753f05c96b8..a74f7285a3318ddf5d105c4a1c73f5b0f63ecf83 100644 (file)
@@ -7,14 +7,14 @@
 
 /*     From uart401.c */
 int probe_uart401 (struct address_info *hw_config);
-void attach_uart401 (struct address_info *hw_config);
+void attach_uart401 (struct address_info *hw_config, struct module *owner);
 void unload_uart401 (struct address_info *hw_config);
 
 void uart401intr (int irq, void *dev_id, struct pt_regs * dummy);
 
 /*     From mpu401.c */
 int probe_mpu401(struct address_info *hw_config);
-void attach_mpu401(struct address_info * hw_config);
+void attach_mpu401(struct address_info * hw_config, struct module *owner);
 void unload_mpu401(struct address_info *hw_info);
 
 int intchk_mpu401(void *dev_id);
index e9ec909f660035fa290f27ed04483f2ff2100e20..a07029ad6c8efa21750fe7ac5162011dc9e178bc 100644 (file)
@@ -20,7 +20,6 @@
 #include <linux/pm.h>
 #include <linux/delay.h>
 #include "sound_config.h"
-#include "soundmodule.h"
 #include "nm256.h"
 #include "nm256_coeff.h"
 
@@ -926,9 +925,10 @@ nm256_default_mixer_ioctl (int dev, unsigned int cmd, caddr_t arg)
 }
 
 static struct mixer_operations nm256_mixer_operations = {
-    "NeoMagic",
-    "NM256AC97Mixer",
-    nm256_default_mixer_ioctl
+    owner:     THIS_MODULE,
+    id:                "NeoMagic",
+    name:      "NM256AC97Mixer",
+    ioctl:     nm256_default_mixer_ioctl
 };
 
 /*
@@ -1621,22 +1621,16 @@ nm256_audio_local_qlen(int dev)
 
 static struct audio_driver nm256_audio_driver =
 {
-    nm256_audio_open,                  /* open                 */
-    nm256_audio_close,                 /* close                */
-    nm256_audio_output_block,          /* output_block         */
-    nm256_audio_start_input,           /* start_input          */
-    nm256_audio_ioctl,                 /* ioctl                */
-    nm256_audio_prepare_for_input,     /* prepare_for_input    */
-    nm256_audio_prepare_for_output,    /* prepare_for_output   */
-    nm256_audio_reset,                 /* reset                */
-    nm256_audio_local_qlen,            /*+local_qlen           */
-    NULL,                              /*+copy_from_user       */
-    NULL,                              /*+halt_input           */
-    NULL,                              /* halt_output          */
-    NULL,                              /*+trigger              */
-    NULL,                              /*+set_speed            */
-    NULL,                              /*+set_bits             */
-    NULL,                              /*+set_channels         */
+    owner:             THIS_MODULE,
+    open:              nm256_audio_open,
+    close:             nm256_audio_close,
+    output_block:      nm256_audio_output_block,
+    start_input:       nm256_audio_start_input,
+    ioctl:             nm256_audio_ioctl,
+    prepare_for_input: nm256_audio_prepare_for_input,
+    prepare_for_output:nm256_audio_prepare_for_output,
+    halt_io:           nm256_audio_reset,
+    local_qlen:                nm256_audio_local_qlen,
 };
 
 EXPORT_SYMBOL(init_nm256);
@@ -1654,7 +1648,6 @@ static int __init do_init_nm256(void)
     printk (KERN_INFO "NeoMagic 256AV/256ZX audio driver, version 1.1\n");
 
     if (init_nm256 () == 0) {
-       SOUND_LOCK;
        loaded = 1;
        return 0;
     }
@@ -1668,8 +1661,6 @@ static void __exit cleanup_nm256 (void)
        struct nm256_info *card;
        struct nm256_info *next_card;
 
-       SOUND_LOCK_END;
-
        for (card = nmcard_list; card != NULL; card = next_card) {
            stopPlay (card);
            stopRecord (card);
index 951dfe4d3611ce6d3e72696f7e93d0a76a10fcd6..74d7149458b61e6cdca4b43fce9be409311532f3 100644 (file)
@@ -31,7 +31,6 @@
  */
 
 #include "sound_config.h"
-#include "soundmodule.h"
 
 #include "opl3.h"
 #include "opl3_hw.h"
@@ -769,7 +768,6 @@ static int opl3_open(int dev, int mode)
 
        if (devc->busy)
                return -EBUSY;
-       MOD_INC_USE_COUNT;
        devc->busy = 1;
 
        devc->v_alloc->max_voice = devc->nr_voice = (devc->model == 2) ? 18 : 9;
@@ -798,7 +796,6 @@ static void opl3_close(int dev)
        devc->fm_info.perc_mode = 0;
 
        opl3_reset(dev);
-       MOD_DEC_USE_COUNT;
 }
 
 static void opl3_hw_control(int dev, unsigned char *event)
@@ -1061,30 +1058,31 @@ static void opl3_setup_voice(int dev, int voice, int chn)
 
 static struct synth_operations opl3_operations =
 {
-       "OPL",
-       NULL,
-       0,
-       SYNTH_TYPE_FM,
-       FM_TYPE_ADLIB,
-       opl3_open,
-       opl3_close,
-       opl3_ioctl,
-       opl3_kill_note,
-       opl3_start_note,
-       opl3_set_instr,
-       opl3_reset,
-       opl3_hw_control,
-       opl3_load_patch,
-       opl3_aftertouch,
-       opl3_controller,
-       opl3_panning,
-       opl3_volume_method,
-       opl3_bender,
-       opl3_alloc_voice,
-       opl3_setup_voice
+       owner:          THIS_MODULE,
+       id:             "OPL",
+       info:           NULL,
+       midi_dev:       0,
+       synth_type:     SYNTH_TYPE_FM,
+       synth_subtype:  FM_TYPE_ADLIB,
+       open:           opl3_open,
+       close:          opl3_close,
+       ioctl:          opl3_ioctl,
+       kill_note:      opl3_kill_note,
+       start_note:     opl3_start_note,
+       set_instr:      opl3_set_instr,
+       reset:          opl3_reset,
+       hw_control:     opl3_hw_control,
+       load_patch:     opl3_load_patch,
+       aftertouch:     opl3_aftertouch,
+       controller:     opl3_controller,
+       panning:        opl3_panning,
+       volume_method:  opl3_volume_method,
+       bender:         opl3_bender,
+       alloc_voice:    opl3_alloc_voice,
+       setup_voice:    opl3_setup_voice
 };
 
-int opl3_init(int ioaddr, int *osp)
+int opl3_init(int ioaddr, int *osp, struct module *owner)
 {
        int i;
        int me;
@@ -1131,6 +1129,10 @@ int opl3_init(int ioaddr, int *osp)
        opl3_operations.info = &devc->fm_info;
 
        synth_devs[me] = &opl3_operations;
+
+       if (owner)
+               synth_devs[me]->owner = owner;
+       
        sequencer_init();
        devc->v_alloc = &opl3_operations.alloc;
        devc->chn_info = &opl3_operations.chn_info[0];
@@ -1198,11 +1200,11 @@ static int __init init_opl3 (void)
                {
                        return -ENODEV;
                }
-               me = opl3_init(io, NULL);
+               me = opl3_init(io, NULL, THIS_MODULE);
                request_region(io, 4, devc->fm_info.name);
 
        }
-       SOUND_LOCK;
+
        return 0;
 }
 
@@ -1216,7 +1218,6 @@ static void __exit cleanup_opl3(void)
                devc = NULL;
                sound_unload_synthdev(me);
        }
-       SOUND_LOCK_END;
 }
 
 module_init(init_opl3);
index 6ef00614bb86b397ac5d5db3e3b55a28692a125d..104ae9ff4e7d2c39710f26998f0e3457edf6070a 100644 (file)
@@ -6,6 +6,6 @@
  */
 
 int opl3_detect (int ioaddr, int *osp);
-int opl3_init(int ioaddr, int *osp);
+int opl3_init(int ioaddr, int *osp, struct module *owner);
 
 void enable_opl3_mode(int left, int right, int both);
index 6b82d2a171894f347c8c91dade5689f329b17658..fc8be2cd5afc285c3c02076f810c4dcdea618ad5 100644 (file)
@@ -25,7 +25,6 @@
 #undef  SB_OK
 
 #include "sound_config.h"
-#include "soundmodule.h"
 
 #include "ad1848.h"
 #include "mpu401.h"
@@ -167,7 +166,7 @@ static void __init attach_opl3sa_wss(struct address_info *hw_config)
        int nm = num_mixers;
 
        /* FIXME */
-       attach_ms_sound(hw_config);
+       attach_ms_sound(hw_config, THIS_MODULE);
        if (num_mixers > nm)    /* A mixer was installed */
        {
                AD1848_REROUTE(SOUND_MIXER_LINE1, SOUND_MIXER_CD);
@@ -180,7 +179,7 @@ static void __init attach_opl3sa_wss(struct address_info *hw_config)
 static void __init attach_opl3sa_mpu(struct address_info *hw_config)
 {
        hw_config->name = "OPL3-SA (MPU401)";
-       attach_uart401(hw_config);
+       attach_uart401(hw_config, THIS_MODULE);
 }
 
 static int __init probe_opl3sa_mpu(struct address_info *hw_config)
@@ -313,7 +312,7 @@ static int __init init_opl3sa(void)
        attach_opl3sa_wss(&cfg);
        if(found_mpu)
                attach_opl3sa_mpu(&cfg_mpu);
-       SOUND_LOCK;
+
        return 0;
 }
 
@@ -322,7 +321,6 @@ static void __exit cleanup_opl3sa(void)
        if(found_mpu)
                unload_opl3sa_mpu(&cfg_mpu);
        unload_opl3sa_wss(&cfg);
-       SOUND_LOCK_END;
 }
 
 module_init(init_opl3sa);
index 23f85733e2da97acb95eb60dfaf29bcf2e459db5..e1b31534772c67daa70464cbebeed290b064ef78 100644 (file)
@@ -41,7 +41,6 @@
 #include <linux/module.h>
 
 #include "sound_config.h"
-#include "soundmodule.h"
 
 #include "ad1848.h"
 #include "mpu401.h"
@@ -436,9 +435,10 @@ static int opl3sa2_mixer_ioctl(int dev, unsigned int cmd, caddr_t arg)
 
 static struct mixer_operations opl3sa2_mixer_operations =
 {
-       "Yamaha",
-       "",
-        opl3sa2_mixer_ioctl
+       owner:  THIS_MODULE,
+       id:     "Yamaha",
+       name:   "", /* hmm? */
+       ioctl:  opl3sa2_mixer_ioctl
 };
 
 /* End of mixer-related stuff */
@@ -452,7 +452,7 @@ static inline int __init probe_opl3sa2_mpu(struct address_info *hw_config)
 
 static inline void __init attach_opl3sa2_mpu(struct address_info *hw_config)
 {
-       attach_mpu401(hw_config);
+       attach_mpu401(hw_config, THIS_MODULE);
 }
 
 
@@ -493,7 +493,7 @@ static void __init attach_opl3sa2_mss(struct address_info *hw_config)
 
        opl3sa2_mixer_reset(devc);
 
-       attach_ms_sound(hw_config);     /* Slot 0 */
+       attach_ms_sound(hw_config, THIS_MODULE);        /* Slot 0 */
        if(hw_config->slots[0] != -1)
        {
                /* Did the MSS driver install? */
@@ -699,7 +699,7 @@ static int __init init_opl3sa2(void)
                        attach_opl3sa2_mpu(&cfg_mpu);
                }
        }
-       SOUND_LOCK;
+
        return 0;
 }
 
@@ -711,7 +711,6 @@ static void __exit cleanup_opl3sa2(void)
        }
        unload_opl3sa2_mss(&cfg2);
        unload_opl3sa2(&cfg);
-       SOUND_LOCK_END;
 }
 
 module_init(init_opl3sa2);
index 68936e577f905dd0dcaa672f5ab3b03d3bec8ce7..52e0d2865d44e55adf61d35cb46c0c2bd25182ec 100644 (file)
@@ -8,7 +8,6 @@
 #include <linux/init.h>
 #include <linux/module.h>
 #include "sound_config.h"
-#include "soundmodule.h"
 
 #include "pas2.h"
 #include "sb.h"
@@ -426,14 +425,13 @@ static int __init init_pas2(void)
        if (!probe_pas(&cfg))
                return -ENODEV;
        attach_pas_card(&cfg);
-       SOUND_LOCK;
+
        return 0;
 }
 
 static void __exit cleanup_pas2(void)
 {
        unload_pas(&cfg);
-       SOUND_LOCK_END;
 }
 
 module_init(init_pas2);
index 385bdd1db3e7192daa20de68c9d57f7757d869f4..979e2a8ec0b17c2d43e482c13150764ca01ee77a 100644 (file)
@@ -191,19 +191,17 @@ static int pas_buffer_status(int dev)
 
 static struct midi_operations pas_midi_operations =
 {
-       {"Pro Audio Spectrum", 0, 0, SNDCARD_PAS},
-       &std_midi_synth,
-       {0},
-       pas_midi_open,
-       pas_midi_close,
-       NULL,
-       pas_midi_out,
-       pas_midi_start_read,
-       pas_midi_end_read,
-       pas_midi_kick,
-       NULL,
-       pas_buffer_status,
-       NULL
+       owner:          THIS_MODULE,
+       info:           {"Pro Audio Spectrum", 0, 0, SNDCARD_PAS},
+       converter:      &std_midi_synth,
+       in_info:        {0},
+       open:           pas_midi_open,
+       close:          pas_midi_close,
+       outputc:        pas_midi_out,
+       start_read:     pas_midi_start_read,
+       end_read:       pas_midi_end_read,
+       kick:           pas_midi_kick,
+       buffer_status:  pas_buffer_status,
 };
 
 void pas_midi_init(void)
index fa85a927847d6fa3d6776f12b1fefad4f31e414b..00cadcc930251f3d85f7853a6b121bae94bf7d65 100644 (file)
@@ -309,9 +309,10 @@ static int pas_mixer_ioctl(int dev, unsigned int cmd, caddr_t arg)
 
 static struct mixer_operations pas_mixer_operations =
 {
-       "PAS16",
-       "Pro Audio Spectrum 16",
-       pas_mixer_ioctl
+       owner:  THIS_MODULE,
+       id:     "PAS16",
+       name:   "Pro Audio Spectrum 16",
+       ioctl:  pas_mixer_ioctl
 };
 
 int
index 5a79ebb8655ffbb4ccdcefd44002243f23ad7d7d..90d461ed08440ab75cb86d91a52e939113e0bc18 100644 (file)
@@ -373,19 +373,16 @@ static int pas_audio_prepare_for_output(int dev, int bsize, int bcount)
 
 static struct audio_driver pas_audio_driver =
 {
-       pas_audio_open,
-       pas_audio_close,
-       pas_audio_output_block,
-       pas_audio_start_input,
-       pas_audio_ioctl,
-       pas_audio_prepare_for_input,
-       pas_audio_prepare_for_output,
-       pas_audio_reset,
-       NULL,
-       NULL,
-       NULL,
-       NULL,
-       pas_audio_trigger
+       owner:          THIS_MODULE,
+       open:           pas_audio_open,
+       close:          pas_audio_close,
+       output_block:   pas_audio_output_block,
+       start_input:    pas_audio_start_input,
+       ioctl:          pas_audio_ioctl,
+       prepare_for_input:      pas_audio_prepare_for_input,
+       prepare_for_output:     pas_audio_prepare_for_output,
+       halt_io:                pas_audio_reset,
+       trigger:        pas_audio_trigger
 };
 
 void pas_pcm_init(struct address_info *hw_config)
index 7e883a625aec667fb79683b19ab594ad7137b900..a1c7693f38c63be3c196ff4c2ae0c89f553cc72f 100644 (file)
@@ -35,7 +35,6 @@
 
 #include "sound_config.h"
 #include "sound_firmware.h"
-#include "soundmodule.h"
 
 #include "ad1848.h"
 #include "mpu401.h"
@@ -569,9 +568,10 @@ static int pss_mixer_ioctl (int dev, unsigned int cmd, caddr_t arg)
 
 static struct mixer_operations pss_mixer_operations =
 {
-       "SOUNDPORT",
-       "PSS-AD1848",
-        pss_mixer_ioctl
+       owner:  THIS_MODULE,
+       id:     "SOUNDPORT",
+       name:   "PSS-AD1848",
+       ioctl:  pss_mixer_ioctl
 };
 
 void attach_pss(struct address_info *hw_config)
@@ -918,7 +918,7 @@ static coproc_operations pss_coproc_operations =
 
 static void __init attach_pss_mpu(struct address_info *hw_config)
 {
-       attach_mpu401(hw_config);       /* Slot 1 */
+       attach_mpu401(hw_config, THIS_MODULE);  /* Slot 1 */
        if (hw_config->slots[1] != -1)  /* The MPU driver installed itself */
                midi_devs[hw_config->slots[1]]->coproc = &pss_coproc_operations;
 }
@@ -987,7 +987,7 @@ static void __init attach_pss_mss(struct address_info *hw_config)
                }
        }
        pss_mixer_reset(devc);
-       attach_ms_sound(hw_config);     /* Slot 0 */
+       attach_ms_sound(hw_config, THIS_MODULE);        /* Slot 0 */
 
        if (hw_config->slots[0] != -1)
        {
@@ -1087,7 +1087,7 @@ static int __init init_pss(void)
                pssmss = 1;
                attach_pss_mss(&cfg2);
        }
-       SOUND_LOCK;
+
        return 0;
 }
 
@@ -1100,7 +1100,6 @@ static void __exit cleanup_pss(void)
        if (pssmpu)
                unload_pss_mpu(&cfg_mpu);
        unload_pss(&cfg);
-       SOUND_LOCK_END;
 }
 
 module_init(init_pss);
index f0afbe177c49cb3b04fb73fb007bd932577d22bf..dbadb8ea683b33962494d40948cf42fabacb0bf1 100644 (file)
@@ -158,14 +158,14 @@ int sb_dsp_reset (sb_devc *devc);
 void sb_setmixer (sb_devc *devc, unsigned int port, unsigned int value);
 unsigned int sb_getmixer (sb_devc *devc, unsigned int port);
 int sb_dsp_detect (struct address_info *hw_config, int pci, int pciio, struct sb_module_options *sbmo);
-int sb_dsp_init (struct address_info *hw_config);
+int sb_dsp_init (struct address_info *hw_config, struct module *owner);
 void sb_dsp_unload(struct address_info *hw_config, int sbmpu);
-int sb_mixer_init(sb_devc *devc);
+int sb_mixer_init(sb_devc *devc, struct module *owner);
 void sb_mixer_unload(sb_devc *devc);
 void sb_mixer_set_stereo (sb_devc *devc, int mode);
 void smw_mixer_init(sb_devc *devc);
-void sb_dsp_midi_init (sb_devc *devc);
-void sb_audio_init (sb_devc *devc, char *name);
+void sb_dsp_midi_init (sb_devc *devc, struct module *owner);
+void sb_audio_init (sb_devc *devc, char *name, struct module *owner);
 void sb_midi_interrupt (sb_devc *devc);
 void sb_chgmixer (sb_devc * devc, unsigned int reg, unsigned int mask, unsigned int val);
 int sb_common_mixer_set(sb_devc * devc, int dev, int left, int right);
@@ -178,7 +178,7 @@ extern sb_devc *last_sb;
 /*     From sb_common.c */
 void sb_dsp_disable_midi(int port);
 void sb_dsp_disable_recording(int port);
-void attach_sbmpu (struct address_info *hw_config);
+void attach_sbmpu (struct address_info *hw_config, struct module *owner);
 int probe_sbmpu (struct address_info *hw_config);
 void unload_sbmpu (struct address_info *hw_config);
 
index bba7baba81ae67e23797205065445bea531c3041..4e05c48a394e449fae89cdadfb4faba5183e344a 100644 (file)
@@ -933,128 +933,103 @@ sb16_audio_mmap(int dev)
 
 static struct audio_driver sb1_audio_driver =  /* SB1.x */
 {
-       sb_audio_open,
-       sb_audio_close,
-       sb_set_output_parms,
-       sb_set_input_parms,
-       NULL, /* ioctl */
-       sb1_audio_prepare_for_input,
-       sb1_audio_prepare_for_output,
-       sb1_audio_halt_xfer,
-       NULL,                   /* local_qlen */
-       NULL,                   /* copy_from_user */
-       NULL,
-       NULL,
-       sb1_audio_trigger,
-       sb1_audio_set_speed,
-       sb1_audio_set_bits,
-       sb1_audio_set_channels
+       owner:          THIS_MODULE,
+       open:           sb_audio_open,
+       close:          sb_audio_close,
+       output_block:   sb_set_output_parms,
+       start_input:    sb_set_input_parms,
+       prepare_for_input:      sb1_audio_prepare_for_input,
+       prepare_for_output:     sb1_audio_prepare_for_output,
+       halt_io:        sb1_audio_halt_xfer,
+       trigger:        sb1_audio_trigger,
+       set_speed:      sb1_audio_set_speed,
+       set_bits:       sb1_audio_set_bits,
+       set_channels:   sb1_audio_set_channels
 };
 
 static struct audio_driver sb20_audio_driver = /* SB2.0 */
 {
-       sb_audio_open,
-       sb_audio_close,
-       sb_set_output_parms,
-       sb_set_input_parms,
-       NULL,
-       sb1_audio_prepare_for_input,
-       sb1_audio_prepare_for_output,
-       sb1_audio_halt_xfer,
-       NULL,                   /* local_qlen */
-       NULL,                   /* copy_from_user */
-       NULL,
-       NULL,
-       sb20_audio_trigger,
-       sb1_audio_set_speed,
-       sb1_audio_set_bits,
-       sb1_audio_set_channels
+       owner:          THIS_MODULE,
+       open:           sb_audio_open,
+       close:          sb_audio_close,
+       output_block:   sb_set_output_parms,
+       start_input:    sb_set_input_parms,
+       prepare_for_input:      sb1_audio_prepare_for_input,
+       prepare_for_output:     sb1_audio_prepare_for_output,
+       halt_io:        sb1_audio_halt_xfer,
+       trigger:        sb20_audio_trigger,
+       set_speed:      sb1_audio_set_speed,
+       set_bits:       sb1_audio_set_bits,
+       set_channels:   sb1_audio_set_channels
 };
 
 static struct audio_driver sb201_audio_driver =                /* SB2.01 */
 {
-       sb_audio_open,
-       sb_audio_close,
-       sb_set_output_parms,
-       sb_set_input_parms,
-       NULL,
-       sb1_audio_prepare_for_input,
-       sb1_audio_prepare_for_output,
-       sb1_audio_halt_xfer,
-       NULL,                   /* local_qlen */
-       NULL,                   /* copy_from_user */
-       NULL,
-       NULL,
-       sb20_audio_trigger,
-       sb201_audio_set_speed,
-       sb1_audio_set_bits,
-       sb1_audio_set_channels
+       owner:          THIS_MODULE,
+       open:           sb_audio_open,
+       close:          sb_audio_close,
+       output_block:   sb_set_output_parms,
+       start_input:    sb_set_input_parms,
+       prepare_for_input:      sb1_audio_prepare_for_input,
+       prepare_for_output:     sb1_audio_prepare_for_output,
+       halt_io:        sb1_audio_halt_xfer,
+       trigger:        sb20_audio_trigger,
+       set_speed:      sb201_audio_set_speed,
+       set_bits:       sb1_audio_set_bits,
+       set_channels:   sb1_audio_set_channels
 };
 
 static struct audio_driver sbpro_audio_driver =                /* SB Pro */
 {
-       sb_audio_open,
-       sb_audio_close,
-       sb_set_output_parms,
-       sb_set_input_parms,
-       NULL,
-       sbpro_audio_prepare_for_input,
-       sbpro_audio_prepare_for_output,
-       sb1_audio_halt_xfer,
-       NULL,                   /* local_qlen */
-       NULL,                   /* copy_from_user */
-       NULL,
-       NULL,
-       sb20_audio_trigger,
-       sbpro_audio_set_speed,
-       sb1_audio_set_bits,
-       sbpro_audio_set_channels
+       owner:          THIS_MODULE,
+       open:           sb_audio_open,
+       close:          sb_audio_close,
+       output_block:   sb_set_output_parms,
+       start_input:    sb_set_input_parms,
+       prepare_for_input:      sbpro_audio_prepare_for_input,
+       prepare_for_output:     sbpro_audio_prepare_for_output,
+       halt_io:        sb1_audio_halt_xfer,
+       trigger:        sb20_audio_trigger,
+       set_speed:      sbpro_audio_set_speed,
+       set_bits:       sb1_audio_set_bits,
+       set_channels:   sbpro_audio_set_channels
 };
 
 static struct audio_driver jazz16_audio_driver =       /* Jazz16 and SM Wave */
 {
-       sb_audio_open,
-       sb_audio_close,
-       sb_set_output_parms,
-       sb_set_input_parms,
-       NULL,
-       sbpro_audio_prepare_for_input,
-       sbpro_audio_prepare_for_output,
-       sb1_audio_halt_xfer,
-       NULL,                   /* local_qlen */
-       NULL,                   /* copy_from_user */
-       NULL,
-       NULL,
-       sb20_audio_trigger,
-       jazz16_audio_set_speed,
-       sb16_audio_set_bits,
-       sbpro_audio_set_channels
+       owner:          THIS_MODULE,
+       open:           sb_audio_open,
+       close:          sb_audio_close,
+       output_block:   sb_set_output_parms,
+       start_input:    sb_set_input_parms,
+       prepare_for_input:      sbpro_audio_prepare_for_input,
+       prepare_for_output:     sbpro_audio_prepare_for_output,
+       halt_io:        sb1_audio_halt_xfer,
+       trigger:        sb20_audio_trigger,
+       set_speed:      jazz16_audio_set_speed,
+       set_bits:       sb16_audio_set_bits,
+       set_channels:   sbpro_audio_set_channels
 };
 
 static struct audio_driver sb16_audio_driver = /* SB16 */
 {
-       sb_audio_open,
-       sb_audio_close,
-       sb_set_output_parms,
-       sb_set_input_parms,
-       NULL,
-       sb16_audio_prepare_for_input,
-       sb16_audio_prepare_for_output,
-       sb1_audio_halt_xfer,
-       NULL,                   /* local_qlen */
-       sb16_copy_from_user,    /* copy_from_user */
-       NULL,
-       NULL,
-       sb16_audio_trigger,
-       sb16_audio_set_speed,
-       sb16_audio_set_bits,
-       sbpro_audio_set_channels,
-       NULL,
-       NULL,
-       sb16_audio_mmap
+       owner:          THIS_MODULE,
+       open:           sb_audio_open,
+       close:          sb_audio_close,
+       output_block:   sb_set_output_parms,
+       start_input:    sb_set_input_parms,
+       prepare_for_input:      sb16_audio_prepare_for_input,
+       prepare_for_output:     sb16_audio_prepare_for_output,
+       halt_io:        sb1_audio_halt_xfer,
+       copy_user:      sb16_copy_from_user,
+       trigger:        sb16_audio_trigger,
+       set_speed:      sb16_audio_set_speed,
+       set_bits:       sb16_audio_set_bits,
+       set_channels:   sbpro_audio_set_channels,
+       mmap:           sb16_audio_mmap
 };
 
-void sb_audio_init(sb_devc * devc, char *name)
+void sb_audio_init(sb_devc * devc, char *name, struct module *owner)
 {
        int audio_flags = 0;
        int format_mask = AFMT_U8;
@@ -1111,6 +1086,9 @@ void sb_audio_init(sb_devc * devc, char *name)
                        driver = &sbpro_audio_driver;
        }
 
+       if (owner)
+                       driver->owner = owner;
+       
        if ((devc->dev = sound_install_audiodrv(AUDIO_DRIVER_VERSION,
                                name,driver, sizeof(struct audio_driver),
                                audio_flags, format_mask, devc,
index f66c0b6048279aee9545203ff61b12cdc7eec559..bb01bf6972516f6cffb008d2e00ce6ea3cd5ab49 100644 (file)
@@ -54,7 +54,6 @@
 #include <linux/isapnp.h>
 
 #include "sound_config.h"
-#include "soundmodule.h"
 
 #include "sb_mixer.h"
 #include "sb.h"
@@ -88,7 +87,7 @@ static int __initdata sm_games        = 0;    /* Logitech soundman games? */
 
 static void __init attach_sb_card(struct address_info *hw_config)
 {
-       if(!sb_dsp_init(hw_config))
+       if(!sb_dsp_init(hw_config, THIS_MODULE))
                hw_config->slots[0] = -1;
 }
 
@@ -684,11 +683,9 @@ static int __init init_sb(void)
                if (probe_sbmpu(&cfg_mpu[card]))
                        sbmpu[card] = 1;
                if (sbmpu[card])
-                       attach_sbmpu(&cfg_mpu[card]);
+                       attach_sbmpu(&cfg_mpu[card], THIS_MODULE);
        }
 
-       SOUND_LOCK;
-
        if(isapnp)
                printk(KERN_NOTICE "sb: %d Soundblaster PnP card(s) found.\n", sb_cards_num);
 
@@ -718,7 +715,6 @@ static void __exit cleanup_sb(void)
                        opl_dev[i]->deactivate(opl_dev[i]);
 #endif
        }
-       SOUND_LOCK_END; 
 }
 
 module_init(init_sb);
index 58d5ef474c02ea0a6b5349f229d65e75f13bbe5b..4e3503bab84d40c528b368a630aab092e694ab92 100644 (file)
@@ -634,7 +634,7 @@ int sb_dsp_detect(struct address_info *hw_config, int pci, int pciio, struct sb_
        return 1;
 }
 
-int sb_dsp_init(struct address_info *hw_config)
+int sb_dsp_init(struct address_info *hw_config, struct module *owner)
 {
        sb_devc *devc;
        char name[100];
@@ -812,10 +812,10 @@ int sb_dsp_init(struct address_info *hw_config)
 
        if (!(devc->caps & SB_NO_MIXER))
                if (devc->major == 3 || devc->major == 4)
-                       sb_mixer_init(devc);
+                       sb_mixer_init(devc, owner);
 
        if (!(devc->caps & SB_NO_MIDI))
-               sb_dsp_midi_init(devc);
+               sb_dsp_midi_init(devc, owner);
 
        if (hw_config->name == NULL)
                hw_config->name = "Sound Blaster (8 BIT/MONO ONLY)";
@@ -861,7 +861,7 @@ int sb_dsp_init(struct address_info *hw_config)
                        if (sound_alloc_dma(devc->dma16, "SoundBlaster16"))
                                printk(KERN_WARNING "Sound Blaster:  can't allocate 16 bit DMA channel %d.\n", devc->dma16);
                }
-               sb_audio_init(devc, name);
+               sb_audio_init(devc, name, owner);
                hw_config->slots[0]=devc->dev;
        }
        else
@@ -1190,18 +1190,18 @@ static int init_Jazz16_midi(sb_devc * devc, struct address_info *hw_config)
        return 1;
 }
 
-void attach_sbmpu(struct address_info *hw_config)
+void attach_sbmpu(struct address_info *hw_config, struct module *owner)
 {
        if (last_sb->model == MDL_ESS) {
 #if defined(CONFIG_SOUND_MPU401)
-               attach_mpu401(hw_config);
+               attach_mpu401(hw_config, owner);
                if (last_sb->irq == -hw_config->irq) {
                        last_sb->midi_irq_cookie=(void *)hw_config->slots[1];
                }
 #endif
                return;
        }
-       attach_uart401(hw_config);
+       attach_uart401(hw_config, THIS_MODULE);
        last_sb->midi_irq_cookie=midi_devs[hw_config->slots[4]]->devc;
 }
 
index dfb59016bffd55e61e9b17ca2f17c372b2c46c5a..a52fd18e232795f2de12be669ef88ec9096a4381 100644 (file)
@@ -707,22 +707,18 @@ static short ess_audio_set_channels(int dev, short channels)
 
 static struct audio_driver ess_audio_driver =   /* ESS ES688/1688 */
 {
-       sb_audio_open,
-       sb_audio_close,
-       ess_set_output_parms,
-       ess_set_input_parms,
-       NULL,
-       ess_audio_prepare_for_input,
-       ess_audio_prepare_for_output,
-       ess_audio_halt_xfer,
-       NULL,           /* local_qlen */
-       NULL,           /* copy_from_user */
-       NULL,
-       NULL,
-       ess_audio_trigger,
-       ess_audio_set_speed,
-       ess_audio_set_bits,
-       ess_audio_set_channels
+       owner:                  THIS_MODULE,
+       open:                   sb_audio_open,
+       close:                  sb_audio_close,
+       output_block:   ess_set_output_parms,
+       start_input:    ess_set_input_parms,
+       prepare_for_input:      ess_audio_prepare_for_input,
+       prepare_for_output:     ess_audio_prepare_for_output,
+       halt_io:                ess_audio_halt_xfer,
+       trigger:                ess_audio_trigger,
+       set_speed:              ess_audio_set_speed,
+       set_bits:               ess_audio_set_bits,
+       set_channels:   ess_audio_set_channels
 };
 
 /*
index 59cbfca0373939b395353dbbb6ad8f3e9c661e48..f5f6e3d6c62a5cf5af6f8746189e4d45559abd11 100644 (file)
@@ -147,24 +147,19 @@ void sb_midi_interrupt(sb_devc * devc)
 
 static struct midi_operations sb_midi_operations =
 {
-       {
-               "Sound Blaster", 0, 0, SNDCARD_SB
-       },
-       &std_midi_synth,
-       {0},
-       sb_midi_open,
-       sb_midi_close,
-       sb_midi_ioctl,
-       sb_midi_out,
-       sb_midi_start_read,
-       sb_midi_end_read,
-       NULL,
-       NULL,
-       NULL,
-       NULL
+       owner:          THIS_MODULE,
+       info:           {"Sound Blaster", 0, 0, SNDCARD_SB},
+       converter:      &std_midi_synth,
+       in_info:        {0},
+       open:           sb_midi_open,
+       close:          sb_midi_close,
+       ioctl:          sb_midi_ioctl,
+       outputc:        sb_midi_out,
+       start_read:     sb_midi_start_read,
+       end_read:       sb_midi_end_read,
 };
 
-void sb_dsp_midi_init(sb_devc * devc)
+void sb_dsp_midi_init(sb_devc * devc, struct module *owner)
 {
        int dev;
 
@@ -189,6 +184,9 @@ void sb_dsp_midi_init(sb_devc * devc)
        memcpy((char *) midi_devs[dev], (char *) &sb_midi_operations,
               sizeof(struct midi_operations));
 
+       if (owner)
+                       midi_devs[dev]->owner = owner;
+       
        midi_devs[dev]->devc = devc;
 
 
index 5a5487b16833764798f94c395335c3f6eef1c57e..6745cac220ce7bd2868e597d914ecf032c734a56 100644 (file)
@@ -626,16 +626,18 @@ static int sb_mixer_ioctl(int dev, unsigned int cmd, caddr_t arg)
 
 static struct mixer_operations sb_mixer_operations =
 {
-       "SB",
-       "Sound Blaster",
-       sb_mixer_ioctl
+       owner:  THIS_MODULE,
+       id:     "SB",
+       name:   "Sound Blaster",
+       ioctl:  sb_mixer_ioctl
 };
 
 static struct mixer_operations als007_mixer_operations =
 {
-       "ALS007",
-       "Avance ALS-007",
-       sb_mixer_ioctl
+       owner:  THIS_MODULE,
+       id:     "ALS007",
+       name:   "Avance ALS-007",
+       ioctl:  sb_mixer_ioctl
 };
 
 static void sb_mixer_reset(sb_devc * devc)
@@ -658,7 +660,7 @@ static void sb_mixer_reset(sb_devc * devc)
        };
 }
 
-int sb_mixer_init(sb_devc * devc)
+int sb_mixer_init(sb_devc * devc, struct module *owner)
 {
        int mixer_type = 0;
        int m;
@@ -735,6 +737,10 @@ int sb_mixer_init(sb_devc * devc)
                memcpy ((char *) mixer_devs[m], (char *) &als007_mixer_operations, sizeof (struct mixer_operations));
 
        mixer_devs[m]->devc = devc;
+
+       if (owner)
+                        mixer_devs[m]->owner = owner;
+       
        devc->my_mixerdev = m;
        sb_mixer_reset(devc);
        return 1;
index 270d489d63bdfe1b0137d24d5f41f77c9f8c4860..117280d1ab86dd79d939aa24cb065441a4c88ccd 100644 (file)
@@ -1068,6 +1068,9 @@ int sequencer_open(int dev, struct file *file)
                if (synth_devs[i]==NULL)
                        continue;
 
+               if (synth_devs[i]->owner)
+                       __MOD_INC_USE_COUNT (synth_devs[i]->owner);
+
                if ((tmp = synth_devs[i]->open(i, mode)) < 0)
                {
                        printk(KERN_WARNING "Sequencer: Warning! Cannot open synth device #%d (%d)\n", i, tmp);
@@ -1101,6 +1104,9 @@ int sequencer_open(int dev, struct file *file)
                for (i = 0; i < max_mididev; i++)
                        if (!midi_opened[i] && midi_devs[i])
                        {
+                               if (midi_devs[i]->owner)
+                                       __MOD_INC_USE_COUNT (midi_devs[i]->owner);
+       
                                if ((retval = midi_devs[i]->open(i, mode,
                                        sequencer_midi_input, sequencer_midi_output)) >= 0)
                                {
@@ -1108,8 +1114,12 @@ int sequencer_open(int dev, struct file *file)
                                }
                        }
        }
-       if (seq_mode == SEQ_2)
+
+       if (seq_mode == SEQ_2) {
+               if (tmr->owner)
+                       __MOD_INC_USE_COUNT (tmr->owner);
                tmr->open(tmr_no, seq_mode);
+       }
 
        init_waitqueue_head(&seq_sleeper);
        init_waitqueue_head(&midi_sleeper);
@@ -1191,6 +1201,9 @@ void sequencer_release(int dev, struct file *file)
                        {
                                synth_devs[i]->close(i);
 
+                               if (synth_devs[i]->owner)
+                                       __MOD_DEC_USE_COUNT (synth_devs[i]->owner);
+
                                if (synth_devs[i]->midi_dev)
                                        midi_opened[synth_devs[i]->midi_dev] = 0;
                        }
@@ -1198,12 +1211,18 @@ void sequencer_release(int dev, struct file *file)
 
        for (i = 0; i < max_mididev; i++)
        {
-               if (midi_opened[i])
+               if (midi_opened[i]) {
                        midi_devs[i]->close(i);
+                       if (midi_devs[i]->owner)
+                               __MOD_DEC_USE_COUNT (midi_devs[i]->owner);
+               }
        }
 
-       if (seq_mode == SEQ_2)
+       if (seq_mode == SEQ_2) {
                tmr->close(tmr_no);
+               if (tmr->owner)
+                       __MOD_DEC_USE_COUNT (tmr->owner);
+       }
 
        if (obsolete_api_used)
                printk(KERN_WARNING "/dev/music: Obsolete (4 byte) API was used by %s\n", current->comm);
index 7c02ee8b241c0f3cd136004662bc32e18f1d0b63..ef0e4199c2f5d60968364708c0ffb6759e11cb25 100644 (file)
@@ -22,8 +22,6 @@
 #include <linux/module.h>
 
 #include "sound_config.h"
-#include "soundmodule.h"
-
 #include "ad1848.h"
 
 static void sleep( unsigned howlong )
@@ -115,7 +113,7 @@ static void __init attach_sgalaxy( struct address_info *ai )
        
        request_region( ai->ai_sgbase, 0x10, "SoundGalaxy SB" );
  
-       attach_ms_sound( ai );
+       attach_ms_sound(ai, THIS_MODULE);
        n=ai->slots[0];
        
        if (n!=-1 && audio_devs[n]->mixer_dev != -1 ) {
@@ -163,14 +161,12 @@ static int __init init_sgalaxy(void)
 
        attach_sgalaxy(&cfg);
 
-       SOUND_LOCK;
        return 0;
 }
 
 static void __exit cleanup_sgalaxy(void)
 {
        unload_sgalaxy(&cfg);
-       SOUND_LOCK_END;
 }
 
 module_init(init_sgalaxy);
index fcd5c9ed13beb05e14806b5d6eb320b217153259..9d5d4ac22d3a8a4dad7f261b1abb50158db7004e 100644 (file)
@@ -27,7 +27,6 @@
 #include <asm/io.h>
 
 #include "sound_config.h"
-#include "soundmodule.h"
 
 /*
  *     Define our PCI vendor ID here
@@ -137,7 +136,13 @@ static int mycard_install(struct pci_dev *pcidev)
         */
         
        mss_data[cards].slots[3] = ad1848_init("MyCard MSS 16bit", 
-                       mssbase, mss_data[cards].irq);
+                       mssbase,
+                       mss_data[cards].irq,
+                       mss_data[cards].dma,
+                       mss_data[cards].dma,
+                       0,
+                       0,
+                       THIS_MODULE);
 
        cards++;        
        return 1;
@@ -187,17 +192,13 @@ int init_module(void)
                printk(KERN_ERR "No "CARD_NAME" cards found.\n");
                return -ENODEV;
        }
-       /*
-        *      Binds us to the sound subsystem 
-        */
-       SOUND_LOCK;
+
        return 0;
 }
 
 /*
  *     This is called when it is removed. It will only be removed 
- *     when its use count is 0. For sound the SOUND_LOCK/SOUND_UNLOCK
- *     macros hide the entire work for this.
+ *     when its use count is 0.
  */
  
 void cleanup_module(void)
@@ -218,9 +219,5 @@ void cleanup_module(void)
                 */
                sound_unload_audiodevice(mss_data[i].slots[3]);
        }
-       /*
-        *      Final clean up with the sound layer
-        */
-       SOUND_LOCK_END;
 }
 
index 0ef54e6d0843fd2930b236a476eecf231a7f850b..d478e243dbe8c5e10481b37c5cc6aef11b43c015 100644 (file)
@@ -32,7 +32,6 @@
 #define NO_SAMPLE              0xffff
 
 #include "sound_config.h"
-#include "soundmodule.h"
 #include "softoss.h"
 #include <linux/ultrasound.h>
 
@@ -1394,27 +1393,28 @@ static void softsyn_reset(int devno)
 
 static struct synth_operations softsyn_operations =
 {
-       "SoftOSS",
-       &softsyn_info,
-       0,
-       SYNTH_TYPE_SAMPLE,
-       0,
-       softsyn_open,
-       softsyn_close,
-       softsyn_ioctl,
-       softsyn_kill_note,
-       softsyn_start_note,
-       softsyn_set_instr,
-       softsyn_reset,
-       softsyn_hw_control,
-       softsyn_load_patch,
-       softsyn_aftertouch,
-       softsyn_controller,
-       softsyn_panning,
-       softsyn_volume_method,
-       softsyn_bender,
-       softsyn_alloc_voice,
-       softsyn_setup_voice
+       owner:          THIS_MODULE,
+       id:             "SoftOSS",
+       info:           &softsyn_info,
+       midi_dev:       0,
+       synth_type:     SYNTH_TYPE_SAMPLE,
+       synth_subtype:  0,
+       open:           softsyn_open,
+       close:          softsyn_close,
+       ioctl:          softsyn_ioctl,
+       kill_note:      softsyn_kill_note,
+       start_note:     softsyn_start_note,
+       set_instr:      softsyn_set_instr,
+       reset:          softsyn_reset,
+       hw_control:     softsyn_hw_control,
+       load_patch:     softsyn_load_patch,
+       aftertouch:     softsyn_aftertouch,
+       controller:     softsyn_controller,
+       panning:        softsyn_panning,
+       volume_method:  softsyn_volume_method,
+       bender:         softsyn_bender,
+       alloc_voice:    softsyn_alloc_voice,
+       setup_voice:    softsyn_setup_voice
 };
 
 /*
@@ -1517,7 +1517,7 @@ static int __init init_softoss(void)
        if (!probe_softsyn(&cfg))
                return -ENODEV;
        attach_softsyn_card(&cfg);
-       SOUND_LOCK;
+
        return 0;
 }
 
@@ -1526,7 +1526,6 @@ static void __exit cleanup_softoss(void)
        unload_softsyn(&cfg);
        sound_unload_synthdev(devc->synthdev);
        sound_unload_timerdev(devc->timerdev);
-       SOUND_LOCK_END;
 }
 
 module_init(init_softoss);
index a3601dcf6e178fcab02757e399e35a59244bbfef..764687756ee88afb955e5d0f0ecb9ad670393347 100644 (file)
@@ -52,11 +52,5 @@ EXPORT_SYMBOL(conf_printf2);
 extern int softoss_dev;
 EXPORT_SYMBOL(softoss_dev);
 
-/* Locking */
-extern struct notifier_block *sound_locker;
-extern void sound_notifier_chain_register(struct notifier_block *);
-EXPORT_SYMBOL(sound_locker);
-EXPORT_SYMBOL(sound_notifier_chain_register);
-
 MODULE_DESCRIPTION("OSS Sound subsystem");
 MODULE_AUTHOR("Hannu Savolainen, et al.");
index 33901aa1bd1faa889f5c3793cccdb3d609028a38..2aabeb309fbe7bec66f349e24b267d33a32da112 100644 (file)
@@ -264,15 +264,16 @@ static void timer_arm(int dev, long time)
 
 static struct sound_timer_operations sound_timer =
 {
-       {"Sound Timer", 0},
-       1,                      /* Priority */
-       0,                      /* Local device link */
-       timer_open,
-       timer_close,
-       timer_event,
-       timer_get_time,
-       timer_ioctl,
-       timer_arm
+       owner:          THIS_MODULE,
+       info:           {"Sound Timer", 0},
+       priority:       1,      /* Priority */
+       devlink:        0,      /* Local device link */
+       open:           timer_open,
+       close:          timer_close,
+       event:          timer_event,
+       get_time:       timer_get_time,
+       ioctl:          timer_ioctl,
+       arm_timer:      timer_arm
 };
 
 void sound_timer_interrupt(void)
index d56b8f77f6939d557b9ec511d501835e2df8b56e..fc715db83a2575b0d3a91c78c46200a5d12d7b5d 100644 (file)
 #include <linux/delay.h>
 #include <linux/proc_fs.h>
 #include <linux/smp_lock.h>
-#include <linux/notifier.h>
-
-
-struct notifier_block *sound_locker=(struct notifier_block *)0;
-static int lock_depth = 0;
 
 /*
  * This ought to be moved into include/asm/dma.h
@@ -78,7 +73,6 @@ static char     dma_alloc_map[MAX_DMA_CHANNELS] = {0};
 #define DMA_MAP_BUSY           2
 
 
-static int in_use = 0;         /* Total # of open devices */
 unsigned long seq_time = 0;    /* Time for /dev/sequencer */
 
 /*
@@ -221,7 +215,7 @@ static int sound_open(struct inode *inode, struct file *file)
 
        DEB(printk("sound_open(dev=%d)\n", dev));
        if ((dev >= SND_NDEVS) || (dev < 0)) {
-               /* printk(KERN_ERR "Invalid minor device %d\n", dev);*/
+               printk(KERN_ERR "Invalid minor device %d\n", dev);
                return -ENXIO;
        }
        switch (dev & 0x0f) {
@@ -234,6 +228,9 @@ static int sound_open(struct inode *inode, struct file *file)
                }
                if (dev && (dev >= num_mixers || mixer_devs[dev] == NULL))
                        return -ENXIO;
+
+               if (mixer_devs[dev]->owner)
+                       __MOD_INC_USE_COUNT (mixer_devs[dev]->owner);
                break;
 
        case SND_DEV_SEQ:
@@ -258,10 +255,6 @@ static int sound_open(struct inode *inode, struct file *file)
                printk(KERN_ERR "Invalid minor device %d\n", dev);
                return -ENXIO;
        }
-       in_use++;
-
-       notifier_call_chain(&sound_locker, 1, 0);
-       lock_depth++;
 
        return 0;
 }
@@ -274,6 +267,8 @@ static int sound_release(struct inode *inode, struct file *file)
        DEB(printk("sound_release(dev=%d)\n", dev));
        switch (dev & 0x0f) {
        case SND_DEV_CTL:
+               if (mixer_devs[dev]->owner)
+                       __MOD_DEC_USE_COUNT (mixer_devs[dev]->owner);
                break;
                
        case SND_DEV_SEQ:
@@ -294,10 +289,6 @@ static int sound_release(struct inode *inode, struct file *file)
        default:
                printk(KERN_ERR "Sound error: Releasing unknown device 0x%02x\n", dev);
        }
-       in_use--;
-
-       notifier_call_chain(&sound_locker, 0, 0);
-       lock_depth--;
        unlock_kernel();
 
        return 0;
@@ -811,27 +802,3 @@ void conf_printf2(char *name, int base, int irq, int dma, int dma2)
        printk("\n");
 #endif
 }
-
-/*
- *     Module and lock management
- */
-/*
- *     When a sound module is registered we need to bring it to the current
- *     lock level...
- */
-void sound_notifier_chain_register(struct notifier_block *bl)
-{
-       int ct=0;
-       
-       notifier_chain_register(&sound_locker, bl);
-       /*
-        *      Normalise the lock count by calling the entry directly. We
-        *      have to call the module as it owns its own use counter
-        */
-       while(ct<lock_depth) {
-               bl->notifier_call(bl, 1, 0);
-               ct++;
-       }
-}
diff --git a/drivers/sound/soundmodule.h b/drivers/sound/soundmodule.h
deleted file mode 100644 (file)
index 2187f1c..0000000
+++ /dev/null
@@ -1,29 +0,0 @@
-#ifndef _SOUNDMODULE_H
-#define _SOUNDMODULE_H
-
-#include <linux/notifier.h>
-#include <linux/module.h>
-
-extern struct notifier_block *sound_locker;
-extern void sound_notifier_chain_register(struct notifier_block *);
-
-#define SOUND_LOCK             sound_notifier_chain_register(&sound_notifier); 
-#define SOUND_LOCK_END         notifier_chain_unregister(&sound_locker, &sound_notifier)
-
-static int my_notifier_call(struct notifier_block *b, unsigned long foo, void *bar)
-{
-       if(foo)
-               MOD_INC_USE_COUNT;
-       else
-               MOD_DEC_USE_COUNT;
-       return NOTIFY_DONE;
-}
-
-static struct notifier_block sound_notifier=
-{
-       my_notifier_call,
-       (void *)0,
-       0
-};
-
-#endif /* _SOUNDMODULE_H */
index 21fd2ea2e5acb4d34a899e4e59a373dc5dd3b47a..1c399bd5cf1e630d950c8cfbf9de26ff7e1692f5 100644 (file)
@@ -20,7 +20,6 @@
 #include <linux/module.h>
 
 #include "sound_config.h"
-#include "soundmodule.h"
 #include "sound_firmware.h"
 
 #include <linux/types.h>
@@ -728,7 +727,7 @@ void attach_sscape(struct address_info *hw_config)
        hw_config->name = "SoundScape";
 
        hw_config->irq *= -1;   /* Negative value signals IRQ sharing */
-       attach_mpu401(hw_config);
+       attach_mpu401(hw_config, THIS_MODULE);
        hw_config->irq *= -1;   /* Restore it */
 
        if (hw_config->slots[1] != -1)  /* The MPU driver installed itself */
@@ -1378,21 +1377,16 @@ static void __init attach_ss_ms_sound(struct address_info *hw_config)
        if (hw_config->irq == devc->irq)
                printk(KERN_WARNING "soundscape: Warning! The WSS mode can't share IRQ with MIDI\n");
                                
-       if (! sscape_is_pnp )
-               hw_config->slots[0] = ad1848_init("SoundScape", hw_config->io_base,
-                                                 hw_config->irq,
-                                                 hw_config->dma,
-                                                 hw_config->dma,
-                                                 0,
-                                                 devc->osp);
-
-       else 
-               hw_config->slots[0] = ad1848_init("SoundScape PNP", hw_config->io_base,
-                                                 hw_config->irq,
-                                                 hw_config->dma,
-                                                 hw_config->dma,
-                                                 0,
-                                                 devc->osp);
+       hw_config->slots[0] = ad1848_init(
+                       sscape_is_pnp ? "SoundScape" : "SoundScape PNP",
+                       hw_config->io_base,
+                       hw_config->irq,
+                       hw_config->dma,
+                       hw_config->dma,
+                       0,
+                       devc->osp,
+                       THIS_MODULE);
+
                                          
        if (hw_config->slots[0] != -1)  /* The AD1848 driver installed itself */
        {
@@ -1497,7 +1491,7 @@ static int __init init_sscape(void)
 
        if (mss)
                attach_ss_ms_sound(&cfg);
-       SOUND_LOCK;
+
        return 0;
 }
 
@@ -1505,7 +1499,6 @@ static void __exit cleanup_sscape(void)
 {
        if (mss)
                unload_ss_ms_sound(&cfg);
-       SOUND_LOCK_END;
        unload_sscape(&cfg_mpu);
 }
 
index 2d0cc953f37d5ffa49f913c821c354ac79f1ef08..ba6a572a0eb0baaedadd3c29efc153b4f5bded5e 100644 (file)
@@ -274,13 +274,14 @@ def_tmr_arm(int dev, long time)
 
 struct sound_timer_operations default_sound_timer =
 {
-       {"System clock", 0},
-       0,                      /* Priority */
-       0,                      /* Local device link */
-       def_tmr_open,
-       def_tmr_close,
-       def_tmr_event,
-       def_tmr_get_time,
-       def_tmr_ioctl,
-       def_tmr_arm
+       owner:          THIS_MODULE,
+       info:           {"System clock", 0},
+       priority:       0,      /* Priority */
+       devlink:        0,      /* Local device link */
+       open:           def_tmr_open,
+       close:          def_tmr_close,
+       event:          def_tmr_event,
+       get_time:       def_tmr_get_time,
+       ioctl:          def_tmr_ioctl,
+       arm_timer:      def_tmr_arm
 };
index c1d71750e53813b4b28814886fb8b17f8ebf728b..d3cc76046b87516df4ae3a69cbc99a575c04a687 100644 (file)
@@ -21,7 +21,6 @@
 #include <linux/module.h>
 
 #include "sound_config.h"
-#include "soundmodule.h"
 #include "sb.h"
 #include "sound_firmware.h"
 
@@ -259,7 +258,8 @@ static void __init attach_trix_wss(struct address_info *hw_config)
                                          dma1,
                                          dma2,
                                          0,
-                                         hw_config->osp);
+                                         hw_config->osp,
+                                         THIS_MODULE);
        request_region(hw_config->io_base, 4, "MSS config");
 
        if (num_mixers > old_num_mixers)        /* Mixer got installed */
@@ -332,7 +332,7 @@ static void __init attach_trix_sb(struct address_info *hw_config)
        old_quiet = sb_be_quiet;
        sb_be_quiet = 1;
 
-       sb_dsp_init(hw_config);
+       sb_dsp_init(hw_config, THIS_MODULE);
 
        sb_be_quiet = old_quiet;
 }
@@ -340,7 +340,7 @@ static void __init attach_trix_sb(struct address_info *hw_config)
 static void __init attach_trix_mpu(struct address_info *hw_config)
 {
        hw_config->name = "AudioTrix Pro";
-       attach_uart401(hw_config);
+       attach_uart401(hw_config, THIS_MODULE);
 }
 
 static int __init probe_trix_mpu(struct address_info *hw_config)
@@ -515,7 +515,7 @@ static int __init init_trix(void)
                if (mpu)
                        attach_trix_mpu(&cfg_mpu);
        }
-       SOUND_LOCK;
+
        return 0;
 }
 
@@ -528,7 +528,6 @@ static void __exit cleanup_trix(void)
        if (mpu)
                unload_trix_mpu(&cfg_mpu);
        unload_trix_wss(&cfg);
-       SOUND_LOCK_END;
 }
 
 module_init(init_trix);
index 71721dd3e74d0358e560f9673beb0dd22e9e61a1..0b7eb3297a0a787d8d1c8c4c2bef94bc9a4d06e7 100644 (file)
@@ -24,7 +24,6 @@
 #include <linux/module.h>
 
 #include "sound_config.h"
-#include "soundmodule.h"
 
 #include "mpu401.h"
 
@@ -206,21 +205,17 @@ static inline int uart401_buffer_status(int dev)
 
 static struct midi_operations uart401_operations =
 {
-       {
-               "MPU-401 (UART) MIDI", 0, 0, SNDCARD_MPU401
-       },
-       &std_midi_synth,
-       {0},
-       uart401_open,
-       uart401_close,
-       NULL, /* ioctl */
-       uart401_out,
-       uart401_start_read,
-       uart401_end_read,
-       uart401_kick,
-       NULL,
-       uart401_buffer_status,
-       NULL
+       owner:          THIS_MODULE,
+       info:           {"MPU-401 (UART) MIDI", 0, 0, SNDCARD_MPU401},
+       converter:      &std_midi_synth,
+       in_info:        {0},
+       open:           uart401_open,
+       close:          uart401_close,
+       outputc:        uart401_out,
+       start_read:     uart401_start_read,
+       end_read:       uart401_end_read,
+       kick:           uart401_kick,
+       buffer_status:  uart401_buffer_status,
 };
 
 static void enter_uart_mode(uart401_devc * devc)
@@ -246,7 +241,7 @@ static void enter_uart_mode(uart401_devc * devc)
        restore_flags(flags);
 }
 
-void attach_uart401(struct address_info *hw_config)
+void attach_uart401(struct address_info *hw_config, struct module *owner)
 {
        uart401_devc *devc;
        char *name = "MPU-401 (UART) MIDI";
@@ -311,6 +306,9 @@ void attach_uart401(struct address_info *hw_config)
        memcpy((char *) midi_devs[devc->my_dev], (char *) &uart401_operations,
               sizeof(struct midi_operations));
 
+       if (owner)
+               midi_devs[devc->my_dev]->owner = owner;
+       
        midi_devs[devc->my_dev]->devc = devc;
        midi_devs[devc->my_dev]->converter = (struct synth_operations *)kmalloc(sizeof(struct synth_operations), GFP_KERNEL);
        if (midi_devs[devc->my_dev]->converter == NULL)
@@ -473,9 +471,9 @@ static int __init init_uart401(void)
                printk(KERN_INFO "MPU-401 UART driver Copyright (C) Hannu Savolainen 1993-1997");
                if (probe_uart401(&cfg_mpu) == 0)
                        return -ENODEV;
-               attach_uart401(&cfg_mpu);
+               attach_uart401(&cfg_mpu, THIS_MODULE);
        }
-       SOUND_LOCK;
+
        return 0;
 }
 
@@ -483,7 +481,6 @@ static void __exit cleanup_uart401(void)
 {
        if (cfg_mpu.io_base != -1 && cfg_mpu.irq != -1)
                unload_uart401(&cfg_mpu);
-       SOUND_LOCK_END;
 }
 
 module_init(init_uart401);
index 2bff6a655e386c1c5d42ec7a25640144fe62ec20..e42e7d16ca2a6b5c4f472a10a748dbdbe6bea450 100644 (file)
@@ -26,7 +26,6 @@
  */
 
 #include "sound_config.h"
-#include "soundmodule.h"
 
 static int uart6850_base = 0x330;
 
@@ -148,7 +147,6 @@ static int uart6850_open(int dev, int mode,
                  return -EBUSY;
        };
 
-       MOD_INC_USE_COUNT;
        uart6850_cmd(UART_RESET);
        uart6850_input_loop();
        midi_input_intr = input;
@@ -165,7 +163,6 @@ static void uart6850_close(int dev)
        uart6850_cmd(UART_MODE_ON);
        del_timer(&uart6850_timer);
        uart6850_opened = 0;
-       MOD_DEC_USE_COUNT;
 }
 
 static int uart6850_out(int dev, unsigned char midi_byte)
@@ -234,18 +231,18 @@ static inline int uart6850_buffer_status(int dev)
 
 static struct midi_operations uart6850_operations =
 {
-       {"6850 UART", 0, 0, SNDCARD_UART6850},
-       &std_midi_synth,
-       {0},
-       uart6850_open,
-       uart6850_close,
-       NULL, /* ioctl */
-       uart6850_out,
-       uart6850_start_read,
-       uart6850_end_read,
-       uart6850_kick,
-       uart6850_command,
-       uart6850_buffer_status
+       owner:          THIS_MODULE,
+       info:           {"6850 UART", 0, 0, SNDCARD_UART6850},
+       converter:      &std_midi_synth,
+       in_info:        {0},
+       open:           uart6850_open,
+       close:          uart6850_close,
+       outputc:        uart6850_out,
+       start_read:     uart6850_start_read,
+       end_read:       uart6850_end_read,
+       kick:           uart6850_kick,
+       command:        uart6850_command,
+       buffer_status:  uart6850_buffer_status
 };
 
 
@@ -338,14 +335,12 @@ static int __init init_uart6850(void)
        if (probe_uart6850(&cfg_mpu))
                return -ENODEV;
 
-       SOUND_LOCK;
        return 0;
 }
 
 static void __exit cleanup_uart6850(void)
 {
        unload_uart6850(&cfg_mpu);
-       SOUND_LOCK_END;
 }
 
 module_init(init_uart6850);
index 9c18ff5c8e463a8969382add0d44954609014be3..4362de3467a4005088c0098a1dc323edc44b663c 100644 (file)
@@ -23,7 +23,6 @@
 #include <linux/module.h>
 
 #include "sound_config.h"
-#include "soundmodule.h"
 
 #include "v_midi.h"
 
@@ -136,36 +135,30 @@ static inline int v_midi_ioctl (int dev, unsigned cmd, caddr_t arg)
 
 static struct midi_operations v_midi_operations =
 {
-       {"Loopback MIDI Port 1", 0, 0, SNDCARD_VMIDI},
-       &std_midi_synth,
-       {0},
-       v_midi_open,
-       v_midi_close,
-       v_midi_ioctl,
-       v_midi_out,
-       v_midi_start_read,
-       v_midi_end_read,
-       NULL,
-       NULL,
-       NULL,
-       NULL
+       owner:          THIS_MODULE,
+       info:           {"Loopback MIDI Port 1", 0, 0, SNDCARD_VMIDI},
+       converter:      &std_midi_synth,
+       in_info:        {0},
+       open:           v_midi_open,
+       close:          v_midi_close,
+       ioctl:          v_midi_ioctl,
+       outputc:        v_midi_out,
+       start_read:     v_midi_start_read,
+       end_read:       v_midi_end_read,
 };
 
 static struct midi_operations v_midi_operations2 =
 {
-       {"Loopback MIDI Port 2", 0, 0, SNDCARD_VMIDI},
-       &std_midi_synth,
-       {0},
-       v_midi_open,
-       v_midi_close,
-       v_midi_ioctl,
-       v_midi_out,
-       v_midi_start_read,
-       v_midi_end_read,
-       NULL,
-       NULL,
-       NULL,
-       NULL
+       owner:          THIS_MODULE,
+       info:           {"Loopback MIDI Port 2", 0, 0, SNDCARD_VMIDI},
+       converter:      &std_midi_synth,
+       in_info:        {0},
+       open:           v_midi_open,
+       close:          v_midi_close,
+       ioctl:          v_midi_ioctl,
+       outputc:        v_midi_out,
+       start_read:     v_midi_start_read,
+       end_read:       v_midi_end_read,
 };
 
 /*
@@ -284,15 +277,12 @@ static int __init init_vmidi(void)
                return -ENODEV;
        attach_v_midi(&cfg);
 
-       SOUND_LOCK;
-
        return 0;
 }
 
 static void __exit cleanup_vmidi(void)
 {
        unload_v_midi(&cfg);
-       SOUND_LOCK_END;
 }
 
 module_init(init_vmidi);
index 01fb315ab37fcd36e0a6454c412e4f475b014edd..aa7e3121b036eda230a383bcff967decf0752880 100644 (file)
@@ -26,7 +26,6 @@
 #include <asm/system.h>
 
 #include "sound_config.h"
-#include "soundmodule.h"
 #include "vidc.h"
 
 #ifndef _SIOC_TYPE
@@ -362,6 +361,7 @@ static void vidc_audio_trigger(int dev, int enable_bits)
 
 static struct audio_driver vidc_audio_driver =
 {
+       owner:                  THIS_MODULE,
        open:                   vidc_audio_open,
        close:                  vidc_audio_close,
        output_block:           vidc_audio_output_block,
@@ -377,6 +377,7 @@ static struct audio_driver vidc_audio_driver =
 };
 
 static struct mixer_operations vidc_mixer_operations = {
+       owner:          THIS_MODULE,
        id:             "VIDC",
        name:           "VIDCsound",
        ioctl:          vidc_mixer_ioctl
@@ -519,16 +520,12 @@ static void __exit unload_vidc(struct address_info *hw_config)
 }
 
 static struct address_info cfg;
-/*
- * Note! Module use count is handled by SOUNDLOCK/SOUND_LOCK_END
- */
 
 static int __init init_vidc(void)
 {
        if (probe_vidc(&cfg) == 0)
                return -ENODEV;
 
-       SOUND_LOCK;
        attach_vidc(&cfg);
 
        return 0;
@@ -537,7 +534,6 @@ static int __init init_vidc(void)
 static void __exit cleanup_vidc(void)
 {
        unload_vidc(&cfg);
-       SOUND_LOCK_END;
 }
 
 module_init(init_vidc);
index 75d1a897750b6f111d3974046e816919ae7f6453..e802fa4cfe366b0deb7dc592795e9b46d2b189d1 100644 (file)
@@ -42,7 +42,6 @@
 #include <asm/hardware.h>
 #include <asm/system.h>
 
-#include "soundmodule.h"
 #include "sound_config.h"
 #include "waveartist.h"
 
@@ -801,22 +800,21 @@ waveartist_set_bits(int dev, unsigned int arg)
 }
 
 static struct audio_driver waveartist_audio_driver = {
-       waveartist_open,
-       waveartist_close,
-       waveartist_output_block,
-       waveartist_start_input,
-       waveartist_ioctl,
-       waveartist_prepare_for_input,
-       waveartist_prepare_for_output,
-       waveartist_halt,
-       NULL,
-       NULL,
-       waveartist_halt_input,
-       waveartist_halt_output,
-       waveartist_trigger,
-       waveartist_set_speed,
-       waveartist_set_bits,
-       waveartist_set_channels
+       owner:          THIS_MODULE,
+       open:           waveartist_open,
+       close:          waveartist_close,
+       output_block:   waveartist_output_block,
+       start_input:    waveartist_start_input,
+       ioctl:          waveartist_ioctl,
+       prepare_for_input:      waveartist_prepare_for_input,
+       prepare_for_output:     waveartist_prepare_for_output,
+       halt_io:        waveartist_halt,
+       halt_input:     waveartist_halt_input,
+       halt_output:    waveartist_halt_output,
+       trigger:        waveartist_trigger,
+       set_speed:      waveartist_set_speed,
+       set_bits:       waveartist_set_bits,
+       set_channels:   waveartist_set_channels
 };
 
 
@@ -1186,9 +1184,10 @@ waveartist_mixer_ioctl(int dev, unsigned int cmd, caddr_t arg)
 
 static struct mixer_operations waveartist_mixer_operations =
 {
-       "WaveArtist",
-       "WaveArtist NetWinder",
-       waveartist_mixer_ioctl
+       owner:  THIS_MODULE,
+       id:     "WaveArtist",
+       name:   "WaveArtist NetWinder",
+       ioctl:  waveartist_mixer_ioctl
 };
 
 static int
@@ -1794,16 +1793,13 @@ static int __init init_waveartist(void)
        attach_waveartist(&cfg);
        attached = 1;
 
-       SOUND_LOCK;
        return 0;
 }
 
 static void __exit cleanup_waveartist(void)
 {
-       if (attached) {
-               SOUND_LOCK_END;
+       if (attached)
                unload_waveartist(&cfg);
-       }
 }
 
 module_init(init_waveartist);
index 3fb7cce2eb7072493e87a7f2e9c50ea443403d94..5a1a161d918be2154a74621c0e82091124b1b07f 100644 (file)
@@ -78,7 +78,6 @@
 #include <linux/delay.h>
 
 #include "sound_config.h"
-#include "soundmodule.h"
 
 #include <linux/wavefront.h>
 
@@ -2115,28 +2114,25 @@ wavefront_oss_load_patch (int devno, int format, const char *addr,
 
 static struct synth_operations wavefront_operations =
 {
-       "WaveFront",
-       &wavefront_info,
-       0,
-       SYNTH_TYPE_SAMPLE,
-       SAMPLE_TYPE_WAVEFRONT,
-       wavefront_oss_open,
-       wavefront_oss_close,
-       wavefront_oss_ioctl,
-
-       midi_synth_kill_note,
-       midi_synth_start_note,
-       midi_synth_set_instr,
-       midi_synth_reset,
-       NULL, /* hw_control */
-       midi_synth_load_patch,
-       midi_synth_aftertouch,
-       midi_synth_controller,
-       midi_synth_panning,
-       NULL, /* volume method */
-       midi_synth_bender,
-       NULL, /* alloc voice */
-       midi_synth_setup_voice
+       owner:          THIS_MODULE,
+       id:             "WaveFront",
+       info:           &wavefront_info,
+       midi_dev:       0,
+       synth_type:     SYNTH_TYPE_SAMPLE,
+       synth_subtype:  SAMPLE_TYPE_WAVEFRONT,
+       open:           wavefront_oss_open,
+       close:          wavefront_oss_close,
+       ioctl:          wavefront_oss_ioctl,
+       kill_note:      midi_synth_kill_note,
+       start_note:     midi_synth_start_note,
+       set_instr:      midi_synth_set_instr,
+       reset:          midi_synth_reset,
+       load_patch:     midi_synth_load_patch,
+       aftertouch:     midi_synth_aftertouch,
+       controller:     midi_synth_controller,
+       panning:        midi_synth_panning,
+       bender:         midi_synth_bender,
+       setup_voice:    midi_synth_setup_voice
 };
 #endif OSS_SUPPORT_SEQ
 
@@ -3569,14 +3565,12 @@ static int __init init_wavfront (void)
                return -EIO;
        }
 
-       SOUND_LOCK;
        return 0;
 }
 
 static void __exit cleanup_wavfront (void)
 {
        uninstall_wavefront ();
-       SOUND_LOCK_END;
 }
 
 module_init(init_wavfront);
index 2d7d50fe93954400f9ad3cce3a924c1e51dab552..59e5a04da53f4383939b0e115d0b5332b96153d5 100644 (file)
@@ -51,7 +51,6 @@
 
 #include <linux/init.h>
 #include "sound_config.h"
-#include "soundmodule.h"
 
 #include <linux/wavefront.h>
 
@@ -550,19 +549,16 @@ static struct midi_operations  wf_mpu_midi_operations[2];
 
 static struct midi_operations wf_mpu_midi_proto =
 {
-       {"WF-MPU MIDI", 0, MIDI_CAP_MPU401, SNDCARD_MPU401},
-       NULL,  /*converter*/
-       {0},   /* in_info */
-       wf_mpu_open,
-       wf_mpu_close,
-       wf_mpu_ioctl,
-       wf_mpu_out,
-       wf_mpu_start_read,
-       wf_mpu_end_read,
-       NULL,
-       NULL,
-       wf_mpu_buffer_status,
-       NULL
+       owner:          THIS_MODULE,
+       info:           {"WF-MPU MIDI", 0, MIDI_CAP_MPU401, SNDCARD_MPU401},
+       in_info:        {0},   /* in_info */
+       open:           wf_mpu_open,
+       close:          wf_mpu_close,
+       ioctl:          wf_mpu_ioctl,
+       outputc:        wf_mpu_out,
+       start_read:     wf_mpu_start_read,
+       end_read:       wf_mpu_end_read,
+       buffer_status:  wf_mpu_buffer_status,
 };
 
 static struct synth_info wf_mpu_synth_info_proto =
@@ -671,28 +667,27 @@ wf_mpu_synth_close (int dev)
 
 static struct synth_operations wf_mpu_synth_proto =
 {
-       "WaveFront (ICS2115)",
-       NULL,  /* info field, filled in during configuration */
-       0,     /* MIDI dev XXX should this be -1 ? */
-       SYNTH_TYPE_MIDI,
-       SAMPLE_TYPE_WAVEFRONT,
-       wf_mpu_synth_open,
-       wf_mpu_synth_close,
-       wf_mpu_synth_ioctl,
-       midi_synth_kill_note,
-       midi_synth_start_note,
-       midi_synth_set_instr,
-       midi_synth_reset,
-       midi_synth_hw_control,
-       midi_synth_load_patch,
-       midi_synth_aftertouch,
-       midi_synth_controller,
-       midi_synth_panning,
-       NULL,
-       midi_synth_bender,
-       NULL,                           /* alloc */
-       midi_synth_setup_voice,
-       midi_synth_send_sysex
+       owner:          THIS_MODULE,
+       id:             "WaveFront (ICS2115)",
+       info:           NULL,  /* info field, filled in during configuration */
+       midi_dev:       0,     /* MIDI dev XXX should this be -1 ? */
+       synth_type:     SYNTH_TYPE_MIDI,
+       synth_subtype:  SAMPLE_TYPE_WAVEFRONT,
+       open:           wf_mpu_synth_open,
+       close:          wf_mpu_synth_close,
+       ioctl:          wf_mpu_synth_ioctl,
+       kill_note:      midi_synth_kill_note,
+       start_note:     midi_synth_start_note,
+       set_instr:      midi_synth_set_instr,
+       reset:          midi_synth_reset,
+       hw_control:     midi_synth_hw_control,
+       load_patch:     midi_synth_load_patch,
+       aftertouch:     midi_synth_aftertouch,
+       controller:     midi_synth_controller,
+       panning:        midi_synth_panning,
+       bender:         midi_synth_bender,
+       setup_voice:    midi_synth_setup_voice,
+       send_sysex:     midi_synth_send_sysex
 };
 
 static int
index a221a05d0d30e1fbc1fbcd6ac38808c05607066a..5762eea1362eb2a01eda839c9fab851164eec60c 100644 (file)
@@ -54,7 +54,6 @@
 #include <asm/io.h>
 
 #include "sound_config.h"
-#include "soundmodule.h"
 #include "sb.h"
 
 #include "724hwmcode.h"
 
 /* ---------------------------------------------------------------------- */
 
-#ifndef SOUND_LOCK
-#define SOUND_LOCK do {} while (0)
-#define SOUND_LOCK_END do {} while (0)
-#endif
-
 #ifndef PCI_VENDOR_ID_YAMAHA
 #define PCI_VENDOR_ID_YAMAHA  0x1073
 #endif
@@ -641,7 +635,7 @@ static int __init ymf7xx_init(struct pci_dev *pcidev)
 
 static void __init ymf7xxsb_attach_sb(struct address_info *hw_config)
 {
-       if(!sb_dsp_init(hw_config))
+       if(!sb_dsp_init(hw_config, THIS_MODULE))
                hw_config->slots[0] = -1;
 }
 
@@ -784,7 +778,7 @@ static int __init ymf7xxsb_init_one (struct pci_dev *pcidev, const struct pci_de
                        ymf7xxsb_unload_sb (&sb_data[cards], 0);
                        return -ENODEV;
                }
-               ymf7xxsb_attach_midi (&mpu_data[cards]);
+               ymf7xxsb_attach_midi (&mpu_data[cards], THIS_MODULE);
        }
 #endif
 
@@ -804,11 +798,6 @@ static int __init init_ymf7xxsb_module(void)
 {
        int i;
 
-       /*
-        *      Binds us to the sound subsystem 
-        */
-       SOUND_LOCK;
-
        if ( master_vol < 0 ) master_vol  = 50;
        if ( master_vol > 100 ) master_vol = 100;
 
@@ -816,10 +805,8 @@ static int __init init_ymf7xxsb_module(void)
                ymfbase[i] = NULL;
 
        i = pci_module_init (&ymf7xxsb_driver);
-       if (i < 0) {
-               SOUND_LOCK_END;
+       if (i < 0)
                return i;
-       }
 
        printk (KERN_INFO PFX YMFSB_CARD_NAME " loaded\n");
        
@@ -853,10 +840,6 @@ static void __exit cleanup_ymf7xxsb_module(void)
 
        free_iomaps();
 
-       /*
-        *      Final clean up with the sound layer
-        */
-       SOUND_LOCK_END;
 }
 
 MODULE_AUTHOR("Daisuke Nagano, breeze.nagano@nifty.ne.jp");
index e8a3738f8f1701ce4c64640ea54f10773df30b2d..1c611ebee422144f9d6a6c372665ea73724fbb87 100644 (file)
@@ -4,14 +4,14 @@
  *
  * (c) 1998,1999,2000 Petr Vandrovec <vandrove@vc.cvut.cz>
  *
- * Version: 1.21 1999/01/09
+ * Version: 1.50 2000/08/10
  *
  * MTRR stuff: 1998 Tom Rini <trini@kernel.crashing.org>
  *
  * Contributors: "menion?" <menion@mindless.com>
  *                     Betatesting, fixes, ideas
  *
- *               "Kurt Garloff" <garloff@kg1.ping.de>
+ *               "Kurt Garloff" <garloff@suse.de>
  *                     Betatesting, fixes, ideas, videomodes, videomodes timmings
  *
  *               "Tom Rini" <trini@kernel.crashing.org>
@@ -63,6 +63,9 @@
  *               "Mark Vojkovich" <mvojkovi@ucsd.edu>
  *                     G400 support
  *
+ *               "Ken Aaker" <kdaaker@rchland.vnet.ibm.com>
+ *                     memtype extension (needed for GXT130P RS/6000 adapter)
+ *
  * (following author is not in any relation with this code, but his code
  *  is included in this driver)
  *
@@ -755,7 +758,7 @@ static int MGAG100_preinit(WPMINFO struct matrox_hw_state* hw){
        if (ACCESS_FBINFO(devflags.noinit))
                return 0;
        hw->MXoptionReg &= 0xC0000100;
-       hw->MXoptionReg |= 0x00078020;
+       hw->MXoptionReg |= 0x00000020;
        if (ACCESS_FBINFO(devflags.novga))
                hw->MXoptionReg &= ~0x00000100;
        if (ACCESS_FBINFO(devflags.nobios))
@@ -763,13 +766,13 @@ static int MGAG100_preinit(WPMINFO struct matrox_hw_state* hw){
        if (ACCESS_FBINFO(devflags.nopciretry))
                hw->MXoptionReg |=  0x20000000;
        pci_write_config_dword(ACCESS_FBINFO(pcidev), PCI_OPTION_REG, hw->MXoptionReg);
-       pci_read_config_dword(ACCESS_FBINFO(pcidev), 0x50, &reg50);
-       reg50 &= ~0x3000;
-       pci_write_config_dword(ACCESS_FBINFO(pcidev), 0x50, reg50);
-
        DAC1064_setmclk(PMINFO hw, DAC1064_OPT_MDIV2 | DAC1064_OPT_GDIV3 | DAC1064_OPT_SCLK_PCI, 133333);
 
        if (ACCESS_FBINFO(devflags.accelerator) == FB_ACCEL_MATROX_MGAG100) {
+               pci_read_config_dword(ACCESS_FBINFO(pcidev), 0x50, &reg50);
+               reg50 &= ~0x3000;
+               pci_write_config_dword(ACCESS_FBINFO(pcidev), 0x50, reg50);
+
                hw->MXoptionReg |= 0x1080;
                pci_write_config_dword(ACCESS_FBINFO(pcidev), PCI_OPTION_REG, hw->MXoptionReg);
                mga_outl(M_CTLWTST, 0x00000300);
@@ -797,20 +800,45 @@ static int MGAG100_preinit(WPMINFO struct matrox_hw_state* hw){
                        hw->MXoptionReg &= ~0x1000;
                }
 #endif
+               hw->MXoptionReg |= 0x00078020;
+       } else  if (ACCESS_FBINFO(devflags.accelerator) == FB_ACCEL_MATROX_MGAG200) {
+               pci_read_config_dword(ACCESS_FBINFO(pcidev), 0x50, &reg50);
+               reg50 &= ~0x3000;
+               pci_write_config_dword(ACCESS_FBINFO(pcidev), 0x50, reg50);
+
+               if (ACCESS_FBINFO(devflags.memtype) == -1)
+                       ACCESS_FBINFO(devflags.memtype) = 3;
+               hw->MXoptionReg |= (ACCESS_FBINFO(devflags.memtype) & 7) << 10;
+               if (ACCESS_FBINFO(devflags.sgram))
+                       hw->MXoptionReg |= 0x4000;
+               mga_outl(M_CTLWTST, 0x042450A1);
+               mga_outl(M_MEMRDBK, 0x00000108);
+               udelay(200);
+               mga_outl(M_MACCESS, 0x00000000);
+               mga_outl(M_MACCESS, 0x00008000);
+               udelay(100);
+               mga_outw(M_MEMRDBK, 0x00000108);
+               hw->MXoptionReg |= 0x00078020;
        } else {
-               hw->MXoptionReg |= 0x00000C00;
+               pci_read_config_dword(ACCESS_FBINFO(pcidev), 0x50, &reg50);
+               reg50 &= ~0x00000100;
+               reg50 |=  0x00000000;
+               pci_write_config_dword(ACCESS_FBINFO(pcidev), 0x50, reg50);
+
+               if (ACCESS_FBINFO(devflags.memtype) == -1)
+                       ACCESS_FBINFO(devflags.memtype) = 0;
+               hw->MXoptionReg |= (ACCESS_FBINFO(devflags.memtype) & 7) << 10;
                if (ACCESS_FBINFO(devflags.sgram))
                        hw->MXoptionReg |= 0x4000;
                mga_outl(M_CTLWTST, 0x042450A1);
-               mga_outb(0x1E47, 0x00);
-               mga_outb(0x1E46, 0x00);
-               udelay(10);
-               mga_outb(0x1C05, 0x00);
-               mga_outb(0x1C05, 0x80);
+               mga_outl(M_MEMRDBK, 0x00000108);
+               udelay(200);
+               mga_outl(M_MACCESS, 0x00000000);
+               mga_outl(M_MACCESS, 0x00008000);
                udelay(100);
-               mga_outw(0x1E44, 0x0108);
+               mga_outl(M_MEMRDBK, 0x00000108);
+               hw->MXoptionReg |= 0x00040020;
        }
-       hw->MXoptionReg = (hw->MXoptionReg & ~0x1F8000) | 0x78000;
        pci_write_config_dword(ACCESS_FBINFO(pcidev), PCI_OPTION_REG, hw->MXoptionReg);
        return 0;
 }
index 67dc556b06705f1fdff1358b382eabcd11bb8b35..7c65eaa0f982da4d7729c55fd8c216bfa9cfce6e 100644 (file)
@@ -4,14 +4,14 @@
  *
  * (c) 1998,1999,2000 Petr Vandrovec <vandrove@vc.cvut.cz>
  *
- * Version: 1.21 2000/01/09
+ * Version: 1.50 2000/08/10
  *
  * MTRR stuff: 1998 Tom Rini <trini@kernel.crashing.org>
  *
  * Contributors: "menion?" <menion@mindless.com>
  *                     Betatesting, fixes, ideas
  *
- *               "Kurt Garloff" <garloff@kg1.ping.de>
+ *               "Kurt Garloff" <garloff@suse.de>
  *                     Betatesting, fixes, ideas, videomodes, videomodes timmings
  *
  *               "Tom Rini" <trini@kernel.crashing.org>
index a28388e1ebdf65b2a2f8d9dd93db77f450d19f68..d79e5e3e48b875812e7590529a8136f52dcf0867 100644 (file)
@@ -4,14 +4,14 @@
  *
  * (c) 1998,1999,2000 Petr Vandrovec <vandrove@vc.cvut.cz>
  *
- * Version: 1.21 2000/01/09
+ * Version: 1.50 2000/08/10
  *
  * MTRR stuff: 1998 Tom Rini <trini@kernel.crashing.org>
  *
  * Contributors: "menion?" <menion@mindless.com>
  *                     Betatesting, fixes, ideas
  *
- *               "Kurt Garloff" <garloff@kg1.ping.de>
+ *               "Kurt Garloff" <garloff@suse.de>
  *                     Betatesting, fixes, ideas, videomodes, videomodes timmings
  *
  *               "Tom Rini" <trini@kernel.crashing.org>
index 9ddb301405f48e2eba182d8a77db7aabe176a5c2..9a21d5dc7c50adf5f23d8d2850209ce6947338e9 100644 (file)
@@ -4,14 +4,14 @@
  *
  * (c) 1998,1999,2000 Petr Vandrovec <vandrove@vc.cvut.cz>
  *
- * Version: 1.21 1999/01/09
+ * Version: 1.50 2000/08/10
  *
  * MTRR stuff: 1998 Tom Rini <trini@kernel.crashing.org>
  *
  * Contributors: "menion?" <menion@mindless.com>
  *                     Betatesting, fixes, ideas
  *
- *               "Kurt Garloff" <garloff@kg1.ping.de>
+ *               "Kurt Garloff" <garloff@suse.de>
  *                     Betatesting, fixes, ideas, videomodes, videomodes timmings
  *
  *               "Tom Rini" <trini@kernel.crashing.org>
@@ -69,6 +69,9 @@
  *               "Anton Altaparmakov" <AntonA@bigfoot.com>
  *                     G400 MAX/non-MAX distinction
  *
+ *               "Ken Aaker" <kdaaker@rchland.vnet.ibm.com>
+ *                     memtype extension (needed for GXT130P RS/6000 adapter)
+ *
  * (following author is not in any relation with this code, but his code
  *  is included in this driver)
  *
@@ -1312,6 +1315,7 @@ static unsigned int fv = 0;               /* "matrox:fv:xxxxx" */
 static unsigned int fh = 0;            /* "matrox:fh:xxxxxk" */
 static unsigned int maxclk = 0;                /* "matrox:maxclk:xxxxM" */
 static int dfp = 0;                    /* "matrox:dfp */
+static int memtype = -1;               /* "matrox:memtype:xxx" */
 static char fontname[64];              /* "matrox:font:xxxxx" */
 
 #ifndef MODULE
@@ -2037,6 +2041,9 @@ static int matroxfb_probe(struct pci_dev* pdev, const struct pci_device_id* dumm
        memcpy(ACCESS_FBINFO(fbcon.fontname), fontname, sizeof(ACCESS_FBINFO(fbcon.fontname)));
        /* DEVFLAGS */
        ACCESS_FBINFO(devflags.inverse) = inverse;
+       ACCESS_FBINFO(devflags.memtype) = memtype;
+       if (memtype != -1)
+               noinit = 0;
        if (cmd & PCI_COMMAND_MEMORY) {
                ACCESS_FBINFO(devflags.novga) = novga;
                ACCESS_FBINFO(devflags.nobios) = nobios;
@@ -2050,6 +2057,7 @@ static int matroxfb_probe(struct pci_dev* pdev, const struct pci_device_id* dumm
                ACCESS_FBINFO(devflags.nobios) = 1;
                ACCESS_FBINFO(devflags.noinit) = 0;
        }
+
        ACCESS_FBINFO(devflags.nopciretry) = no_pci_retry;
        ACCESS_FBINFO(devflags.mga_24bpp_fix) = inv24;
        ACCESS_FBINFO(devflags.precise_width) = option_precise_width;
@@ -2094,10 +2102,42 @@ static void pci_remove_matrox(struct pci_dev* pdev) {
        matroxfb_remove(PMINFO 1);
 }
 
+static struct pci_device_id matroxfb_devices[] __devinitdata = {
+#ifdef CONFIG_FB_MATROX_MILLENIUM
+       {PCI_VENDOR_ID_MATROX,  PCI_DEVICE_ID_MATROX_MIL,
+               PCI_ANY_ID,     PCI_ANY_ID,     0, 0, 0},
+       {PCI_VENDOR_ID_MATROX,  PCI_DEVICE_ID_MATROX_MIL_2,
+               PCI_ANY_ID,     PCI_ANY_ID,     0, 0, 0},
+       {PCI_VENDOR_ID_MATROX,  PCI_DEVICE_ID_MATROX_MIL_2_AGP,
+               PCI_ANY_ID,     PCI_ANY_ID,     0, 0, 0},
+#endif
+#ifdef CONFIG_FB_MATROX_MYSTIQUE
+       {PCI_VENDOR_ID_MATROX,  PCI_DEVICE_ID_MATROX_MYS,
+               PCI_ANY_ID,     PCI_ANY_ID,     0, 0, 0},
+#endif
+#ifdef CONFIG_FB_MATROX_G100
+       {PCI_VENDOR_ID_MATROX,  PCI_DEVICE_ID_MATROX_G100,
+               PCI_ANY_ID,     PCI_ANY_ID,     0, 0, 0},
+       {PCI_VENDOR_ID_MATROX,  PCI_DEVICE_ID_MATROX_G100_AGP,
+               PCI_ANY_ID,     PCI_ANY_ID,     0, 0, 0},
+       {PCI_VENDOR_ID_MATROX,  PCI_DEVICE_ID_MATROX_G200_PCI,
+               PCI_ANY_ID,     PCI_ANY_ID,     0, 0, 0},
+       {PCI_VENDOR_ID_MATROX,  PCI_DEVICE_ID_MATROX_G200_AGP,
+               PCI_ANY_ID,     PCI_ANY_ID,     0, 0, 0},
+       {PCI_VENDOR_ID_MATROX,  PCI_DEVICE_ID_MATROX_G400_AGP,
+               PCI_ANY_ID,     PCI_ANY_ID,     0, 0, 0},
+#endif
+       {0,                     0,
+               0,              0,              0, 0, 0}
+};
+
+MODULE_DEVICE_TABLE(pci, matroxfb_devices);
+
 static struct pci_driver matroxfb_driver = {
-       name:   "matroxfb",
-       probe:  matroxfb_probe,
-       remove: pci_remove_matrox,
+       name:           "matroxfb",
+       id_table:       matroxfb_devices,
+       probe:          matroxfb_probe,
+       remove:         pci_remove_matrox,
 };
 
 /* **************************** init-time only **************************** */
@@ -2378,6 +2418,8 @@ int __init matroxfb_setup(char *options) {
                        sgram = 1;
                else if (!strcmp(this_opt, "sdram"))
                        sgram = 0;
+               else if (!strncmp(this_opt, "memtype:", 8))
+                       memtype = simple_strtoul(this_opt+8, NULL, 0);
                else {
                        int value = 1;
 
@@ -2461,6 +2503,8 @@ MODULE_PARM(nobios, "i");
 MODULE_PARM_DESC(nobios, "Disables ROM BIOS (0 or 1=disabled) (default=do not change BIOS state)");
 MODULE_PARM(noinit, "i");
 MODULE_PARM_DESC(noinit, "Disables W/SG/SD-RAM and bus interface initialization (0 or 1=do not initialize) (default=0)");
+MODULE_PARM(memtype, "i");
+MODULE_PARM_DESC(memtype, "Memory type for G200/G400 (see Documentation/fb/matroxfb.txt for explanation) (default=3 for G200, 0 for G400)");
 MODULE_PARM(mtrr, "i");
 MODULE_PARM_DESC(mtrr, "This speeds up video memory accesses (0=disabled or 1) (default=1)");
 MODULE_PARM(sgram, "i");
index c8a47fe9e25970a98e4b2c1c42e62dc5fc6a369c..125f8be4f511a5f6e6863e72230f07062da9cae9 100644 (file)
@@ -2,7 +2,7 @@
  *
  * Hardware accelerated Matrox Millennium I, II, Mystique, G100, G200 and G400
  *
- * (c) 1998,1999 Petr Vandrovec <vandrove@vc.cvut.cz>
+ * (c) 1998,1999,2000 Petr Vandrovec <vandrove@vc.cvut.cz>
  *
  */
 #ifndef __MATROXFB_H__
@@ -530,6 +530,7 @@ struct matrox_fb_info {
                unsigned int    textvram;       /* character cells */
                unsigned int    ydstorg;        /* offset in bytes from video start to usable memory */
                                                /* 0 except for 6MB Millenium */
+               int             memtype;
                              } devflags;
        struct display_switch   dispsw;
        struct {
@@ -695,6 +696,7 @@ void matroxfb_unregister_driver(struct matroxfb_driver* drv);
 #define M_VCOUNT       0x1E20
 
 #define M_RESET                0x1E40
+#define M_MEMRDBK      0x1E44
 
 #define M_AGP2PLL      0x1E4C
 
index 126ab9b66fbca1de24b22a0cac7b23aa0782f6c9..ff9d6fb751c18f98693ddbad5ad5db10b387edc8 100644 (file)
@@ -4,14 +4,14 @@
  *
  * (c) 1998,1999,2000 Petr Vandrovec <vandrove@vc.cvut.cz>
  *
- * Version: 1.21 2000/01/09
+ * Version: 1.50 2000/08/10
  *
  * MTRR stuff: 1998 Tom Rini <trini@kernel.crashing.org>
  *
  * Contributors: "menion?" <menion@mindless.com>
  *                     Betatesting, fixes, ideas
  *
- *               "Kurt Garloff" <garloff@kg1.ping.de>
+ *               "Kurt Garloff" <garloff@suse.de>
  *                     Betatesting, fixes, ideas, videomodes, videomodes timmings
  *
  *               "Tom Rini" <trini@kernel.crashing.org>
index 93aae1c7f6833e726c2bb9314681f8f172940f76..7c3efb3667de15d61122acfe3f748b4116a5084d 100644 (file)
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -402,7 +402,10 @@ static int exec_mmap(void)
        if (mm) {
                struct mm_struct *active_mm = current->active_mm;
 
-               init_new_context(current, mm);
+               if (init_new_context(current, mm)) {
+                       mmdrop(mm);
+                       return -ENOMEM;
+               }
                task_lock(current);
                current->mm = mm;
                current->active_mm = mm;
index 9ea2223d3f52fc12f57de1b898384bfb05693561..6078c869e68e80b1fc33ad58cfbec26bb5af974b 100644 (file)
@@ -10,7 +10,7 @@
  * the Free Software Foundation; either version 2 of the License, or
  * (at your option) any later version.
  *
- * $Id: inode-v23.c,v 1.33 2000/08/09 15:59:06 dwmw2 Exp $
+ * $Id: inode-v23.c,v 1.34 2000/08/10 08:58:00 dwmw2 Exp $
  *
  *
  * Ported to Linux 2.3.x and MTD:
@@ -44,7 +44,6 @@
 #include <linux/fs.h>
 #include <linux/locks.h>
 #include <linux/smp_lock.h>
-#include <linux/sched.h>
 #include <linux/ioctl.h>
 #include <linux/stat.h>
 #include <linux/blkdev.h>
@@ -350,7 +349,7 @@ jffs_new_inode(const struct inode * dir, struct jffs_raw_inode *raw_inode,
        inode->i_mtime = raw_inode->mtime;
        inode->i_ctime = raw_inode->ctime;
        inode->i_blksize = PAGE_SIZE;
-       inode->i_blocks = (raw_inode->dsize + PAGE_SIZE - 1) >> PAGE_SHIFT;
+       inode->i_blocks = (inode->i_size + 511) >> 9;
        inode->i_version = 0;
        inode->i_flags = sb->s_flags;
        inode->u.generic_ip = (void *)jffs_find_file(c, raw_inode->ino);
@@ -1571,7 +1570,7 @@ jffs_read_inode(struct inode *inode)
        inode->i_mtime = f->mtime;
        inode->i_ctime = f->ctime;
        inode->i_blksize = PAGE_SIZE;
-       inode->i_blocks = (inode->i_size + PAGE_SIZE - 1) >> PAGE_SHIFT;
+       inode->i_blocks = (inode->i_size + 511) >> 9;
        if (S_ISREG(inode->i_mode)) {
                inode->i_op = &jffs_file_inode_operations;
                inode->i_fop = &jffs_file_operations;
index 97d9e2d22184cff0e11974875a7c42a138f067fb..75ffc6ef043423825373dad65e4a1aaab6622c31 100644 (file)
@@ -1690,7 +1690,8 @@ int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry,
                triple_up(&old_dir->i_zombie,
                          &new_dir->i_zombie,
                          &target->i_zombie);
-               d_rehash(new_dentry);
+               if (d_unhashed(new_dentry))
+                       d_rehash(new_dentry);
                dput(new_dentry);
        } else
                double_up(&old_dir->i_zombie,
index ef51478a19f6bceaf22cdc29ecc874c00015a634..8745b6f88eb7956c8ae5b969368af2319652af0f 100644 (file)
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -545,9 +545,9 @@ int do_pipe(int *fd)
        this.len = strlen(name);
        this.hash = inode->i_ino; /* will go */
        dentry = d_alloc(pipe_mnt->mnt_sb->s_root, &this);
-       dentry->d_op = &pipefs_dentry_operations;
        if (!dentry)
                goto close_f12_inode_i_j;
+       dentry->d_op = &pipefs_dentry_operations;
        d_add(dentry, inode);
        f1->f_vfsmnt = f2->f_vfsmnt = mntget(mntget(pipe_mnt));
        f1->f_dentry = f2->f_dentry = dget(dentry);
index 4cdfb21d5758097c81fbba68ff7c3be3cd44062a..69bfdcaf1d88b0fc354dd8fc63661699841c81fe 100644 (file)
@@ -271,6 +271,8 @@ extern inline unsigned long find_next_zero_bit(void * addr, unsigned long size,
        tmp = *p;
 found_first:
        tmp |= ~0UL << size;
+       if (tmp == ~0UL)        /* Are any bits zero? */
+               return result + size; /* Nope. */
 found_middle:
        return result + ffz(tmp);
 }
index b44e4a42815098f3b72274e0b28254064d41e2fa..219b8bd4ecb4538be85cb248889f4ea68068b790 100644 (file)
@@ -205,11 +205,12 @@ ev4_activate_mm(struct mm_struct *prev_mm, struct mm_struct *next_mm)
 # endif
 #endif
 
-extern inline void
+extern inline int
 init_new_context(struct task_struct *tsk, struct mm_struct *mm)
 {
        mm->context = 0;
         tsk->thread.ptbr = ((unsigned long)mm->pgd - IDENT_ADDR) >> PAGE_SHIFT;
+       return 0;
 }
 
 extern inline void
index 6ebf76a6c78541b86a24069fba9c50d088a1766f..f358628a9bda044edb4d18f65d512c69ba603422 100644 (file)
@@ -15,7 +15,7 @@
 #include <asm/proc-fns.h>
 
 #define destroy_context(mm)            do { } while(0)
-#define init_new_context(tsk,mm)       do { } while(0)
+#define init_new_context(tsk,mm)       0
 
 static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk, unsigned cpu)
 {
index 75b33ae5c4c0d9ea0216dad3591482ceaf3f6c60..0c201e1d26e7e056a3c2faa1d810e9d0fed8e5e2 100644 (file)
@@ -10,7 +10,7 @@
  * possibly do the LDT unload here?
  */
 #define destroy_context(mm)            do { } while(0)
-#define init_new_context(tsk,mm)       do { } while (0)
+#define init_new_context(tsk,mm)       0
 
 #ifdef CONFIG_SMP
 
index 22c2b22973311bc262e8e6089200985d43059089..a223e9bbdeef04e196944154f01f342c1b493075 100644 (file)
@@ -84,10 +84,11 @@ get_mmu_context (struct mm_struct *mm)
        }
 }
 
-extern inline void
+extern inline int
 init_new_context (struct task_struct *p, struct mm_struct *mm)
 {
        mm->context = 0;
+       return 0;
 }
 
 extern inline void
index d481eb316e651da219c92a6d967dfd3ef3f5fe5b..003933faefaf6a1ef6f0ca6bad283ec710bda40f 100644 (file)
 static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk, unsigned cpu)
 {
 }
-extern inline void
+extern inline int
 init_new_context(struct task_struct *tsk, struct mm_struct *mm)
 {
        mm->context = virt_to_phys(mm->pgd);
+       return 0;
 }
 
 #define destroy_context(mm)            do { } while(0)
@@ -108,9 +109,10 @@ extern unsigned char ctx_next_to_die;
 extern unsigned char ctx_live[SUN3_CONTEXTS_NUM];
 
 /* set the context for a new task to unmapped */
-static inline void init_new_context(struct task_struct *tsk, struct mm_struct *mm)
+static inline int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
 {
        mm->context = SUN3_INVALID_CONTEXT;
+       return 0;
 }
 
 /* find the context given to this process, and if it hasn't already
index 2e9809bbaacf7fcf8795ffc82a2ac427fc81b763..9be6976e896dd8db01bf5cb194034b5f27a3c0a1 100644 (file)
@@ -57,10 +57,11 @@ get_new_mmu_context(struct mm_struct *mm, unsigned long asid)
  * Initialize the context related info for a new mm_struct
  * instance.
  */
-extern inline void
+extern inline int
 init_new_context(struct task_struct *tsk, struct mm_struct *mm)
 {
        mm->context = 0;
+       return 0;
 }
 
 extern inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
index 0fe300bda61e5623f6a7bb830e1d79f7e28efbf1..9979f0b3c18c55661a85e087542c0070e0775fc9 100644 (file)
@@ -70,7 +70,7 @@ get_new_cpu_mmu_context(struct mm_struct *mm, unsigned long cpu)
  * Initialize the context related info for a new mm_struct
  * instance.
  */
-extern inline void
+extern inline int
 init_new_context(struct task_struct *tsk, struct mm_struct *mm)
 {
 #ifndef CONFIG_SMP
@@ -82,12 +82,11 @@ init_new_context(struct task_struct *tsk, struct mm_struct *mm)
         * Init the "context" values so that a tlbpid allocation 
         * happens on the first switch.
         */
-       if (mm->context)
-               memset((void *)mm->context, 0, smp_num_cpus * 
-                                               sizeof(unsigned long));
-       else
-               printk("Warning: init_new_context failed\n");
+       if (mm->context == 0)
+               return -ENOMEM;
+       memset((void *)mm->context, 0, smp_num_cpus * sizeof(unsigned long));
 #endif
+       return 0;
 }
 
 extern inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
index bdb1447ed2aec82b1657f7415d3b33da7cf03f0f..414dd1cdc7867205178dac3575d2399c5d385b44 100644 (file)
@@ -77,7 +77,7 @@ do {                                                          \
 /*
  * Set up the context for a new address space.
  */
-#define init_new_context(tsk,mm)       ((mm)->context = NO_CONTEXT)
+#define init_new_context(tsk,mm)       (((mm)->context = NO_CONTEXT), 0)
 
 /*
  * We're finished using the context for an address space.
index c2a215135c0e1d8ebf30228524411e5843138bfe..71f0f66dfbd5d700c2b3611af284ebdf31751b5f 100644 (file)
@@ -12,7 +12,7 @@
 /*
  * get a new mmu context.. S390 don't know about contexts.
  */
-#define init_new_context(tsk,mm)        do { } while (0)
+#define init_new_context(tsk,mm)        0
 
 #define destroy_context(mm)             flush_tlb_mm(mm)
 
index f20096234ce9c062328e87e521b270137c9f0fcd..0227057b93fc11047c77779c7bf7cfc955028d5c 100644 (file)
@@ -1,6 +1,7 @@
 #ifndef __ASM_SH_DMA_H
 #define __ASM_SH_DMA_H
 
+#include <linux/config.h>
 #include <asm/io.h>            /* need byte IO */
 
 #define MAX_DMA_CHANNELS 8
index 3e82b3f62a63d74154e26acd0542e931f7b598c1..9e7de808f7f8770022dd37272461bb3e190a31c8 100644 (file)
@@ -12,6 +12,8 @@
 #ifndef __SH_MACHVEC_INIT_H
 #define __SH_MACHVEC_INIT_H
 
+#include <linux/config.h>
+
 /*
  * In a GENERIC kernel, we have lots of these vectors floating about,
  * all but one of which we want to go away.  In a non-GENERIC kernel,
index 82517a5af3abb5eecfedee2a726b89f4306015c5..268754e71ede125dd4814b9aed91ceb782c10c19 100644 (file)
@@ -67,10 +67,11 @@ get_mmu_context(struct mm_struct *mm)
  * Initialize the context related info for a new mm_struct
  * instance.
  */
-extern __inline__ void init_new_context(struct task_struct *tsk,
+extern __inline__ int init_new_context(struct task_struct *tsk,
                                        struct mm_struct *mm)
 {
        mm->context = NO_CONTEXT;
+       return 0;
 }
 
 /*
index 604c447a9c67cf53d28c0886b851533d4890ce8e..274707e2d288a4962ed85ead4e9a7cd06546384c 100644 (file)
@@ -13,7 +13,7 @@ static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk,
  * Initialize a new mmu context.  This is invoked when a new
  * address space instance (unique or shared) is instantiated.
  */
-#define init_new_context(tsk, mm) ((mm)->context = NO_CONTEXT)
+#define init_new_context(tsk, mm) (((mm)->context = NO_CONTEXT), 0)
 
 /*
  * Destroy a dead context.  This occurs when mmput drops the
index 47022fbba04c09ccf1ee6f73713b68ef23ba833b..e6373384f0556163dc5c08969f7197be698168e3 100644 (file)
@@ -1,4 +1,4 @@
-/* $Id: page.h,v 1.53 2000/06/04 08:36:33 anton Exp $
+/* $Id: page.h,v 1.54 2000/08/10 01:04:53 davem Exp $
  * page.h:  Various defines and such for MMU operations on the Sparc for
  *          the Linux kernel.
  *
index ea7b83d5b6b81b243bc5189a2847578cf9668dfb..558df48efde4c071705fd58b8b333ad5d443426f 100644 (file)
@@ -31,7 +31,7 @@ extern void get_new_mmu_context(struct mm_struct *mm);
  * address space instance (unique or shared) is instantiated.
  * This just needs to set mm->context to an invalid context.
  */
-#define init_new_context(__tsk, __mm)  ((__mm)->context = 0UL)
+#define init_new_context(__tsk, __mm)  (((__mm)->context = 0UL), 0)
 
 /* Destroy a dead context.  This occurs when mmput drops the
  * mm_users count to zero, the mmaps have been released, and
index 86bd2640298e25feafb28804f95840539154c979..6c65d3241a8b62818226ae562440b6828fc3c7e7 100644 (file)
@@ -1,4 +1,4 @@
-/* $Id: page.h,v 1.35 2000/04/13 04:45:59 davem Exp $ */
+/* $Id: page.h,v 1.36 2000/08/10 01:04:53 davem Exp $ */
 
 #ifndef _SPARC64_PAGE_H
 #define _SPARC64_PAGE_H
index 3cd0250eed7ac6c0faa18dc12fd48e1101216999..a804348231e2c55710224b0cf0a13f03287dc60f 100644 (file)
@@ -29,6 +29,9 @@ static __inline__ unsigned long kmap(struct page * page) {
 
 #define kunmap(page) do { } while (0)
 
+#define kmap_atomic(page,idx)          kmap(page)
+#define kunmap_atomic(page,idx)                kunmap(page)
+
 #endif /* CONFIG_HIGHMEM */
 
 /* when CONFIG_HIGHMEM is not set these will be plain clear/copy_page */
index 0fcf3bfb21b72d4544fdf8077b2af92f0e4d7280..b01390e6f22ce61abb63d520f85f1a4218ce8564 100644 (file)
@@ -38,6 +38,7 @@ struct sockaddr_ll
 /* Value 4 is still used by obsolete turbo-packet. */
 #define PACKET_RX_RING                 5
 #define PACKET_STATISTICS              6
+#define PACKET_COPY_THRESH             7
 
 struct tpacket_stats
 {
index 44612efa0665bddadeb0cb95bc4d02fc9c2a8bb0..938b43d77fa36a5b697db6a0add5bbe480425a0f 100644 (file)
@@ -62,21 +62,13 @@ struct ip_conntrack_tuple
        } dst;
 };
 
-#define IP_PARTS_NATIVE(n)                     \
-(unsigned int)((n)>>24)&0xFF,                  \
-(unsigned int)((n)>>16)&0xFF,                  \
-(unsigned int)((n)>>8)&0xFF,                   \
-(unsigned int)((n)&0xFF)
-
-#define IP_PARTS(n) IP_PARTS_NATIVE(ntohl(n))
-
 #ifdef __KERNEL__
 
 #define DUMP_TUPLE(tp)                                         \
-DEBUGP("tuple %p: %u %u.%u.%u.%u:%u -> %u.%u.%u.%u:%u\n",      \
+DEBUGP("tuple %p: %u %u.%u.%u.%u:%hu -> %u.%u.%u.%u:%hu\n",    \
        (tp), (tp)->dst.protonum,                               \
-       IP_PARTS((tp)->src.ip), ntohs((tp)->src.u.all),         \
-       IP_PARTS((tp)->dst.ip), ntohs((tp)->dst.u.all))
+       NIPQUAD((tp)->src.ip), ntohs((tp)->src.u.all),          \
+       NIPQUAD((tp)->dst.ip), ntohs((tp)->dst.u.all))
 
 #define CTINFO2DIR(ctinfo) ((ctinfo) >= IP_CT_IS_REPLY ? IP_CT_DIR_REPLY : IP_CT_DIR_ORIGINAL)
 
index a78776235775f6003e86ebd123277cb4d6e6c8fd..aa08d68c4841ebdf265a99494bc9a88933f5d815 100644 (file)
@@ -26,6 +26,10 @@ typedef struct ipq_packet_msg {
        unsigned int hook;              /* Netfilter hook we rode in on */
        char indev_name[IFNAMSIZ];      /* Name of incoming interface */
        char outdev_name[IFNAMSIZ];     /* Name of outgoing interface */
+       unsigned short hw_protocol;     /* Hardware protocol (network order) */
+       unsigned short hw_type;         /* Hardware type */
+       unsigned char hw_addrlen;       /* Hardware address length */
+       unsigned char hw_addr[8];       /* Hardware address */
        size_t data_len;                /* Length of packet data */
        unsigned char payload[0];       /* Optional packet data */
 } ipq_packet_msg_t;
index f12c936d88743b7aa787f7cad0b059806638b88d..7f68d19517f885667a5b099bc3c5ec07e60f7736 100644 (file)
 #define PCI_VENDOR_ID_PLX              0x10b5
 #define PCI_VENDOR_ID_PLX_ROMULUS      0x106a
 #define PCI_DEVICE_ID_PLX_SPCOM800     0x1076
+#define PCI_DEVICE_ID_PLX_1077         0x1077
 #define PCI_DEVICE_ID_PLX_SPCOM200     0x1103
 #define PCI_DEVICE_ID_PLX_9050         0x9050
 #define PCI_DEVICE_ID_PLX_9060         0x9060
 #define PCI_VENDOR_ID_V3               0x11b0
 #define PCI_DEVICE_ID_V3_V960          0x0001
 #define PCI_DEVICE_ID_V3_V350          0x0001
-#define PCI_DEVICE_ID_V3_V960V2                0x0002
-#define PCI_DEVICE_ID_V3_V350V2                0x0002
+#define PCI_DEVICE_ID_V3_V961          0x0002
+#define PCI_DEVICE_ID_V3_V351          0x0002
 
 #define PCI_VENDOR_ID_NP               0x11bc
 #define PCI_DEVICE_ID_NP_PCI_FDDI      0x0001
 #define PCI_SUBDEVICE_ID_CONNECT_TECH_BH4_485          0x0006
 #define PCI_SUBDEVICE_ID_CONNECT_TECH_BH4_485_2_2      0x0007
 #define PCI_SUBDEVICE_ID_CONNECT_TECH_BH2_485          0x0008
+#define PCI_SUBDEVICE_ID_CONNECT_TECH_BH8_485_2_6      0x0009
+#define PCI_SUBDEVICE_ID_CONNECT_TECH_BH081101V1       0x000A
+#define PCI_SUBDEVICE_ID_CONNECT_TECH_BH041101V1       0x000B
 
 #define PCI_VENDOR_ID_PICTUREL         0x12c5
 #define PCI_DEVICE_ID_PICTUREL_PCIVST  0x0081
index f27ad591f6c0cb6bb82c54689cd121470a808e44..40738af958a94ff3cd107358acc1f63f8c4eff3c 100644 (file)
@@ -277,9 +277,11 @@ enum
 #define RTAX_CWND RTAX_CWND
        RTAX_ADVMSS,
 #define RTAX_ADVMSS RTAX_ADVMSS
+       RTAX_REORDERING,
+#define RTAX_REORDERING RTAX_REORDERING
 };
 
-#define RTAX_MAX RTAX_ADVMSS
+#define RTAX_MAX RTAX_REORDERING
 
 
 
index ce404ca1a9d7afb7dbc7b2ce20ec56ce3eb67d75..7d4178a542ca861dc4d832b8aa5c5b0ec35d6659 100644 (file)
@@ -123,6 +123,8 @@ struct serial_uart_config {
 #define ASYNC_BUGGY_UART  0x4000 /* This is a buggy UART, skip some safety
                                  * checks.  Note: can be dangerous! */
 
+#define ASYNC_AUTOPROBE         0x8000 /* Port was autoprobed by PCI or PNP code */
+
 #define ASYNC_FLAGS    0x7FFF  /* Possible legal async flags */
 #define ASYNC_USR_MASK 0x3430  /* Legal flags that non-privileged
                                 * users can set or reset */
@@ -137,7 +139,6 @@ struct serial_uart_config {
 #define ASYNC_CHECK_CD         0x02000000 /* i.e., CLOCAL */
 #define ASYNC_SHARE_IRQ                0x01000000 /* for multifunction cards
                                             --- no longer used */
-#define ASYNC_AUTOPROBE                0x00800000 /* Port was autoprobed */
 
 #define ASYNC_INTERNAL_FLAGS   0xFF000000 /* Internal flags */
 
index 850837af2c9c0957719346d83c31e63930844011..c67f8c85abf65fd74d49a10eed31791d1723ad82 100644 (file)
@@ -931,6 +931,12 @@ skb_cow(struct sk_buff *skb, unsigned int headroom)
        return skb;
 }
 
+#define skb_queue_walk(queue, skb) \
+               for (skb = (queue)->next;                       \
+                    (skb != (struct sk_buff *)(queue));        \
+                    skb=skb->next)
+
+
 extern struct sk_buff *                skb_recv_datagram(struct sock *sk,unsigned flags,int noblock, int *err);
 extern unsigned int            datagram_poll(struct file *file, struct socket *sock, struct poll_table_struct *wait);
 extern int                     skb_copy_datagram(struct sk_buff *from, int offset, char *to,int size);
index 7dfe40942a794d3429e5d264efac4a6fe999265c..91913e545ad2c2c8dc53453a427a61632201eeac 100644 (file)
@@ -260,6 +260,15 @@ enum
        NET_TCP_SYNACK_RETRIES=76,
        NET_TCP_MAX_ORPHANS=77,
        NET_TCP_MAX_TW_BUCKETS=78,
+       NET_TCP_FACK=79,
+       NET_TCP_REORDERING=80,
+       NET_TCP_ECN=81,
+       NET_TCP_DSACK=82,
+       NET_TCP_MEM=83,
+       NET_TCP_WMEM=84,
+       NET_TCP_RMEM=85,
+       NET_TCP_APP_WIN=86,
+       NET_TCP_ADV_WIN_SCALE=87,
 };
 
 enum {
index 4bca9c09203e897fdabb56f3b91d3b78cbbcd396..253d72a22c2e33dce2c5ffaca443c508db64173f 100644 (file)
@@ -44,6 +44,7 @@ struct dst_entry
        unsigned                ssthresh;
        unsigned                cwnd;
        unsigned                advmss;
+       unsigned                reordering;
 
        unsigned long           rate_last;      /* rate limiting for ICMP */
        unsigned long           rate_tokens;
index 8bcb17085ef0bdeb130218dc7a16d6d50b5e82cc..2bd12729935b77f7dea1f4d3d3737963f3d534b0 100644 (file)
@@ -199,7 +199,45 @@ struct linux_mib
        unsigned long   TCPPrequeueDropped;
        unsigned long   TCPHPHits;
        unsigned long   TCPHPHitsToUser;
-       unsigned long   __pad[32-26];
+       unsigned long   TCPPureAcks;
+       unsigned long   TCPHPAcks;
+       unsigned long   TCPRenoRecovery;
+       unsigned long   TCPSackRecovery;
+       unsigned long   TCPSACKReneging;
+       unsigned long   TCPFACKReorder;
+       unsigned long   TCPSACKReorder;
+       unsigned long   TCPRenoReorder;
+       unsigned long   TCPTSReorder;
+       unsigned long   TCPFullUndo;
+       unsigned long   TCPPartialUndo;
+       unsigned long   TCPDSACKUndo;
+       unsigned long   TCPLossUndo;
+       unsigned long   TCPLoss;
+       unsigned long   TCPLostRetransmit;
+       unsigned long   TCPRenoFailures;
+       unsigned long   TCPSackFailures;
+       unsigned long   TCPLossFailures;
+       unsigned long   TCPFastRetrans;
+       unsigned long   TCPForwardRetrans;
+       unsigned long   TCPSlowStartRetrans;
+       unsigned long   TCPTimeouts;
+       unsigned long   TCPRenoRecoveryFail;
+       unsigned long   TCPSackRecoveryFail;
+       unsigned long   TCPSchedulerFailed;
+       unsigned long   TCPRcvCollapsed;
+       unsigned long   TCPDSACKOldSent;
+       unsigned long   TCPDSACKOfoSent;
+       unsigned long   TCPDSACKRecv;
+       unsigned long   TCPDSACKOfoRecv;
+       unsigned long   TCPAbortOnSyn;
+       unsigned long   TCPAbortOnData;
+       unsigned long   TCPAbortOnClose;
+       unsigned long   TCPAbortOnMemory;
+       unsigned long   TCPAbortOnTimeout;
+       unsigned long   TCPAbortOnLinger;
+       unsigned long   TCPAbortFailed;
+       unsigned long   TCPMemoryPressures;
+       unsigned long   __pad[64-64];
 };
 
 #define SNMP_INC_STATS(mib, field) ((mib)[2*smp_processor_id()+!in_softirq()].field++)
index 87a8c3941ace019e38d6e2a3b578d668112b2292..291e20361f13020f21ca916dcc17a266abf524a6 100644 (file)
@@ -268,10 +268,12 @@ struct tcp_opt {
                __u8    pingpong;       /* The session is interactive           */
                __u8    blocked;        /* Delayed ACK was blocked by socket lock*/
                __u32   ato;            /* Predicted tick of soft clock         */
+               unsigned long timeout;  /* Currently scheduled timeout          */
                __u32   lrcvtime;       /* timestamp of last received data packet*/
-               __u16   last_seg_size;  /* Size of last incoming segment */
-               __u16   rcv_mss;        /* MSS used for delayed ACK decisions */ 
-               __u32   rcv_segs;       /* Number of received segments since last ack */
+               __u16   last_seg_size;  /* Size of last incoming segment        */
+               __u16   rcv_mss;        /* MSS used for delayed ACK decisions   */ 
+               __u16   rcv_small;      /* Number of not ACKed small segments   */
+               __u16   rcv_thresh;     /* Peer doing TCP_NODELAY               */
        } ack;
 
        /* Data for direct copy to user */
@@ -284,19 +286,18 @@ struct tcp_opt {
        } ucopy;
 
        __u32   snd_wl1;        /* Sequence for window update           */
-       __u32   snd_wl2;        /* Ack sequence for update              */
        __u32   snd_wnd;        /* The window we expect to receive      */
        __u32   max_window;     /* Maximal window ever seen from peer   */
        __u32   pmtu_cookie;    /* Last pmtu seen by socket             */
        __u16   mss_cache;      /* Cached effective mss, not including SACKS */
        __u16   mss_clamp;      /* Maximal mss, negotiated at connection setup */
        __u16   ext_header_len; /* Network protocol overhead (IP/IPv6 options) */
-       __u8    dup_acks;       /* Consecutive duplicate acks seen from other end */
-       __u8    retransmits;
+       __u8    ca_state;       /* State of fast-retransmit machine     */
+       __u8    retransmits;    /* Number of unrecovered RTO timeouts.  */
 
-       __u8    __empty1;
-       __u8    sorry;
-       __u8    defer_accept;
+       __u8    reordering;     /* Packet reordering metric.            */
+       __u8    queue_shrunk;   /* Write queue has been shrunk recently.*/
+       __u8    defer_accept;   /* User waits for some data after accept() */
 
 /* RTT measurement */
        __u8    backoff;        /* backoff                              */
@@ -305,9 +306,9 @@ struct tcp_opt {
        __u32   rto;            /* retransmit timeout                   */
 
        __u32   packets_out;    /* Packets which are "in flight"        */
-       __u32   fackets_out;    /* Non-retrans SACK'd packets           */
-       __u32   retrans_out;    /* Fast-retransmitted packets out       */
-       __u32   high_seq;       /* snd_nxt at onset of congestion       */
+       __u32   left_out;       /* Packets which leaved network         */
+       __u32   retrans_out;    /* Retransmitted packets out            */
+
 
 /*
  *     Slow start and congestion control (see also Nagle, and Karn & Partridge)
@@ -316,12 +317,11 @@ struct tcp_opt {
        __u32   snd_cwnd;       /* Sending congestion window            */
        __u16   snd_cwnd_cnt;   /* Linear increase counter              */
        __u16   snd_cwnd_clamp; /* Do not allow snd_cwnd to grow above this */
-
-       __u8    nonagle;        /* Disable Nagle algorithm?             */
-       __u8    syn_retries;    /* num of allowed syn retries */
-       __u16   user_mss;       /* mss requested by user in ioctl */
+       __u32   snd_cwnd_used;
+       __u32   snd_cwnd_stamp;
 
        /* Two commonly used timers in both sender and receiver paths. */
+       unsigned long           timeout;
        struct timer_list       retransmit_timer;       /* Resend (no ack)      */
        struct timer_list       delack_timer;           /* Ack delay            */
 
@@ -329,16 +329,12 @@ struct tcp_opt {
 
        struct tcp_func         *af_specific;   /* Operations which are AF_INET{4,6} specific   */
        struct sk_buff          *send_head;     /* Front of stuff to transmit                   */
-       struct sk_buff          *retrans_head;  /* retrans head can be 
-                                                * different to the head of
-                                                * write queue if we are doing
-                                                * fast retransmit
-                                                */
 
        __u32   rcv_wnd;        /* Current receiver window              */
        __u32   rcv_wup;        /* rcv_nxt on last window update sent   */
-       __u32   write_seq;
-       __u32   copied_seq;
+       __u32   write_seq;      /* Tail(+1) of data held in tcp send buffer */
+       __u32   pushed_seq;     /* Last pushed seq, required to talk to windows */
+       __u32   copied_seq;     /* Head of yet unread data              */
 /*
  *      Options received (usually on last packet, some only on SYN packets).
  */
@@ -348,7 +344,7 @@ struct tcp_opt {
        char    saw_tstamp;     /* Saw TIMESTAMP on last packet         */
         __u8   snd_wscale;     /* Window scaling received from sender  */
         __u8   rcv_wscale;     /* Window scaling to send to receiver   */
-       __u8    rexmt_done;     /* Retransmitted up to send head?       */
+       __u8    nonagle;        /* Disable Nagle algorithm?             */
        __u8    keepalive_probes; /* num of allowed keep alive probes   */
 
 /*     PAWS/RTTM data  */
@@ -358,19 +354,37 @@ struct tcp_opt {
         long   ts_recent_stamp;/* Time we stored ts_recent (for aging) */
 
 /*     SACKs data      */
+       __u16   user_mss;       /* mss requested by user in ioctl */
+       __u8    dsack;          /* D-SACK is scheduled                  */
+       __u8    eff_sacks;      /* Size of SACK array to send with next packet */
+       struct tcp_sack_block duplicate_sack[1]; /* D-SACK block */
        struct tcp_sack_block selective_acks[4]; /* The SACKS themselves*/
 
-       struct timer_list       probe_timer;            /* Probes       */
        __u32   window_clamp;   /* Maximal window to advertise          */
+       __u32   rcv_ssthresh;   /* Current window clamp                 */
        __u8    probes_out;     /* unanswered 0 window probes           */
        __u8    num_sacks;      /* Number of SACK blocks                */
        __u16   advmss;         /* Advertised MSS                       */
 
-       __u32   syn_stamp;
-       __u32   syn_seq;
-       __u32   fin_seq;
-       __u32   urg_seq;
-       __u32   urg_data;
+       __u8    syn_retries;    /* num of allowed syn retries */
+       __u8    ecn_flags;      /* ECN status bits.                     */
+       __u16   prior_ssthresh; /* ssthresh saved at recovery start     */
+       __u32   lost_out;       /* Lost packets                         */
+       __u32   sacked_out;     /* SACK'd packets                       */
+       __u32   fackets_out;    /* FACK'd packets                       */
+       __u32   high_seq;       /* snd_nxt at onset of congestion       */
+
+       __u32   retrans_stamp;  /* Timestamp of the last retransmit,
+                                * also used in SYN-SENT to remember stamp of
+                                * the first SYN. */
+       __u32   undo_marker;    /* tracking retrans started here. */
+       int     undo_retrans;   /* number of undoable retransmissions. */
+       __u32   syn_seq;        /* Seq of received SYN. */
+       __u32   fin_seq;        /* Seq of received FIN. */
+       __u32   urg_seq;        /* Seq of received urgent pointer */
+       __u16   urg_data;       /* Saved octet of OOB data and control flags */
+       __u8    pending;        /* Scheduled timer event        */
+       __u8    __empty;
 
        /* The syn_wait_lock is necessary only to avoid tcp_get_info having
         * to grab the main lock sock while browsing the listening hash
@@ -482,8 +496,8 @@ struct sock {
        __u16                   sport;          /* Source port                          */
 
        unsigned short          family;         /* Address family                       */
-       unsigned char           reuse,          /* SO_REUSEADDR setting                 */
-                               __unused;
+       unsigned char           reuse;          /* SO_REUSEADDR setting                 */
+       unsigned char           shutdown;
        atomic_t                refcnt;         /* Reference count                      */
 
        socket_lock_t           lock;           /* Synchronizer...                      */
@@ -497,6 +511,8 @@ struct sock {
        atomic_t                wmem_alloc;     /* Transmit queue bytes committed       */
        struct sk_buff_head     write_queue;    /* Packet sending queue                 */
        atomic_t                omem_alloc;     /* "o" is "option" or "other" */
+       int                     wmem_queued;    /* Persistent queue size */
+       int                     forward_alloc;  /* Space allocated forward. */
        __u32                   saddr;          /* Sending source                       */
        unsigned int            allocation;     /* Allocation mode                      */
        int                     sndbuf;         /* Size of send buffer in bytes         */
@@ -539,8 +555,6 @@ struct sock {
 
        struct proto            *prot;
 
-       unsigned short          shutdown;
-
 #if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
        union {
                struct ipv6_pinfo       af_inet6;
@@ -734,6 +748,11 @@ static void __inline__ sock_prot_dec_use(struct proto *prot)
 #define RCV_SHUTDOWN   1
 #define SEND_SHUTDOWN  2
 
+#define SOCK_SNDBUF_LOCK       1
+#define SOCK_RCVBUF_LOCK       2
+#define SOCK_BINDADDR_LOCK     4
+
+
 /* Used by processes to "lock" a socket state, so that
  * interrupts and bottom half handlers won't change it
  * from under us. It essentially blocks any incoming
@@ -982,8 +1001,6 @@ extern __inline__ void sock_put(struct sock *sk)
  * we do not release it in this function, because protocol
  * probably wants some additional cleanups or even continuing
  * to work with this socket (TCP).
- *
- * NOTE: When softnet goes in replace _irq with _bh!
  */
 extern __inline__ void sock_orphan(struct sock *sk)
 {
@@ -1003,6 +1020,25 @@ extern __inline__ void sock_graft(struct sock *sk, struct socket *parent)
        write_unlock_bh(&sk->callback_lock);
 }
 
+static inline int sock_i_uid(struct sock *sk)
+{
+       int uid;
+
+       read_lock(&sk->callback_lock);
+       uid = sk->socket ? sk->socket->inode->i_uid : 0;
+       read_unlock(&sk->callback_lock);
+       return uid;
+}
+
+static inline unsigned long sock_i_ino(struct sock *sk)
+{
+       unsigned long ino;
+
+       read_lock(&sk->callback_lock);
+       ino = sk->socket ? sk->socket->inode->i_ino : 0;
+       read_unlock(&sk->callback_lock);
+       return ino;
+}
 
 extern __inline__ struct dst_entry *
 __sk_dst_get(struct sock *sk)
@@ -1194,7 +1230,7 @@ extern __inline__ void sk_wake_async(struct sock *sk, int how, int band)
 }
 
 #define SOCK_MIN_SNDBUF 2048
-#define SOCK_MIN_RCVBUF 128
+#define SOCK_MIN_RCVBUF 256
 /* Must be less or equal SOCK_MIN_SNDBUF */
 #define SOCK_MIN_WRITE_SPACE   SOCK_MIN_SNDBUF
 
index 7df8458954f01d2a903c8c2297757296b240726c..d92de90b57cfde2c043d632f76cb132d12d962df 100644 (file)
 #define _TCP_H
 
 #define TCP_DEBUG 1
+#define FASTRETRANS_DEBUG 2
+
+/* Be paranoid about data immediately beyond right edge of window. */
 #undef  TCP_FORMAL_WINDOW
-#define TCP_MORE_COARSE_ACKS
-#undef  TCP_LESS_COARSE_ACKS
+
+/* Cancel timers, when they are not required. */
+#undef TCP_CLEAR_TIMER
 
 #include <linux/config.h>
 #include <linux/tcp.h>
@@ -173,7 +177,7 @@ extern __inline__ void tcp_tw_put(struct tcp_tw_bucket *tw)
 }
 
 extern atomic_t tcp_orphan_count;
-extern int  tcp_tw_count;
+extern int tcp_tw_count;
 extern void tcp_time_wait(struct sock *sk, int state, int timeo);
 extern void tcp_timewait_kill(struct tcp_tw_bucket *tw);
 extern void tcp_tw_schedule(struct tcp_tw_bucket *tw, int timeo);
@@ -242,12 +246,14 @@ static __inline__ int tcp_sk_listen_hashfn(struct sock *sk)
 /* Minimal RCV_MSS. */
 #define TCP_MIN_RCVMSS         536
 
-/* 
- * How much of the receive buffer do we advertize 
- * (the rest is reserved for headers and driver packet overhead)
- * Use a power of 2.
- */
-#define TCP_WINDOW_ADVERTISE_DIVISOR 2
+/* After receiving this amount of duplicate ACKs fast retransmit starts. */
+#define TCP_FASTRETRANS_THRESH 3
+
+/* Maximal reordering. */
+#define TCP_MAX_REORDERING     127
+
+/* Maximal number of ACKs sent quickly to accelerate slow-start. */
+#define TCP_MAX_QUICKACKS      16
 
 /* urg_data states */
 #define TCP_URG_VALID  0x0100
@@ -292,7 +298,6 @@ static __inline__ int tcp_sk_listen_hashfn(struct sock *sk)
 #define TCP_DELACK_MAX (HZ/5)  /* maximal time to delay before sending an ACK */
 #define TCP_DELACK_MIN (2)     /* minimal time to delay before sending an ACK,
                                 * 2 scheduler ticks, not depending on HZ. */
-#define TCP_ATO_MAX    (HZ/2)  /* Clamp ATO estimator at his value. */
 #define TCP_ATO_MIN    2
 #define TCP_RTO_MAX    (120*HZ)
 #define TCP_RTO_MIN    (HZ/5)
@@ -414,6 +419,19 @@ extern int sysctl_tcp_tw_recycle;
 extern int sysctl_tcp_abort_on_overflow;
 extern int sysctl_tcp_max_orphans;
 extern int sysctl_tcp_max_tw_buckets;
+extern int sysctl_tcp_fack;
+extern int sysctl_tcp_reordering;
+extern int sysctl_tcp_ecn;
+extern int sysctl_tcp_dsack;
+extern int sysctl_tcp_mem[3];
+extern int sysctl_tcp_wmem[3];
+extern int sysctl_tcp_rmem[3];
+extern int sysctl_tcp_app_win;
+extern int sysctl_tcp_adv_win_scale;
+
+extern atomic_t tcp_memory_allocated;
+extern atomic_t tcp_sockets_allocated;
+extern int tcp_memory_pressure;
 
 struct open_request;
 
@@ -606,6 +624,16 @@ extern int                 tcp_rcv_established(struct sock *sk,
                                                    struct tcphdr *th, 
                                                    unsigned len);
 
+static inline void tcp_schedule_ack(struct tcp_opt *tp)
+{
+       tp->ack.pending |= 1;
+}
+
+static inline int tcp_ack_scheduled(struct tcp_opt *tp)
+{
+       return tp->ack.pending&1;
+}
+
 static __inline__ void tcp_dec_quickack_mode(struct tcp_opt *tp)
 {
        if (tp->ack.quick && --tp->ack.quick == 0) {
@@ -614,11 +642,27 @@ static __inline__ void tcp_dec_quickack_mode(struct tcp_opt *tp)
        }
 }
 
+extern void tcp_enter_quickack_mode(struct tcp_opt *tp);
+
 static __inline__ void tcp_delack_init(struct tcp_opt *tp)
 {
        memset(&tp->ack, 0, sizeof(tp->ack));
 }
 
+enum tcp_ca_state
+{
+       TCP_CA_Open = 0,
+#define TCPF_CA_Open   (1<<TCP_CA_Open)
+       TCP_CA_Disorder = 1,
+#define TCPF_CA_Disorder (1<<TCP_CA_Disorder)
+       TCP_CA_CWR = 2,
+#define TCPF_CA_CWR    (1<<TCP_CA_CWR)
+       TCP_CA_Recovery = 3,
+#define TCPF_CA_Recovery (1<<TCP_CA_Recovery)
+       TCP_CA_Loss = 4
+#define TCPF_CA_Loss   (1<<TCP_CA_Loss)
+};
+
 
 enum tcp_tw_status
 {
@@ -640,6 +684,9 @@ extern struct sock *                tcp_check_req(struct sock *sk,struct sk_buff *skb,
 extern int                     tcp_child_process(struct sock *parent,
                                                  struct sock *child,
                                                  struct sk_buff *skb);
+extern void                    tcp_enter_loss(struct sock *sk, int how);
+extern void                    tcp_clear_retrans(struct tcp_opt *tp);
+extern void                    tcp_update_metrics(struct sock *sk);
 
 extern void                    tcp_close(struct sock *sk, 
                                          long timeout);
@@ -661,8 +708,8 @@ extern int                  tcp_recvmsg(struct sock *sk,
 
 extern int                     tcp_listen_start(struct sock *sk);
 
-extern void                    tcp_parse_options(struct sock *sk, struct tcphdr *th,
-                                                 struct tcp_opt *tp, int no_fancy);
+extern void                    tcp_parse_options(struct sk_buff *skb,
+                                                 struct tcp_opt *tp);
 
 /*
  *     TCP v4 functions exported for the inet6 API
@@ -720,7 +767,6 @@ extern __u32 cookie_v4_init_sequence(struct sock *sk, struct sk_buff *skb,
 
 extern int tcp_write_xmit(struct sock *);
 extern int tcp_retransmit_skb(struct sock *, struct sk_buff *);
-extern void tcp_fack_retransmit(struct sock *);
 extern void tcp_xmit_retransmit_queue(struct sock *);
 extern void tcp_simple_retransmit(struct sock *);
 
@@ -736,7 +782,6 @@ extern void tcp_send_ack(struct sock *sk);
 extern void tcp_send_delayed_ack(struct sock *sk);
 
 /* tcp_timer.c */
-extern void tcp_reset_xmit_timer(struct sock *, int, unsigned long);
 extern void tcp_init_xmit_timers(struct sock *);
 extern void tcp_clear_xmit_timers(struct sock *);
 
@@ -744,6 +789,79 @@ extern void tcp_delete_keepalive_timer (struct sock *);
 extern void tcp_reset_keepalive_timer (struct sock *, unsigned long);
 extern int tcp_sync_mss(struct sock *sk, u32 pmtu);
 
+extern const char timer_bug_msg[];
+
+
+static inline void tcp_clear_xmit_timer(struct sock *sk, int what)
+{
+       struct tcp_opt *tp = &sk->tp_pinfo.af_tcp;
+       
+       switch (what) {
+       case TCP_TIME_RETRANS:
+       case TCP_TIME_PROBE0:
+               tp->pending = 0;
+
+#ifdef TCP_CLEAR_TIMERS
+               if (timer_pending(&tp->retransmit_timer) &&
+                   del_timer(&tp->retransmit_timer))
+                       __sock_put(sk);
+#endif
+               break;
+       case TCP_TIME_DACK:
+               tp->ack.blocked = 0;
+               tp->ack.pending = 0;
+
+#ifdef TCP_CLEAR_TIMERS
+               if (timer_pending(&tp->delack_timer) &&
+                   del_timer(&tp->delack_timer))
+                       __sock_put(sk);
+#endif
+               break;
+       default:
+               printk(timer_bug_msg);
+               return;
+       };
+
+}
+
+/*
+ *     Reset the retransmission timer
+ */
+static inline void tcp_reset_xmit_timer(struct sock *sk, int what, unsigned long when)
+{
+       struct tcp_opt *tp = &sk->tp_pinfo.af_tcp;
+
+       if (when > TCP_RTO_MAX) {
+#ifdef TCP_DEBUG
+               __label__ here;
+
+               printk(KERN_DEBUG "reset_xmit_timer sk=%p %d when=0x%lx, caller=%p\n", sk, what, when, &&here);
+here:
+#endif
+               when = TCP_RTO_MAX;
+       }
+
+       switch (what) {
+       case TCP_TIME_RETRANS:
+       case TCP_TIME_PROBE0:
+               tp->pending = what;
+               tp->timeout = jiffies+when;
+               if (!mod_timer(&tp->retransmit_timer, tp->timeout))
+                       sock_hold(sk);
+               break;
+
+       case TCP_TIME_DACK:
+               tp->ack.pending |= 2;
+               tp->ack.timeout = jiffies+when;
+               if (!mod_timer(&tp->delack_timer, tp->ack.timeout))
+                       sock_hold(sk);
+               break;
+
+       default:
+               printk(KERN_DEBUG "bug: unknown timer value\n");
+       };
+}
+
 /* Compute the current effective MSS, taking SACKs and IP options,
  * and even PMTU discovery events into account.
  */
@@ -757,9 +875,9 @@ static __inline__ unsigned int tcp_current_mss(struct sock *sk)
        if (dst && dst->pmtu != tp->pmtu_cookie)
                mss_now = tcp_sync_mss(sk, dst->pmtu);
 
-       if(tp->sack_ok && tp->num_sacks)
+       if (tp->eff_sacks)
                mss_now -= (TCPOLEN_SACK_BASE_ALIGNED +
-                           (tp->num_sacks * TCPOLEN_SACK_PERBLOCK));
+                           (tp->eff_sacks * TCPOLEN_SACK_PERBLOCK));
        return mss_now;
 }
 
@@ -774,15 +892,8 @@ static __inline__ unsigned int tcp_current_mss(struct sock *sk)
 extern __inline__ void tcp_initialize_rcv_mss(struct sock *sk)
 {
        struct tcp_opt *tp = &sk->tp_pinfo.af_tcp;
-       struct dst_entry *dst = __sk_dst_get(sk);
-       int mss;
-
-       if (dst)
-               mss = dst->advmss;
-       else
-               mss = tp->mss_cache;
 
-       tp->ack.rcv_mss = max(min(mss, TCP_MIN_RCVMSS), TCP_MIN_MSS);
+       tp->ack.rcv_mss = max(min(tp->advmss, TCP_MIN_RCVMSS), TCP_MIN_MSS);
 }
 
 static __inline__ void __tcp_fast_path_on(struct tcp_opt *tp, u32 snd_wnd)
@@ -797,9 +908,6 @@ static __inline__ void tcp_fast_path_on(struct tcp_opt *tp)
        __tcp_fast_path_on(tp, tp->snd_wnd>>tp->snd_wscale);
 }
 
-
-
-
 /* Compute the actual receive window we are currently advertising.
  * Rcv_nxt can be after the window if our peer push more data
  * than the offered window.
@@ -819,52 +927,6 @@ static __inline__ u32 tcp_receive_window(struct tcp_opt *tp)
  */
 extern u32     __tcp_select_window(struct sock *sk);
 
-/* Chose a new window to advertise, update state in tcp_opt for the
- * socket, and return result with RFC1323 scaling applied.  The return
- * value can be stuffed directly into th->window for an outgoing
- * frame.
- */
-extern __inline__ u16 tcp_select_window(struct sock *sk)
-{
-       struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
-       u32 cur_win = tcp_receive_window(tp);
-       u32 new_win = __tcp_select_window(sk);
-
-       /* Never shrink the offered window */
-       if(new_win < cur_win) {
-               /* Danger Will Robinson!
-                * Don't update rcv_wup/rcv_wnd here or else
-                * we will not be able to advertise a zero
-                * window in time.  --DaveM
-                *
-                * Relax Will Robinson.
-                */
-               new_win = cur_win;
-       }
-       tp->rcv_wnd = new_win;
-       tp->rcv_wup = tp->rcv_nxt;
-
-       /* RFC1323 scaling applied */
-       new_win >>= tp->rcv_wscale;
-
-#ifdef TCP_FORMAL_WINDOW
-       if (new_win == 0) {
-               /* If we advertise zero window, disable fast path. */
-               tp->pred_flags = 0;
-       } else if (cur_win == 0 && tp->pred_flags == 0 &&
-                  skb_queue_len(&tp->out_of_order_queue) == 0 &&
-                  !tp->urg_data) {
-               /* If we open zero window, enable fast path.
-                  Without this it will be open by the first data packet,
-                  it is too late to merge checksumming to copy.
-                */
-               tcp_fast_path_on(tp);
-       }
-#endif
-
-       return new_win;
-}
-
 /* TCP timestamps are only 32-bits, this causes a slight
  * complication on 64-bit systems since we store a snapshot
  * of jiffies in the buffer control blocks below.  We decidely
@@ -907,6 +969,12 @@ struct tcp_skb_cb {
        __u8            sacked;         /* State flags for SACK/FACK.   */
 #define TCPCB_SACKED_ACKED     0x01    /* SKB ACK'd by a SACK block    */
 #define TCPCB_SACKED_RETRANS   0x02    /* SKB retransmitted            */
+#define TCPCB_LOST             0x04    /* SKB is lost                  */
+#define TCPCB_TAGBITS          0x07    /* All tag bits                 */
+
+#define TCPCB_EVER_RETRANS     0x80    /* Ever retransmitted frame     */
+#define TCPCB_RETRANS          (TCPCB_SACKED_RETRANS|TCPCB_EVER_RETRANS)
+
 
        __u16           urg_ptr;        /* Valid w/URG flags is set.    */
        __u32           ack_seq;        /* Sequence number ACK'd        */
@@ -914,11 +982,28 @@ struct tcp_skb_cb {
 
 #define TCP_SKB_CB(__skb)      ((struct tcp_skb_cb *)&((__skb)->cb[0]))
 
+#define for_retrans_queue(skb, sk, tp) \
+               for (skb = (sk)->write_queue.next;                      \
+                    (skb != (tp)->send_head) &&                        \
+                    (skb != (struct sk_buff *)&(sk)->write_queue);     \
+                    skb=skb->next)
+
+
+#include <net/tcp_ecn.h>
+
+
 /*
  *     Compute minimal free write space needed to queue new packets. 
  */
-#define tcp_min_write_space(__sk) \
-       (atomic_read(&(__sk)->wmem_alloc) / 2)
+static inline int tcp_min_write_space(struct sock *sk)
+{
+       return sk->wmem_queued/2;
+}
+static inline int tcp_wspace(struct sock *sk)
+{
+       return sk->sndbuf - sk->wmem_queued;
+}
 
 
 /* This determines how many packets are "in the network" to the best
@@ -932,89 +1017,97 @@ struct tcp_skb_cb {
  * Read this equation as:
  *
  *     "Packets sent once on transmission queue" MINUS
- *     "Packets acknowledged by FACK information" PLUS
+ *     "Packets left network, but not honestly ACKed yet" PLUS
  *     "Packets fast retransmitted"
  */
 static __inline__ int tcp_packets_in_flight(struct tcp_opt *tp)
 {
-       return tp->packets_out - tp->fackets_out + tp->retrans_out;
+       return tp->packets_out - tp->left_out + tp->retrans_out;
 }
 
 /* Recalculate snd_ssthresh, we want to set it to:
  *
  *     one half the current congestion window, but no
  *     less than two segments
- *
- * We must take into account the current send window
- * as well, however we keep track of that using different
- * units so a conversion is necessary.  -DaveM
- *
- * RED-PEN.
- *  RFC 2581: "an easy mistake to make is to simply use cwnd,
- *             rather than FlightSize"
- * I see no references to FlightSize here. snd_wnd is not FlightSize,
- * it is also apriory characteristics.
- *
- *   FlightSize = min((snd_nxt-snd_una)/mss, packets_out) ?
  */
 extern __inline__ __u32 tcp_recalc_ssthresh(struct tcp_opt *tp)
 {
-       u32 FlightSize = (tp->snd_nxt - tp->snd_una)/tp->mss_cache;
+       return max(tp->snd_cwnd>>1, 2);
+}
+
+/* If cwnd > ssthresh, we may raise ssthresh to be half-way to cwnd.
+ * The exception is rate halving phase, when cwnd is decreasing towards
+ * ssthresh.
+ */
+extern __inline__ __u32 tcp_current_ssthresh(struct tcp_opt *tp)
+{
+       if ((1<<tp->ca_state)&(TCPF_CA_CWR|TCPF_CA_Recovery))
+               return tp->snd_ssthresh;
+       else
+               return max(tp->snd_ssthresh, (tp->snd_cwnd>>1)+(tp->snd_cwnd>>2));
+}
+
+extern void tcp_cwnd_application_limited(struct sock *sk);
 
-       FlightSize = min(FlightSize, tcp_packets_in_flight(tp));
+/* Congestion window validation. (RFC2861) */
 
-       return max(min(FlightSize, tp->snd_cwnd) >> 1, 2);
+static inline void tcp_cwnd_validate(struct sock *sk, struct tcp_opt *tp)
+{
+       if (tp->packets_out >= tp->snd_cwnd) {
+               /* Network is feed fully. */
+               tp->snd_cwnd_used = 0;
+               tp->snd_cwnd_stamp = tcp_time_stamp;
+       } else {
+               /* Network starves. */
+               if (tp->packets_out > tp->snd_cwnd_used)
+                       tp->snd_cwnd_used = tp->packets_out;
+
+               if ((s32)(tcp_time_stamp - tp->snd_cwnd_stamp) >= tp->rto)
+                       tcp_cwnd_application_limited(sk);
+       }
 }
 
 /* Set slow start threshould and cwnd not falling to slow start */
-extern __inline__ void __tcp_enter_cong_avoid(struct tcp_opt *tp)
+extern __inline__ void __tcp_enter_cwr(struct tcp_opt *tp)
 {
+       tp->undo_marker = 0;
        tp->snd_ssthresh = tcp_recalc_ssthresh(tp);
-       if (tp->snd_ssthresh > tp->snd_cwnd_clamp)
-               tp->snd_ssthresh = tp->snd_cwnd_clamp;
-       tp->snd_cwnd = tp->snd_ssthresh;
+       tp->snd_cwnd = min(tp->snd_cwnd, tcp_packets_in_flight(tp)+1);
        tp->snd_cwnd_cnt = 0;
        tp->high_seq = tp->snd_nxt;
+       tp->snd_cwnd_stamp = tcp_time_stamp;
+       TCP_ECN_queue_cwr(tp);
 }
 
-extern __inline__ void tcp_enter_cong_avoid(struct tcp_opt *tp)
+extern __inline__ void tcp_enter_cwr(struct tcp_opt *tp)
 {
-       if (!tp->high_seq || after(tp->snd_nxt, tp->high_seq))
-               __tcp_enter_cong_avoid(tp);
+       tp->prior_ssthresh = 0;
+       if (tp->ca_state < TCP_CA_CWR) {
+               __tcp_enter_cwr(tp);
+               tp->ca_state = TCP_CA_CWR;
+       }
 }
 
+extern __u32 tcp_init_cwnd(struct tcp_opt *tp);
 
-/* Increase initial CWND conservatively, i.e. only if estimated
-   RTT is low enough. It is not quite correct, we should use
-   POWER i.e. RTT*BANDWIDTH, but we still cannot estimate this.
-
-   Numbers are taken from RFC1414.
+/* Slow start with delack produces 3 packets of burst, so that
+ * it is safe "de facto".
  */
-static __inline__ __u32 tcp_init_cwnd(struct tcp_opt *tp)
+static __inline__ __u32 tcp_max_burst(struct tcp_opt *tp)
 {
-       __u32 cwnd;
-
-       if (!tp->srtt || tp->srtt > ((HZ/50)<<3) || tp->mss_cache > 1460)
-               cwnd = 2;
-       else if (tp->mss_cache > 1095)
-               cwnd = 3;
-       else
-               cwnd = 4;
-
-       return min(cwnd, tp->snd_cwnd_clamp);
+       return 3;
 }
 
-
 static __inline__ int tcp_minshall_check(struct tcp_opt *tp)
 {
        return after(tp->snd_sml,tp->snd_una) &&
                !after(tp->snd_sml, tp->snd_nxt);
 }
 
-static __inline__ void tcp_minshall_update(struct tcp_opt *tp, int mss, int len)
+static __inline__ void tcp_minshall_update(struct tcp_opt *tp, int mss, struct sk_buff *skb)
 {
-       if (len < mss)
-               tp->snd_sml = tp->snd_nxt;
+       if (skb->len < mss)
+               tp->snd_sml = TCP_SKB_CB(skb)->end_seq;
 }
 
 /* Return 0, if packet can be sent now without violation Nagle's rules:
@@ -1041,17 +1134,6 @@ static __inline__ int tcp_nagle_check(struct tcp_opt *tp, struct sk_buff *skb, u
 static __inline__ int tcp_snd_test(struct tcp_opt *tp, struct sk_buff *skb,
                                   unsigned cur_mss, int tail)
 {
-       /*
-        * Reset CWND after idle period longer RTO to "restart window".
-        * It is "side" effect of the function, which is _not_ good
-        * from viewpoint of clarity. But we have to make it before
-        * checking congestion window below. Alternative is to prepend
-        * all the calls with this test.
-        */
-       if (tp->packets_out==0 &&
-           (s32)(tcp_time_stamp - tp->lsndtime) > tp->rto)
-               tp->snd_cwnd = min(tp->snd_cwnd, tcp_init_cwnd(tp));
-
        /*      RFC 1122 - section 4.2.3.4
         *
         *      We must queue if
@@ -1062,8 +1144,7 @@ static __inline__ int tcp_snd_test(struct tcp_opt *tp, struct sk_buff *skb,
         *         (part of SWS is done on packetization)
         *         Minshall version sounds: there are no _small_
         *         segments in flight. (tcp_nagle_check)
-        *      c) We are retransmiting [Nagle]
-        *      d) We have too many packets 'in flight'
+        *      c) We have too many packets 'in flight'
         *
         *      Don't use the nagle rule for urgent data (or
         *      for the final FIN -DaveM).
@@ -1081,13 +1162,12 @@ static __inline__ int tcp_snd_test(struct tcp_opt *tp, struct sk_buff *skb,
                 skb_tailroom(skb) < 32) &&
                ((tcp_packets_in_flight(tp) < tp->snd_cwnd) ||
                 (TCP_SKB_CB(skb)->flags & TCPCB_FLAG_FIN)) &&
-               !after(TCP_SKB_CB(skb)->end_seq, tp->snd_una + tp->snd_wnd) &&
-               tp->retransmits == 0);
+               !after(TCP_SKB_CB(skb)->end_seq, tp->snd_una + tp->snd_wnd));
 }
 
 static __inline__ void tcp_check_probe_timer(struct sock *sk, struct tcp_opt *tp)
 {
-       if (!tp->packets_out && !timer_pending(&tp->probe_timer))
+       if (!tp->packets_out && !tp->pending)
                tcp_reset_xmit_timer(sk, TCP_TIME_PROBE0, tp->rto);
 }
 
@@ -1111,6 +1191,7 @@ static __inline__ void __tcp_push_pending_frames(struct sock *sk,
                    tcp_write_xmit(sk))
                        tcp_check_probe_timer(sk, tp);
        }
+       tcp_cwnd_validate(sk, tp);
 }
 
 static __inline__ void tcp_push_pending_frames(struct sock *sk,
@@ -1119,6 +1200,24 @@ static __inline__ void tcp_push_pending_frames(struct sock *sk,
        __tcp_push_pending_frames(sk, tp, tcp_current_mss(sk));
 }
 
+static __inline__ int tcp_may_send_now(struct sock *sk, struct tcp_opt *tp)
+{
+       struct sk_buff *skb = tp->send_head;
+
+       return (skb &&
+               tcp_snd_test(tp, skb, tcp_current_mss(sk), tcp_skb_is_last(sk, skb)));
+}
+
+static __inline__ void tcp_init_wl(struct tcp_opt *tp, u32 ack, u32 seq)
+{
+       tp->snd_wl1 = seq;
+}
+
+static __inline__ void tcp_update_wl(struct tcp_opt *tp, u32 ack, u32 seq)
+{
+       tp->snd_wl1 = seq;
+}
+
 extern void                    tcp_destroy_sock(struct sock *sk);
 
 
@@ -1143,7 +1242,6 @@ static __inline__ int tcp_checksum_complete(struct sk_buff *skb)
                __tcp_checksum_complete(skb);
 }
 
-
 /* Prequeue for VJ style copy to user, combined with checksumming. */
 
 static __inline__ void tcp_prequeue_init(struct tcp_opt *tp)
@@ -1167,12 +1265,15 @@ static __inline__ int tcp_prequeue(struct sock *sk, struct sk_buff *skb)
        if (tp->ucopy.task) {
                if ((tp->ucopy.memory += skb->truesize) <= (sk->rcvbuf<<1)) {
                        __skb_queue_tail(&tp->ucopy.prequeue, skb);
-                       if (skb_queue_len(&tp->ucopy.prequeue) == 1)
+                       if (skb_queue_len(&tp->ucopy.prequeue) == 1) {
                                wake_up_interruptible(sk->sleep);
+                               if (!tcp_ack_scheduled(tp))
+                                       tcp_reset_xmit_timer(sk, TCP_TIME_DACK, (3*TCP_RTO_MIN)/4);
+                       }
                } else {
                        NET_INC_STATS_BH(TCPPrequeueDropped);
                        tp->ucopy.memory -= skb->truesize;
-                       kfree_skb(skb);
+                       __kfree_skb(skb);
                }
                return 1;
        }
@@ -1231,6 +1332,13 @@ static __inline__ void tcp_done(struct sock *sk)
                tcp_destroy_sock(sk);
 }
 
+static __inline__ void tcp_sack_reset(struct tcp_opt *tp)
+{
+       tp->dsack = 0;
+       tp->eff_sacks = 0;
+       tp->num_sacks = 0;
+}
+
 static __inline__ void tcp_build_and_update_options(__u32 *ptr, struct tcp_opt *tp, __u32 tstamp)
 {
        if (tp->tstamp_ok) {
@@ -1241,17 +1349,22 @@ static __inline__ void tcp_build_and_update_options(__u32 *ptr, struct tcp_opt *
                *ptr++ = htonl(tstamp);
                *ptr++ = htonl(tp->ts_recent);
        }
-       if(tp->sack_ok && tp->num_sacks) {
+       if (tp->eff_sacks) {
+               struct tcp_sack_block *sp = tp->dsack ? tp->duplicate_sack : tp->selective_acks;
                int this_sack;
 
                *ptr++ = __constant_htonl((TCPOPT_NOP << 24) |
                                          (TCPOPT_NOP << 16) |
                                          (TCPOPT_SACK << 8) |
                                          (TCPOLEN_SACK_BASE +
-                                          (tp->num_sacks * TCPOLEN_SACK_PERBLOCK)));
-               for(this_sack = 0; this_sack < tp->num_sacks; this_sack++) {
-                       *ptr++ = htonl(tp->selective_acks[this_sack].start_seq);
-                       *ptr++ = htonl(tp->selective_acks[this_sack].end_seq);
+                                          (tp->eff_sacks * TCPOLEN_SACK_PERBLOCK)));
+               for(this_sack = 0; this_sack < tp->eff_sacks; this_sack++) {
+                       *ptr++ = htonl(sp[this_sack].start_seq);
+                       *ptr++ = htonl(sp[this_sack].end_seq);
+               }
+               if (tp->dsack) {
+                       tp->dsack = 0;
+                       tp->eff_sacks--;
                }
        }
 }
@@ -1330,42 +1443,44 @@ extern __inline__ void tcp_select_initial_window(int space, __u32 mss,
                        space >>= 1;
                        (*rcv_wscale)++;
                }
+               if (*rcv_wscale && sysctl_tcp_app_win && space>=mss &&
+                   space - max((space>>sysctl_tcp_app_win), mss>>*rcv_wscale) < 65536/2)
+                       (*rcv_wscale)--;
+       }
+
+       /* Set initial window to value enough for senders,
+        * following RFC1414. Senders, not following this RFC,
+        * will be satisfied with 2.
+        */
+       if (mss > (1<<*rcv_wscale)) {
+               int init_cwnd = 4;
+               if (mss > 1460*3)
+                       init_cwnd = 2;
+               else if (mss > 1460)
+                       init_cwnd = 3;
+               if (*rcv_wnd > init_cwnd*mss)
+                       *rcv_wnd = init_cwnd*mss;
        }
        /* Set the clamp no higher than max representable value */
        (*window_clamp) = min(65535<<(*rcv_wscale),*window_clamp);
 }
 
+static inline int tcp_win_from_space(int space)
+{
+       return sysctl_tcp_adv_win_scale<=0 ?
+               (space>>(-sysctl_tcp_adv_win_scale)) :
+               space - (space>>sysctl_tcp_adv_win_scale);
+}
+
 /* Note: caller must be prepared to deal with negative returns */ 
 extern __inline__ int tcp_space(struct sock *sk)
 {
-       return (sk->rcvbuf - atomic_read(&sk->rmem_alloc)) / 
-               TCP_WINDOW_ADVERTISE_DIVISOR; 
+       return tcp_win_from_space(sk->rcvbuf - atomic_read(&sk->rmem_alloc));
 } 
 
 extern __inline__ int tcp_full_space( struct sock *sk)
 {
-       return sk->rcvbuf / TCP_WINDOW_ADVERTISE_DIVISOR; 
-}
-
-extern __inline__ void tcp_init_buffer_space(struct sock *sk)
-{
-       struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
-       int rcvbuf = tp->advmss+MAX_TCP_HEADER+16+sizeof(struct sk_buff);
-       int sndbuf = tp->mss_clamp+MAX_TCP_HEADER+16+sizeof(struct sk_buff);
-
-       if (sk->rcvbuf < 3*rcvbuf)
-               sk->rcvbuf = min (3*rcvbuf, sysctl_rmem_max);
-
-       /* Reserve slack space to reduce jitter of advertised window. */
-       if (tp->window_clamp >= tcp_full_space(sk)) {
-               int nwin = tcp_full_space(sk) - tp->mss_clamp;
-
-               if (nwin >= MAX_TCP_WINDOW && nwin >= 2*tp->advmss)
-                       tp->window_clamp = nwin;
-       }
-
-       if (sk->sndbuf < 3*sndbuf)
-               sk->sndbuf = min (3*sndbuf, sysctl_wmem_max);
+       return tcp_win_from_space(sk->rcvbuf); 
 }
 
 extern __inline__ void tcp_acceptq_removed(struct sock *sk)
@@ -1473,61 +1588,85 @@ static __inline__ void tcp_openreq_init(struct open_request *req,
        req->snd_wscale = tp->snd_wscale;
        req->wscale_ok = tp->wscale_ok;
        req->acked = 0;
+       req->ecn_ok = 0;
        req->rmt_port = skb->h.th->source;
 }
 
-extern const char timer_bug_msg[];
+#define TCP_MEM_QUANTUM        ((int)PAGE_SIZE)
 
-static inline void tcp_clear_xmit_timer(struct sock *sk, int what)
+static inline void tcp_free_skb(struct sock *sk, struct sk_buff *skb)
 {
-       struct tcp_opt *tp = &sk->tp_pinfo.af_tcp;
-       struct timer_list *timer;
-       
-       switch (what) {
-       case TCP_TIME_RETRANS:
-               timer = &tp->retransmit_timer;
-               break;
-       case TCP_TIME_DACK:
-               tp->ack.blocked = 0;
-               timer = &tp->delack_timer;
-               break;
-       case TCP_TIME_PROBE0:
-               timer = &tp->probe_timer;
-               break;  
-       default:
-               printk(timer_bug_msg);
-               return;
-       };
+       sk->tp_pinfo.af_tcp.queue_shrunk = 1;
+       sk->wmem_queued -= skb->truesize;
+       sk->forward_alloc += skb->truesize;
+       __kfree_skb(skb);
+}
 
-       if (timer_pending(timer) && del_timer(timer))
-               __sock_put(sk);
+static inline void tcp_charge_skb(struct sock *sk, struct sk_buff *skb)
+{
+       sk->wmem_queued += skb->truesize;
+       sk->forward_alloc -= skb->truesize;
 }
 
-/* This function does not return reliable answer. Use it only as advice.
- */
+extern void __tcp_mem_reclaim(struct sock *sk);
+extern int tcp_mem_schedule(struct sock *sk, int size, int kind);
 
-static inline int tcp_timer_is_set(struct sock *sk, int what)
+static inline void tcp_mem_reclaim(struct sock *sk)
 {
-       struct tcp_opt *tp = &sk->tp_pinfo.af_tcp;
-       int ret;
+       if (sk->forward_alloc >= TCP_MEM_QUANTUM)
+               __tcp_mem_reclaim(sk);
+}
 
-       switch (what) {
-       case TCP_TIME_RETRANS:
-               ret = timer_pending(&tp->retransmit_timer);
-               break;
-       case TCP_TIME_DACK:
-               ret = timer_pending(&tp->delack_timer);
-               break;
-       case TCP_TIME_PROBE0:
-               ret = timer_pending(&tp->probe_timer);
-               break;  
-       default:
-               ret = 0;
-               printk(timer_bug_msg);
-       };
-       return ret;
+static inline void tcp_enter_memory_pressure(void)
+{
+       if (!tcp_memory_pressure) {
+               NET_INC_STATS(TCPMemoryPressures);
+               tcp_memory_pressure = 1;
+       }
 }
 
+static inline void tcp_moderate_sndbuf(struct sock *sk)
+{
+       if (!(sk->userlocks&SOCK_SNDBUF_LOCK)) {
+               sk->sndbuf = min(sk->sndbuf, sk->wmem_queued/2);
+               sk->sndbuf = max(sk->sndbuf, SOCK_MIN_SNDBUF);
+       }
+}
+
+static inline struct sk_buff *tcp_alloc_skb(struct sock *sk, int size, int gfp)
+{
+       struct sk_buff *skb = alloc_skb(size, gfp);
+
+       if (skb) {
+               if (sk->forward_alloc >= (int)skb->truesize ||
+                   tcp_mem_schedule(sk, skb->truesize, 0))
+                       return skb;
+               __kfree_skb(skb);
+       } else {
+               tcp_enter_memory_pressure();
+               tcp_moderate_sndbuf(sk);
+       }
+       return NULL;
+}
+
+static inline void tcp_writequeue_purge(struct sock *sk)
+{
+       struct sk_buff *skb;
+
+       while ((skb = __skb_dequeue(&sk->write_queue)) != NULL)
+               tcp_free_skb(sk, skb);
+       tcp_mem_reclaim(sk);
+}
+
+extern void tcp_rfree(struct sk_buff *skb);
+
+static inline void tcp_set_owner_r(struct sk_buff *skb, struct sock *sk)
+{
+       skb->sk = sk;
+       skb->destructor = tcp_rfree;
+       atomic_add(skb->truesize, &sk->rmem_alloc);
+       sk->forward_alloc -= skb->truesize;
+}
 
 extern void tcp_listen_wlock(void);
 
@@ -1570,28 +1709,30 @@ static inline int tcp_fin_time(struct tcp_opt *tp)
        return fin_timeout;
 }
 
-#if 0 /* TCP_DEBUG */
-#define TCP_CHECK_TIMER(sk) \
-do {   struct tcp_opt *__tp = &sk->tp_pinfo.af_tcp; \
-       if (sk->state != TCP_CLOSE) { \
-               if (__tp->packets_out) { \
-                       if (!tcp_timer_is_set(sk, TCP_TIME_RETRANS) && !timer_is_running(&__tp->retransmit_timer) && net_ratelimit()) \
-                               printk(KERN_DEBUG "sk=%p RETRANS" __FUNCTION__ "(%d) %d\n", sk, __LINE__, sk->state); \
-               } else if (__tp->send_head) { \
-                       if (!tcp_timer_is_set(sk, TCP_TIME_PROBE0) && !timer_is_running(&__tp->probe_timer) && net_ratelimit()) \
-                               printk(KERN_DEBUG "sk=%p PROBE0" __FUNCTION__ "(%d) %d\n", sk, __LINE__, sk->state); \
-               } \
-               if (__tp->ack.pending) { \
-                       if (!tcp_timer_is_set(sk, TCP_TIME_DACK) && !timer_is_running(&__tp->delack_timer) && net_ratelimit()) \
-                               printk(KERN_DEBUG "sk=%p DACK" __FUNCTION__ "(%d) %d\n", sk, __LINE__, sk->state); \
-               } \
-                if (__tp->packets_out > skb_queue_len(&sk->write_queue) || \
-                   (__tp->send_head && skb_queue_len(&sk->write_queue) == 0)) { \
-                        printk(KERN_DEBUG "sk=%p QUEUE" __FUNCTION__ "(%d) %d %d %d %p\n", sk, __LINE__, sk->state, __tp->packets_out, skb_queue_len(&sk->write_queue), __tp->send_head); \
-               } \
-       } } while (0)
-#else
+static inline int tcp_paws_check(struct tcp_opt *tp, int rst)
+{
+       if ((s32)(tp->rcv_tsval - tp->ts_recent) >= 0)
+               return 0;
+       if (xtime.tv_sec >= tp->ts_recent_stamp + TCP_PAWS_24DAYS)
+               return 0;
+
+       /* RST segments are not recommended to carry timestamp,
+          and, if they do, it is recommended to ignore PAWS because
+          "their cleanup function should take precedence over timestamps."
+          Certainly, it is mistake. It is necessary to understand the reasons
+          of this constraint to relax it: if peer reboots, clock may go
+          out-of-sync and half-open connections will not be reset.
+          Actually, the problem would be not existing if all
+          the implementations followed draft about maintaining clock
+          via reboots. Linux-2.2 DOES NOT!
+
+          However, we can relax time bounds for RST segments to MSL.
+        */
+       if (rst && xtime.tv_sec >= tp->ts_recent_stamp + TCP_PAWS_MSL)
+               return 0;
+       return 1;
+}
+
 #define TCP_CHECK_TIMER(sk) do { } while (0);
-#endif
 
 #endif /* _TCP_H */
diff --git a/include/net/tcp_ecn.h b/include/net/tcp_ecn.h
new file mode 100644 (file)
index 0000000..db00424
--- /dev/null
@@ -0,0 +1,155 @@
+#ifndef _NET_TCP_ECN_H_
+#define _NET_TCP_ECN_H_ 1
+
+#include <linux/config.h>
+
+#ifdef CONFIG_INET_ECN
+
+#include <net/inet_ecn.h>
+
+#define TCP_HP_BITS (~(TCP_RESERVED_BITS|TCP_FLAG_PSH)|TCP_FLAG_ECE|TCP_FLAG_CWR)
+
+#define        TCP_ECN_OK              1
+#define TCP_ECN_QUEUE_CWR      2
+#define TCP_ECN_DEMAND_CWR     4
+
+static __inline__ void
+TCP_ECN_queue_cwr(struct tcp_opt *tp)
+{
+       if (tp->ecn_flags&TCP_ECN_OK)
+               tp->ecn_flags |= TCP_ECN_QUEUE_CWR;
+}
+
+
+/* Output functions */
+
+static __inline__ void
+TCP_ECN_send_synack(struct tcp_opt *tp, struct sk_buff *skb)
+{
+       TCP_SKB_CB(skb)->flags &= ~TCPCB_FLAG_CWR;
+       if (!(tp->ecn_flags&TCP_ECN_OK))
+               TCP_SKB_CB(skb)->flags &= ~TCPCB_FLAG_ECE;
+}
+
+static __inline__ void
+TCP_ECN_send_syn(struct tcp_opt *tp, struct sk_buff *skb)
+{
+       tp->ecn_flags = 0;
+       if (sysctl_tcp_ecn) {
+               TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_ECE|TCPCB_FLAG_CWR;
+               tp->ecn_flags = TCP_ECN_OK;
+       }
+}
+
+static __inline__ void
+TCP_ECN_make_synack(struct open_request *req, struct tcphdr *th)
+{
+       if (req->ecn_ok)
+               th->ece = 1;
+}
+
+static __inline__ void
+TCP_ECN_send(struct sock *sk, struct tcp_opt *tp, struct sk_buff *skb, int tcp_header_len)
+{
+       if (tp->ecn_flags & TCP_ECN_OK) {
+               /* Not-retransmitted data segment: set ECT and inject CWR. */
+               if (skb->len != tcp_header_len &&
+                   !before(TCP_SKB_CB(skb)->seq, tp->snd_nxt)) {
+                       INET_ECN_xmit(sk);
+                       if (tp->ecn_flags&TCP_ECN_QUEUE_CWR) {
+                               tp->ecn_flags &= ~TCP_ECN_QUEUE_CWR;
+                               skb->h.th->cwr = 1;
+                       }
+               } else {
+                       /* ACK or retransmitted segment: clear ECT|CE */
+                       INET_ECN_dontxmit(sk);
+               }
+               if (tp->ecn_flags & TCP_ECN_DEMAND_CWR)
+                       skb->h.th->ece = 1;
+       }
+}
+
+/* Input functions */
+
+static __inline__ void
+TCP_ECN_accept_cwr(struct tcp_opt *tp, struct sk_buff *skb)
+{
+       if (skb->h.th->cwr)
+               tp->ecn_flags &= ~TCP_ECN_DEMAND_CWR;
+}
+
+static __inline__ void
+TCP_ECN_check_ce(struct tcp_opt *tp, struct sk_buff *skb)
+{
+       if (tp->ecn_flags&TCP_ECN_OK) {
+               if (INET_ECN_is_ce(TCP_SKB_CB(skb)->flags))
+                       tp->ecn_flags |= TCP_ECN_DEMAND_CWR;
+               /* Funny extension: if ECT is not set on a segment,
+                * it is surely retransmit. It is not in ECN RFC,
+                * but Linux follows this rule. */
+               else if (!INET_ECN_is_capable((TCP_SKB_CB(skb)->flags)))
+                       tcp_enter_quickack_mode(tp);
+       }
+}
+
+static __inline__ void
+TCP_ECN_rcv_synack(struct tcp_opt *tp, struct tcphdr *th)
+{
+       if ((tp->ecn_flags&TCP_ECN_OK) && (!th->ece || th->cwr))
+               tp->ecn_flags &= ~TCP_ECN_OK;
+}
+
+static __inline__ void
+TCP_ECN_rcv_syn(struct tcp_opt *tp, struct tcphdr *th)
+{
+       if ((tp->ecn_flags&TCP_ECN_OK) && (!th->ece || !th->cwr))
+               tp->ecn_flags &= ~TCP_ECN_OK;
+}
+
+static __inline__ int
+TCP_ECN_rcv_ecn_echo(struct tcp_opt *tp, struct tcphdr *th)
+{
+       if (th->ece && !th->syn && (tp->ecn_flags&TCP_ECN_OK))
+               return 1;
+       return 0;
+}
+
+static __inline__ void
+TCP_ECN_openreq_child(struct tcp_opt *tp, struct open_request *req)
+{
+       tp->ecn_flags = req->ecn_ok ? TCP_ECN_OK : 0;
+}
+
+static __inline__ void
+TCP_ECN_create_request(struct open_request *req, struct tcphdr *th)
+{
+       if (sysctl_tcp_ecn && th->ece && th->cwr)
+               req->ecn_ok = 1;
+}
+
+
+
+#else
+
+#define TCP_HP_BITS (~(TCP_RESERVED_BITS|TCP_FLAG_PSH))
+
+
+#define TCP_ECN_send_syn(x...)         do { } while (0)
+#define TCP_ECN_send_synack(x...)      do { } while (0)
+#define TCP_ECN_make_synack(x...)      do { } while (0)
+#define TCP_ECN_send(x...)             do { } while (0)
+
+#define TCP_ECN_queue_cwr(x...)                do { } while (0)
+
+#define TCP_ECN_accept_cwr(x...)       do { } while (0)
+#define TCP_ECN_check_ce(x...)         do { } while (0)
+#define TCP_ECN_rcv_synack(x...)       do { } while (0)
+#define TCP_ECN_rcv_syn(x...)          do { } while (0)
+#define TCP_ECN_rcv_ecn_echo(x...)     (0)
+#define TCP_ECN_openreq_child(x...)    do { } while (0)
+#define TCP_ECN_create_request(x...)   do { } while (0)
+
+
+#endif
+
+#endif
index 641de8b220f21d9093311983ae81c37ee4f8d7ef..3bb7ab55a8eac39c3f8a8bb5283b11d1926ebac8 100644 (file)
@@ -325,7 +325,8 @@ static inline int copy_mm(unsigned long clone_flags, struct task_struct * tsk)
        if (retval)
                goto free_pt;
 
-       init_new_context(tsk,mm);
+       if (init_new_context(tsk,mm))
+               goto free_pt;
 
 good_mm:
        tsk->mm = mm;
index 996fe279b54fc05788ac5896fce01094a4330530..3ce8829e4eeaced5434f466685e0c256ca0401f6 100644 (file)
@@ -250,7 +250,7 @@ char * strsep(char **s, const char * ct)
        
        *s = strpbrk( sbegin, ct);
        if (*s && **s != '\0')
-               **s++ = '\0';
+               *(*s)++ = '\0';
        return (sbegin);
 }
 #endif
index f6df3a526e2297cd0131a2757e7a5cd5a221a43c..ecbea0425e27743e31b26df073115dcc71a119db 100644 (file)
@@ -832,7 +832,8 @@ int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new, int override
        if (lladdr != neigh->ha) {
                memcpy(&neigh->ha, lladdr, dev->addr_len);
                neigh_update_hhs(neigh);
-               neigh->confirmed = jiffies - (neigh->parms->base_reachable_time<<1);
+               if (!(new&NUD_CONNECTED))
+                       neigh->confirmed = jiffies - (neigh->parms->base_reachable_time<<1);
 #ifdef CONFIG_ARPD
                notify = 1;
 #endif
index 3aaa2fd9a60871e97e735e8adbba71901a0d85fa..ce5d7c081e3268908e580fb9e504ce236707b876 100644 (file)
@@ -7,7 +7,7 @@
  *             handler for protocols to use and generic option handler.
  *
  *
- * Version:    $Id: sock.c,v 1.96 2000/07/26 01:04:14 davem Exp $
+ * Version:    $Id: sock.c,v 1.97 2000/08/09 11:59:03 davem Exp $
  *
  * Authors:    Ross Biro, <bir7@leland.Stanford.Edu>
  *             Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
@@ -231,6 +231,7 @@ int sock_setsockopt(struct socket *sock, int level, int optname,
                        if (val > sysctl_wmem_max)
                                val = sysctl_wmem_max;
 
+                       sk->userlocks |= SOCK_SNDBUF_LOCK;
                        sk->sndbuf = max(val*2,SOCK_MIN_SNDBUF);
 
                        /*
@@ -249,6 +250,7 @@ int sock_setsockopt(struct socket *sock, int level, int optname,
                        if (val > sysctl_rmem_max)
                                val = sysctl_rmem_max;
 
+                       sk->userlocks |= SOCK_RCVBUF_LOCK;
                        /* FIXME: is this lower bound the right one? */
                        sk->rcvbuf = max(val*2,SOCK_MIN_RCVBUF);
                        break;
index b162de66c5b400f75958ef15e5d69057c81bb9ee..1a6a53bc883702d47ce59563bef51f28dfad1e91 100644 (file)
@@ -11,7 +11,7 @@ O_TARGET := ipv4.o
 IPV4_OBJS := utils.o route.o inetpeer.o proc.o protocol.o \
             ip_input.o ip_fragment.o ip_forward.o ip_options.o \
             ip_output.o ip_sockglue.o \
-            tcp.o tcp_input.o tcp_output.o tcp_timer.o tcp_ipv4.o\
+            tcp.o tcp_input.o tcp_output.o tcp_timer.o tcp_ipv4.o tcp_minisocks.o \
             raw.o udp.o arp.o icmp.o devinet.o af_inet.o igmp.o \
             sysctl_net_ipv4.o fib_frontend.o fib_semantics.o fib_hash.o
 IPV4X_OBJS :=
index 40aa7cd3aa3abb1aca9340fa719cb8e0137d6f66..c35f35f1e5a853f2109725f6ad073bec08bd4c47 100644 (file)
@@ -5,7 +5,7 @@
  *
  *             PF_INET protocol family socket handler.
  *
- * Version:    $Id: af_inet.c,v 1.110 2000/04/25 04:13:34 davem Exp $
+ * Version:    $Id: af_inet.c,v 1.111 2000/08/09 11:59:03 davem Exp $
  *
  * Authors:    Ross Biro, <bir7@leland.Stanford.Edu>
  *             Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
@@ -159,6 +159,8 @@ void inet_sock_destruct(struct sock *sk)
 
        BUG_TRAP(atomic_read(&sk->rmem_alloc) == 0);
        BUG_TRAP(atomic_read(&sk->wmem_alloc) == 0);
+       BUG_TRAP(sk->wmem_queued == 0);
+       BUG_TRAP(sk->forward_alloc == 0);
 
        if (sk->protinfo.af_inet.opt)
                kfree(sk->protinfo.af_inet.opt);
@@ -494,6 +496,8 @@ static int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
                goto out;
        }
 
+       if (sk->rcv_saddr)
+               sk->userlocks |= SOCK_BINDADDR_LOCK;
        sk->sport = htons(sk->num);
        sk->daddr = 0;
        sk->dport = 0;
index 22429bb5e03207a087ea15d63e90a5813c360f85..4287c7525410fc37164e96203c8ea4fa524cdb17 100644 (file)
@@ -5,7 +5,7 @@
  *
  *             The IP to API glue.
  *             
- * Version:    $Id: ip_sockglue.c,v 1.50 2000/07/26 01:04:17 davem Exp $
+ * Version:    $Id: ip_sockglue.c,v 1.51 2000/08/09 11:59:04 davem Exp $
  *
  * Authors:    see ip.c
  *
@@ -724,16 +724,14 @@ int ip_getsockopt(struct sock *sk, int level, int optname, char *optval, int *op
                        break;
                case IP_MULTICAST_IF:
                {
-                       struct ip_mreqn mreq;
-                       len = min(len,sizeof(struct ip_mreqn));
-                       mreq.imr_ifindex = sk->protinfo.af_inet.mc_index;
-                       mreq.imr_address.s_addr = sk->protinfo.af_inet.mc_addr;
-                       mreq.imr_multiaddr.s_addr = 0;
+                       struct in_addr addr;
+                       len = min(len,sizeof(struct in_addr));
+                       addr.s_addr = sk->protinfo.af_inet.mc_addr;
                        release_sock(sk);
 
                        if(put_user(len, optlen))
                                return -EFAULT;
-                       if(copy_to_user((void *)optval, &mreq, len))
+                       if(copy_to_user((void *)optval, &addr, len))
                                return -EFAULT;
                        return 0;
                }
index cbf6e19ebd6db0efa76b2d56ff4a683c0f3c1564..cb430624f87ade4e5402c999abdaa23d61cfa2d9 100644 (file)
@@ -19,7 +19,8 @@ IP_NF_COMPAT_LAYER:=ip_fw_compat.o ip_fw_compat_redir.o ip_fw_compat_masq.o $(IP
 
 # Link order matters here.
 ifeq ($(CONFIG_IP_NF_CONNTRACK),y)
-O_OBJS += ip_conntrack_standalone.o $(IP_NF_CONNTRACK_OBJ)
+OX_OBJS += ip_conntrack_standalone.o
+O_OBJS += $(IP_NF_CONNTRACK_OBJ)
 else
   ifeq ($(CONFIG_IP_NF_CONNTRACK),m)
   MI_OBJS += $(IP_NF_CONNTRACK_OBJ)
index da3f9782181829b5c1ce75a2aac4dff9d9572ea6..2e4dd82ee0ad50678f4e5ac63978ae5a04661694 100644 (file)
@@ -660,8 +660,8 @@ unsigned int ip_conntrack_in(unsigned int hooknum,
        } else if (((*pskb)->nh.iph->daddr & htonl(0x000000FF)) 
                   == htonl(0x000000FF)) {
                printk("Should bcast: %u.%u.%u.%u->%u.%u.%u.%u (sk=%p, ptype=%u)\n",
-                      IP_PARTS((*pskb)->nh.iph->saddr),
-                      IP_PARTS((*pskb)->nh.iph->daddr),
+                      NIPQUAD((*pskb)->nh.iph->saddr),
+                      NIPQUAD((*pskb)->nh.iph->daddr),
                       (*pskb)->sk, (*pskb)->pkt_type);
        }
 #endif
@@ -998,7 +998,7 @@ getorigdst(struct sock *sk, int optval, void *user, int *len)
                        .tuple.dst.ip;
 
                DEBUGP("SO_ORIGINAL_DST: %u.%u.%u.%u %u\n",
-                      IP_PARTS(sin.sin_addr.s_addr), ntohs(sin.sin_port));
+                      NIPQUAD(sin.sin_addr.s_addr), ntohs(sin.sin_port));
                ip_conntrack_put(h->ctrack);
                if (copy_to_user(user, &sin, sizeof(sin)) != 0)
                        return -EFAULT;
@@ -1006,8 +1006,8 @@ getorigdst(struct sock *sk, int optval, void *user, int *len)
                        return 0;
        }
        DEBUGP("SO_ORIGINAL_DST: Can't find %u.%u.%u.%u/%u-%u.%u.%u.%u/%u.\n",
-              IP_PARTS(tuple.src.ip), ntohs(tuple.src.u.tcp.port),
-              IP_PARTS(tuple.dst.ip), ntohs(tuple.dst.u.tcp.port));
+              NIPQUAD(tuple.src.ip), ntohs(tuple.src.u.tcp.port),
+              NIPQUAD(tuple.dst.ip), ntohs(tuple.dst.u.tcp.port));
        return -ENOENT;
 }
 
index ce0023ec39f79ea759347e80d5c17259e2751a9b..cfdb28f123e059360db162d68404a5f53c984b0e 100644 (file)
@@ -21,14 +21,6 @@ struct module *ip_conntrack_ftp = THIS_MODULE;
 #define DEBUGP(format, args...)
 #endif
 
-#define IP_PARTS_NATIVE(n)                     \
-(unsigned int)((n)>>24)&0xFF,                  \
-(unsigned int)((n)>>16)&0xFF,                  \
-(unsigned int)((n)>>8)&0xFF,                   \
-(unsigned int)((n)&0xFF)
-
-#define IP_PARTS(n) IP_PARTS_NATIVE(ntohl(n))
-
 static struct {
        const char *pattern;
        size_t plen;
@@ -111,7 +103,7 @@ static int help(const struct iphdr *iph, size_t len,
                struct ip_conntrack *ct,
                enum ip_conntrack_info ctinfo)
 {
-       /* tcplen not negative guarenteed by ip_conntrack_tcp.c */
+       /* tcplen not negative guaranteed by ip_conntrack_tcp.c */
        struct tcphdr *tcph = (void *)iph + iph->ihl * 4;
        const char *data = (const char *)tcph + tcph->doff * 4;
        unsigned int tcplen = len - iph->ihl * 4;
@@ -142,8 +134,8 @@ static int help(const struct iphdr *iph, size_t len,
        if (tcp_v4_check(tcph, tcplen, iph->saddr, iph->daddr,
                         csum_partial((char *)tcph, tcplen, 0))) {
                DEBUGP("ftp_help: bad csum: %p %u %u.%u.%u.%u %u.%u.%u.%u\n",
-                      tcph, tcplen, IP_PARTS(iph->saddr),
-                      IP_PARTS(iph->daddr));
+                      tcph, tcplen, NIPQUAD(iph->saddr),
+                      NIPQUAD(iph->daddr));
                return NF_ACCEPT;
        }
 
index 20e4aa426711cc41ee6fb1accfd5f60c4ff8ffd5..f1faab1be18705f0aafeded954762a39bedeca25 100644 (file)
@@ -332,7 +332,6 @@ static void __exit fini(void)
 module_init(init);
 module_exit(fini);
 
-#ifdef MODULE
 EXPORT_SYMBOL(ip_conntrack_protocol_register);
 EXPORT_SYMBOL(invert_tuplepr);
 EXPORT_SYMBOL(ip_conntrack_alter_reply);
@@ -346,4 +345,3 @@ EXPORT_SYMBOL(ip_ct_refresh);
 EXPORT_SYMBOL(ip_conntrack_expect_related);
 EXPORT_SYMBOL(ip_conntrack_tuple_taken);
 EXPORT_SYMBOL(ip_ct_gather_frags);
-#endif
index 0f7b4f8ca1b33845396fa8954306870b1d87367b..9ba62dc8480a32326cd57d42096984930fa05529 100644 (file)
@@ -206,7 +206,7 @@ do_extra_mangle(u_int32_t var_ip, u_int32_t *other_ipp)
        /* FIXME: IPTOS_TOS(iph->tos) --RR */
        if (ip_route_output(&rt, var_ip, 0, 0, 0) != 0) {
                DEBUGP("do_extra_mangle: Can't get route to %u.%u.%u.%u\n",
-                      IP_PARTS(var_ip));
+                      NIPQUAD(var_ip));
                return 0;
        }
 
@@ -312,7 +312,7 @@ find_best_ips_proto(struct ip_conntrack_tuple *tuple,
                            && *var_ipp != orig_dstip
                            && !do_extra_mangle(*var_ipp, other_ipp)) {
                                DEBUGP("Range %u %u.%u.%u.%u rt failed!\n",
-                                      i, IP_PARTS(*var_ipp));
+                                      i, NIPQUAD(*var_ipp));
                                /* Can't route?  This whole range part is
                                 * probably screwed, but keep trying
                                 * anyway. */
@@ -513,8 +513,8 @@ ip_nat_setup_info(struct ip_conntrack *conntrack,
                       ? " PROTO_SPECIFIED" : "",
                       (mr->range[i].flags & IP_NAT_RANGE_FULL)
                       ? " FULL" : "",
-                      IP_PARTS(mr->range[i].min_ip),
-                      IP_PARTS(mr->range[i].max_ip),
+                      NIPQUAD(mr->range[i].min_ip),
+                      NIPQUAD(mr->range[i].max_ip),
                       mr->range[i].min.all,
                       mr->range[i].max.all);
        }
@@ -715,7 +715,7 @@ do_bindings(struct ip_conntrack *ct,
                               *pskb,
                               info->manips[i].maniptype == IP_NAT_MANIP_SRC
                               ? "SRC" : "DST",
-                              IP_PARTS(info->manips[i].manip.ip),
+                              NIPQUAD(info->manips[i].manip.ip),
                               htons(info->manips[i].manip.u.all));
                        manip_pkt((*pskb)->nh.iph->protocol,
                                  (*pskb)->nh.iph,
@@ -797,7 +797,7 @@ icmp_reply_translation(struct sk_buff *skb,
                        DEBUGP("icmp_reply: inner %s -> %u.%u.%u.%u %u\n",
                               info->manips[i].maniptype == IP_NAT_MANIP_SRC
                               ? "DST" : "SRC",
-                              IP_PARTS(info->manips[i].manip.ip),
+                              NIPQUAD(info->manips[i].manip.ip),
                               ntohs(info->manips[i].manip.u.udp.port));
                        manip_pkt(inner->protocol, inner,
                                  skb->len - ((void *)inner - (void *)iph),
@@ -812,7 +812,7 @@ icmp_reply_translation(struct sk_buff *skb,
                        DEBUGP("icmp_reply: outer %s -> %u.%u.%u.%u\n",
                               info->manips[i].maniptype == IP_NAT_MANIP_SRC
                               ? "SRC" : "DST",
-                              IP_PARTS(info->manips[i].manip.ip));
+                              NIPQUAD(info->manips[i].manip.ip));
                        manip_pkt(0, iph, skb->len,
                                  &info->manips[i].manip,
                                  info->manips[i].maniptype,
index d4eb364059982690437e2c1eec041a782f6be3f9..c3d8ccab084802120d3e3d4e342c26eb72a6479f 100644 (file)
@@ -54,13 +54,13 @@ ftp_nat_expected(struct sk_buff **pskb,
                newdstip = master->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip;
                newsrcip = master->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip;
                DEBUGP("nat_expected: PORT cmd. %u.%u.%u.%u->%u.%u.%u.%u\n",
-                      IP_PARTS(newsrcip), IP_PARTS(newdstip));
+                      NIPQUAD(newsrcip), NIPQUAD(newdstip));
        } else {
                /* PASV command: make the connection go to the server */
                newdstip = master->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip;
                newsrcip = master->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip;
                DEBUGP("nat_expected: PASV cmd. %u.%u.%u.%u->%u.%u.%u.%u\n",
-                      IP_PARTS(newsrcip), IP_PARTS(newdstip));
+                      NIPQUAD(newsrcip), NIPQUAD(newdstip));
        }
        UNLOCK_BH(&ip_ftp_lock);
 
@@ -69,7 +69,7 @@ ftp_nat_expected(struct sk_buff **pskb,
        else
                newip = newdstip;
 
-       DEBUGP("nat_expected: IP to %u.%u.%u.%u\n", IP_PARTS(newip));
+       DEBUGP("nat_expected: IP to %u.%u.%u.%u\n", NIPQUAD(newip));
 
        mr.rangesize = 1;
        /* We don't want to manip the per-protocol, just the IPs... */
@@ -110,7 +110,7 @@ mangle_packet(struct sk_buff **pskb,
 
        MUST_BE_LOCKED(&ip_ftp_lock);
        sprintf(buffer, "%u,%u,%u,%u,%u,%u",
-               IP_PARTS(newip), port>>8, port&0xFF);
+               NIPQUAD(newip), port>>8, port&0xFF);
 
        tcplen = (*pskb)->len - iph->ihl * 4;
        newtcplen = tcplen - matchlen + strlen(buffer);
index 1ebea495c03fce6ea8b9a68c86e21a50a2c75343..a22858cb3fa364212a4b942ca5364313f0d25fc1 100644 (file)
@@ -226,7 +226,7 @@ alloc_null_binding(struct ip_conntrack *conntrack,
                = { 1, { { IP_NAT_RANGE_MAP_IPS, ip, ip, { 0 }, { 0 } } } };
 
        DEBUGP("Allocating NULL binding for %p (%u.%u.%u.%u)\n", conntrack,
-              IP_PARTS(ip));
+              NIPQUAD(ip));
        return ip_nat_setup_info(conntrack, &mr, hooknum);
 }
 
index 4f8a8de07bee8970c24cc2d83b55fd0e232ef36f..85787ed88966235aefc99ce85889194d5c9e4a8f 100644 (file)
@@ -4,10 +4,11 @@
  *
  * (C) 2000 James Morris, this code is GPL.
  *
- * 2000-03-27: Simplified code (thanks to Andi Kleen for clues). (JM)
- * 2000-05-20: Fixed notifier problems (following Miguel Freitas' report). (JM)
+ * 2000-03-27: Simplified code (thanks to Andi Kleen for clues).
+ * 2000-05-20: Fixed notifier problems (following Miguel Freitas' report).
  * 2000-06-19: Fixed so nfmark is copied to metadata (reported by Sebastian 
- *             Zander). (JM)
+ *             Zander).
+ * 2000-08-01: Added Nick Williams' MAC support.
  *
  */
 #include <linux/module.h>
@@ -398,6 +399,14 @@ static struct sk_buff *netlink_build_message(ipq_queue_element_t *e, int *errp)
        else pm->indev_name[0] = '\0';
        if (e->info->outdev) strcpy(pm->outdev_name, e->info->outdev->name);
        else pm->outdev_name[0] = '\0';
+       pm->hw_protocol = e->skb->protocol;
+       if (e->skb->rx_dev) {
+               pm->hw_type = e->skb->rx_dev->type;
+               if (e->skb->rx_dev->hard_header_parse)
+                       pm->hw_addrlen =
+                               e->skb->rx_dev->hard_header_parse(e->skb,
+                                                                 pm->hw_addr);
+       }
        if (data_len)
                memcpy(pm->payload, e->skb->data, data_len);
        nlh->nlmsg_len = skb->tail - old_tail;
index a04a5a8019dc8d02412190d7bcf6c23c435f60bb..bdb4fd99c2fce4570054171cc2819efd7e4ab12c 100644 (file)
@@ -288,14 +288,15 @@ ipt_log_target(struct sk_buff **pskb,
        if (in && !out) {
                /* MAC logging for input chain only. */
                printk("MAC=");
-               if ((*pskb)->dev && (*pskb)->dev->hard_header_len) {
+               if ((*pskb)->dev && (*pskb)->dev->hard_header_len && (*pskb)->mac.raw != iph) {
                        int i;
                        unsigned char *p = (*pskb)->mac.raw;
                        for (i = 0; i < (*pskb)->dev->hard_header_len; i++,p++)
                                printk("%02x%c", *p,
                                       i==(*pskb)->dev->hard_header_len - 1
                                       ? ' ':':');
-               }
+               } else
+                       printk(" ");
        }
 
        dump_packet(loginfo, iph, (*pskb)->len, 1);
index 2f9c119157e769ecefebed31e21532d6f2698d21..99164a7a0201c2e35711b9d94da807758bf4e90d 100644 (file)
@@ -92,7 +92,7 @@ masquerade_target(struct sk_buff **pskb,
        }
 
        newsrc = rt->rt_src;
-       DEBUGP("newsrc = %u.%u.%u.%u\n", IP_PARTS(newsrc));
+       DEBUGP("newsrc = %u.%u.%u.%u\n", NIPQUAD(newsrc));
        ip_rt_put(rt);
 
        WRITE_LOCK(&masq_lock);
index 7c8bf2f1e54c15427c55c60908fb1808f3af6f12..2d8ad255f074a2074561a83d727e81fd9aa02a21 100644 (file)
@@ -247,11 +247,6 @@ static int check(const char *tablename,
                        DEBUGP("REJECT: TCP_RESET illegal for non-tcp\n");
                        return 0;
                }
-               /* Only for local input.  Rest is too dangerous. */
-               if ((hook_mask & ~(1 << NF_IP_LOCAL_IN)) != 0) {
-                       DEBUGP("REJECT: TCP_RESET only from INPUT\n");
-                       return 0;
-               }
        }
 
        return 1;
index ae0904a4d5ed341425adcf402a23b4f600752bb5..6665f1ce475a65228486e9fb9ec4f2033e0e5e08 100644 (file)
 #include <linux/netfilter_ipv4/ip_tables.h>
 #include <linux/netfilter_ipv4/ipt_limit.h>
 
-#define IP_PARTS_NATIVE(n)                     \
-(unsigned int)((n)>>24)&0xFF,                  \
-(unsigned int)((n)>>16)&0xFF,                  \
-(unsigned int)((n)>>8)&0xFF,                   \
-(unsigned int)((n)&0xFF)
-
-#define IP_PARTS(n) IP_PARTS_NATIVE(ntohl(n))
-
 /* The algorithm used is the Simple Token Bucket Filter (TBF)
  * see net/sched/sch_tbf.c in the linux source tree
  */
index f1ff8f1eede31aa29f39fa80be7514f6f854b49e..559d75aac4ecdbbafb906f7840d47050c1a1452e 100644 (file)
@@ -7,7 +7,7 @@
  *             PROC file system.  It is mainly used for debugging and
  *             statistics.
  *
- * Version:    $Id: proc.c,v 1.43 2000/07/07 22:29:42 davem Exp $
+ * Version:    $Id: proc.c,v 1.44 2000/08/09 11:59:04 davem Exp $
  *
  * Authors:    Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
  *             Gerald J. Heim, <heim@peanuts.informatik.uni-tuebingen.de>
@@ -71,9 +71,11 @@ int afinet_get_info(char *buffer, char **start, off_t offset, int length)
 
        int len  = socket_get_info(buffer,start,offset,length);
 
-       len += sprintf(buffer+len,"TCP: inuse %d orphan %d tw %d\n",
+       len += sprintf(buffer+len,"TCP: inuse %d orphan %d tw %d alloc %d mem %d\n",
                       fold_prot_inuse(&tcp_prot),
-                      atomic_read(&tcp_orphan_count), tcp_tw_count);
+                      atomic_read(&tcp_orphan_count), tcp_tw_count,
+                      atomic_read(&tcp_sockets_allocated),
+                      atomic_read(&tcp_memory_allocated));
        len += sprintf(buffer+len,"UDP: inuse %d\n",
                       fold_prot_inuse(&udp_prot));
        len += sprintf(buffer+len,"RAW: inuse %d\n",
@@ -175,7 +177,22 @@ int netstat_get_info(char *buffer, char **start, off_t offset, int length)
                      " ListenOverflows ListenDrops"
                      " TCPPrequeued TCPDirectCopyFromBacklog"
                      " TCPDirectCopyFromPrequeue TCPPrequeueDropped"
-                     " TCPHPHits TCPHPHitsToUser\n"
+                     " TCPHPHits TCPHPHitsToUser"
+                     " TCPPureAcks TCPHPAcks"
+                     " TCPRenoRecovery TCPSackRecovery"
+                     " TCPSACKReneging"
+                     " TCPFACKReorder TCPSACKReorder TCPRenoReorder TCPTSReorder"
+                     " TCPFullUndo TCPPartialUndo TCPDSACKUndo TCPLossUndo"
+                     " TCPLoss TCPLostRetransmit"
+                     " TCPRenoFailures TCPSackFailures TCPLossFailures"
+                     " TCPFastRetrans TCPForwardRetrans TCPSlowStartRetrans"
+                     " TCPTimeouts"
+                     " TCPRenoRecoveryFail TCPSackRecoveryFail"
+                     " TCPSchedulerFailed TCPRcvCollapsed"
+                     " TCPDSACKOldSent TCPDSACKOfoSent TCPDSACKRecv TCPDSACKOfoRecv"
+                     " TCPAbortOnSyn TCPAbortOnData TCPAbortOnClose"
+                     " TCPAbortOnMemory TCPAbortOnTimeout TCPAbortOnLinger"
+                     " TCPAbortFailed TCPMemoryPressures\n"
                      "TcpExt:");
        for (i=0; i<offsetof(struct linux_mib, __pad)/sizeof(unsigned long); i++)
                len += sprintf(buffer+len, " %lu", fold_field((unsigned long*)net_statistics, sizeof(struct linux_mib), i));
index 5ac30dc40130f91fca898eee9f321da635d663cc..81f20361a56697d49471c948aa561b3cd3cb10e7 100644 (file)
@@ -5,7 +5,7 @@
  *
  *             RAW - implementation of IP "raw" sockets.
  *
- * Version:    $Id: raw.c,v 1.52 2000/07/08 00:20:43 davem Exp $
+ * Version:    $Id: raw.c,v 1.53 2000/08/09 11:59:04 davem Exp $
  *
  * Authors:    Ross Biro, <bir7@leland.Stanford.Edu>
  *             Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
@@ -628,8 +628,8 @@ static void get_raw_sock(struct sock *sp, char *tmpbuf, int i)
                i, src, srcp, dest, destp, sp->state, 
                atomic_read(&sp->wmem_alloc), atomic_read(&sp->rmem_alloc),
                timer_active, timer_expires-jiffies, 0,
-               sp->socket->inode->i_uid, 0,
-               sp->socket ? sp->socket->inode->i_ino : 0,
+               sock_i_uid(sp), 0,
+               sock_i_ino(sp),
                atomic_read(&sp->refcnt), sp);
 }
 
index eb00518bdd9960859e918d3051faf72b3a3bd023..d4e9806a0ff2ce2110efefdd368a12f52cf8e799 100644 (file)
@@ -5,7 +5,7 @@
  *
  *             ROUTE - implementation of the IP router.
  *
- * Version:    $Id: route.c,v 1.88 2000/07/07 23:47:45 davem Exp $
+ * Version:    $Id: route.c,v 1.89 2000/08/09 11:59:04 davem Exp $
  *
  * Authors:    Ross Biro, <bir7@leland.Stanford.Edu>
  *             Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
@@ -1127,8 +1127,6 @@ static void rt_set_nexthop(struct rtable *rt, struct fib_result *res, u32 itag)
                memcpy(&rt->u.dst.mxlock, fi->fib_metrics, sizeof(fi->fib_metrics));
                if (fi->fib_mtu == 0) {
                        rt->u.dst.pmtu = rt->u.dst.dev->mtu;
-                       if (rt->u.dst.pmtu > IP_MAX_MTU)
-                               rt->u.dst.pmtu = IP_MAX_MTU;
                        if (rt->u.dst.mxlock&(1<<RTAX_MTU) &&
                            rt->rt_gateway != rt->rt_dst &&
                            rt->u.dst.pmtu > 576)
@@ -1139,9 +1137,9 @@ static void rt_set_nexthop(struct rtable *rt, struct fib_result *res, u32 itag)
 #endif
        } else {
                rt->u.dst.pmtu  = rt->u.dst.dev->mtu;
-               if (rt->u.dst.pmtu > IP_MAX_MTU)
-                       rt->u.dst.pmtu = IP_MAX_MTU;
        }
+       if (rt->u.dst.pmtu > IP_MAX_MTU)
+               rt->u.dst.pmtu = IP_MAX_MTU;
        if (rt->u.dst.advmss == 0)
                rt->u.dst.advmss = max(rt->u.dst.dev->mtu-40, ip_rt_min_advmss);
        if (rt->u.dst.advmss > 65535-40)
index d9416525ba7f7b2ccb1c52060b0604782588ab35..4274045e875e7fc6e4b8d23c87b52fba83798922 100644 (file)
@@ -1,7 +1,7 @@
 /*
  * sysctl_net_ipv4.c: sysctl interface to net IPV4 subsystem.
  *
- * $Id: sysctl_net_ipv4.c,v 1.43 2000/01/16 05:11:27 davem Exp $
+ * $Id: sysctl_net_ipv4.c,v 1.44 2000/08/09 11:59:04 davem Exp $
  *
  * Begun April 1, 1996, Mike Shaver.
  * Added /proc/sys/net/ipv4 directory entry (empty =) ). [MS]
@@ -209,6 +209,24 @@ ctl_table ipv4_table[] = {
         &proc_dointvec_jiffies, &sysctl_jiffies},
        {NET_TCP_ORPHAN_RETRIES, "tcp_orphan_retries",
         &sysctl_tcp_orphan_retries, sizeof(int), 0644, NULL, &proc_dointvec},
+       {NET_TCP_FACK, "tcp_fack",
+        &sysctl_tcp_fack, sizeof(int), 0644, NULL, &proc_dointvec},
+       {NET_TCP_REORDERING, "tcp_reordering",
+        &sysctl_tcp_reordering, sizeof(int), 0644, NULL, &proc_dointvec},
+       {NET_TCP_ECN, "tcp_ecn",
+        &sysctl_tcp_ecn, sizeof(int), 0644, NULL, &proc_dointvec},
+       {NET_TCP_DSACK, "tcp_dsack",
+        &sysctl_tcp_dsack, sizeof(int), 0644, NULL, &proc_dointvec},
+       {NET_TCP_MEM, "tcp_mem",
+        &sysctl_tcp_mem, sizeof(sysctl_tcp_mem), 0644, NULL, &proc_dointvec},
+       {NET_TCP_WMEM, "tcp_wmem",
+        &sysctl_tcp_wmem, sizeof(sysctl_tcp_wmem), 0644, NULL, &proc_dointvec},
+       {NET_TCP_RMEM, "tcp_rmem",
+        &sysctl_tcp_rmem, sizeof(sysctl_tcp_rmem), 0644, NULL, &proc_dointvec},
+       {NET_TCP_APP_WIN, "tcp_app_win",
+        &sysctl_tcp_app_win, sizeof(int), 0644, NULL, &proc_dointvec},
+       {NET_TCP_ADV_WIN_SCALE, "tcp_adv_win_scale",
+        &sysctl_tcp_adv_win_scale, sizeof(int), 0644, NULL, &proc_dointvec},
        {0}
 };
 
index dbf680233fc4e9a5808f7731f1fb6c7684964f6b..8745fde60151d65f029baf9ef13c28fa99db48aa 100644 (file)
@@ -5,7 +5,7 @@
  *
  *             Implementation of the Transmission Control Protocol(TCP).
  *
- * Version:    $Id: tcp.c,v 1.170 2000/07/08 00:20:43 davem Exp $
+ * Version:    $Id: tcp.c,v 1.171 2000/08/09 11:59:04 davem Exp $
  *
  * Authors:    Ross Biro, <bir7@leland.Stanford.Edu>
  *             Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
  *                                     tcp_do_sendmsg to avoid burstiness.
  *             Eric Schenk     :       Fix fast close down bug with
  *                                     shutdown() followed by close().
- *             Andi Kleen :    Make poll agree with SIGIO
+ *             Andi Kleen      :       Make poll agree with SIGIO
  *     Salvatore Sanfilippo    :       Support SO_LINGER with linger == 1 and
  *                                     lingertime == 0 (RFC 793 ABORT Call)
  *                                     
@@ -436,6 +436,96 @@ kmem_cache_t *tcp_timewait_cachep;
 
 atomic_t tcp_orphan_count = ATOMIC_INIT(0);
 
+int sysctl_tcp_mem[3] = { 0, };
+int sysctl_tcp_wmem[3] = { 4*1024, 16*1024, 128*1024 };
+int sysctl_tcp_rmem[3] = { 4*1024, 87380, 87380*2 };
+
+atomic_t tcp_memory_allocated; /* Current allocated memory. */
+atomic_t tcp_sockets_allocated;        /* Current number of TCP sockets. */
+
+/* Pressure flag: try to collapse.
+ * Technical note: it is used by multiple contexts non atomically.
+ * All the tcp_mem_schedule() is of this nature: accounting
+ * is strict, actions are advisory and have some latency. */
+int tcp_memory_pressure;
+
+#define TCP_PAGES(amt) (((amt)+TCP_MEM_QUANTUM-1)/TCP_MEM_QUANTUM)
+
+int tcp_mem_schedule(struct sock *sk, int size, int kind)
+{
+       int amt = TCP_PAGES(size);
+
+       sk->forward_alloc += amt*TCP_MEM_QUANTUM;
+       atomic_add(amt, &tcp_memory_allocated);
+
+       /* Under limit. */
+       if (atomic_read(&tcp_memory_allocated) < sysctl_tcp_mem[0]) {
+               if (tcp_memory_pressure)
+                       tcp_memory_pressure = 0;
+               return 1;
+       }
+
+       /* Over hard limit. */
+       if (atomic_read(&tcp_memory_allocated) > sysctl_tcp_mem[2]) {
+               tcp_enter_memory_pressure();
+               goto suppress_allocation;
+       }
+
+       /* Under pressure. */
+       if (atomic_read(&tcp_memory_allocated) > sysctl_tcp_mem[1])
+               tcp_enter_memory_pressure();
+
+       if (kind) {
+               if (atomic_read(&sk->rmem_alloc) < sysctl_tcp_rmem[0])
+                       return 1;
+       } else {
+               if (sk->wmem_queued < sysctl_tcp_wmem[0])
+                       return 1;
+       }
+
+       if (!tcp_memory_pressure ||
+           sysctl_tcp_mem[2] > atomic_read(&tcp_sockets_allocated)
+           * TCP_PAGES(sk->wmem_queued+atomic_read(&sk->rmem_alloc)+
+                       sk->forward_alloc))
+               return 1;
+
+suppress_allocation:
+
+       if (kind == 0) {
+               tcp_moderate_sndbuf(sk);
+
+               /* Fail only if socket is _under_ its sndbuf.
+                * In this case we cannot block, so that we have to fail.
+                */
+               if (sk->wmem_queued+size >= sk->sndbuf)
+                       return 1;
+       }
+
+       /* Alas. Undo changes. */
+       sk->forward_alloc -= amt*TCP_MEM_QUANTUM;
+       atomic_sub(amt, &tcp_memory_allocated);
+       return 0;
+}
+
+void __tcp_mem_reclaim(struct sock *sk)
+{
+       if (sk->forward_alloc >= TCP_MEM_QUANTUM) {
+               atomic_sub(sk->forward_alloc/TCP_MEM_QUANTUM, &tcp_memory_allocated);
+               sk->forward_alloc &= (TCP_MEM_QUANTUM-1);
+               if (tcp_memory_pressure &&
+                   atomic_read(&tcp_memory_allocated) < sysctl_tcp_mem[0])
+                       tcp_memory_pressure = 0;
+       }
+}
+
+void tcp_rfree(struct sk_buff *skb)
+{
+       struct sock *sk = skb->sk;
+
+       atomic_sub(skb->truesize, &sk->rmem_alloc);
+       sk->forward_alloc += skb->truesize;
+}
+
 /*
  * LISTEN is a special case for poll..
  */
@@ -504,6 +594,9 @@ unsigned int tcp_poll(struct file * file, struct socket *sock, poll_table *wait)
 
        /* Connected? */
        if ((1 << sk->state) & ~(TCPF_SYN_SENT|TCPF_SYN_RECV)) {
+               /* Potential race condition. If read of tp below will
+                * escape above sk->state, we can be illegally awaken
+                * in SYN_* states. */
                if ((tp->rcv_nxt != tp->copied_seq) &&
                    (tp->urg_seq != tp->copied_seq ||
                     tp->rcv_nxt != tp->copied_seq+1 ||
@@ -511,7 +604,7 @@ unsigned int tcp_poll(struct file * file, struct socket *sock, poll_table *wait)
                        mask |= POLLIN | POLLRDNORM;
 
                if (!(sk->shutdown & SEND_SHUTDOWN)) {
-                       if (sock_wspace(sk) >= tcp_min_write_space(sk)) {
+                       if (tcp_wspace(sk) >= tcp_min_write_space(sk)) {
                                mask |= POLLOUT | POLLWRNORM;
                        } else {  /* send SIGIO later */
                                set_bit(SOCK_ASYNC_NOSPACE, &sk->socket->flags);
@@ -521,7 +614,7 @@ unsigned int tcp_poll(struct file * file, struct socket *sock, poll_table *wait)
                                 * wspace test but before the flags are set,
                                 * IO signal will be lost.
                                 */
-                               if (sock_wspace(sk) >= tcp_min_write_space(sk))
+                               if (tcp_wspace(sk) >= tcp_min_write_space(sk))
                                        mask |= POLLOUT | POLLWRNORM;
                        }
                }
@@ -533,38 +626,9 @@ unsigned int tcp_poll(struct file * file, struct socket *sock, poll_table *wait)
 }
 
 /*
- *     Socket write_space callback.
- *     This (or rather the sock_wake_async) should agree with poll.
- *
- *     WARNING. This callback is called, when socket is not locked.
- *
- *     This wakeup is used by TCP only as dead-lock breaker, real
- *     wakeup occurs when incoming ack frees some space in buffer.
+ *     TCP socket write_space callback. Not used.
  */
 void tcp_write_space(struct sock *sk)
-{
-       struct socket *sock;
-
-       read_lock(&sk->callback_lock);
-       if ((sock = sk->socket) != NULL && atomic_read(&sk->wmem_alloc) == 0) {
-               if (test_bit(SOCK_NOSPACE, &sock->flags)) {
-                       if (sk->sleep && waitqueue_active(sk->sleep)) {
-                               clear_bit(SOCK_NOSPACE, &sock->flags);
-                               wake_up_interruptible(sk->sleep);
-                       }
-               }
-
-               if (sock->fasync_list)
-                       sock_wake_async(sock, 2, POLL_OUT);
-       }
-       read_unlock(&sk->callback_lock);
-}
-
-/* Listening TCP sockets never sleep to wait for memory, so
- * it is completely silly to wake them up on queue space
- * available events.  So we hook them up to this dummy callback.
- */
-static void tcp_listen_write_space(struct sock *sk)
 {
 }
 
@@ -647,7 +711,6 @@ int tcp_listen_start(struct sock *sk)
        if (sk->prot->get_port(sk, sk->num) == 0) {
                sk->sport = htons(sk->num);
 
-               sk->write_space = tcp_listen_write_space;
                sk_dst_reset(sk);
                sk->prot->hash(sk);
 
@@ -774,7 +837,7 @@ static int wait_for_tcp_connect(struct sock * sk, int flags, long *timeo_p)
 
 static inline int tcp_memory_free(struct sock *sk)
 {
-       return atomic_read(&sk->wmem_alloc) < sk->sndbuf;
+       return sk->wmem_queued < sk->sndbuf;
 }
 
 /*
@@ -782,33 +845,44 @@ static inline int tcp_memory_free(struct sock *sk)
  */
 static long wait_for_tcp_memory(struct sock * sk, long timeo)
 {
-       if (!tcp_memory_free(sk)) {
-               DECLARE_WAITQUEUE(wait, current);
+       long vm_wait = 0;
+       long current_timeo = timeo;
+       DECLARE_WAITQUEUE(wait, current);
 
-               clear_bit(SOCK_ASYNC_NOSPACE, &sk->socket->flags);
+       if (tcp_memory_free(sk))
+               current_timeo = vm_wait = (net_random()%(HZ/5))+2;
 
-               add_wait_queue(sk->sleep, &wait);
-               for (;;) {
-                       set_bit(SOCK_NOSPACE, &sk->socket->flags);
+       clear_bit(SOCK_ASYNC_NOSPACE, &sk->socket->flags);
 
-                       set_current_state(TASK_INTERRUPTIBLE);
+       add_wait_queue(sk->sleep, &wait);
+       for (;;) {
+               set_bit(SOCK_NOSPACE, &sk->socket->flags);
 
-                       if (signal_pending(current))
-                               break;
-                       if (tcp_memory_free(sk))
-                               break;
-                       if (sk->shutdown & SEND_SHUTDOWN)
-                               break;
-                       if (sk->err)
-                               break;
-                       release_sock(sk);
-                       if (!tcp_memory_free(sk))
-                               timeo = schedule_timeout(timeo);
-                       lock_sock(sk);
+               set_current_state(TASK_INTERRUPTIBLE);
+
+               if (signal_pending(current))
+                       break;
+               if (tcp_memory_free(sk) && !vm_wait)
+                       break;
+               if (sk->shutdown & SEND_SHUTDOWN)
+                       break;
+               if (sk->err)
+                       break;
+               release_sock(sk);
+               if (!tcp_memory_free(sk) || vm_wait)
+                       current_timeo = schedule_timeout(current_timeo);
+               lock_sock(sk);
+               if (vm_wait) {
+                       if (timeo != MAX_SCHEDULE_TIMEOUT &&
+                           (timeo -= vm_wait-current_timeo) < 0)
+                               timeo = 0;
+                       break;
+               } else {
+                       timeo = current_timeo;
                }
-               current->state = TASK_RUNNING;
-               remove_wait_queue(sk->sleep, &wait);
        }
+       current->state = TASK_RUNNING;
+       remove_wait_queue(sk->sleep, &wait);
        return timeo;
 }
 
@@ -925,43 +999,35 @@ int tcp_sendmsg(struct sock *sk, struct msghdr *msg, int size)
                                        from += copy;
                                        copied += copy;
                                        seglen -= copy;
-                                       if (PSH_NEEDED)
+                                       if (PSH_NEEDED ||
+                                           after(tp->write_seq, tp->pushed_seq+(tp->max_window>>1))) {
                                                TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_PSH;
+                                               tp->pushed_seq = tp->write_seq;
+                                       }
                                        continue;
                                }
                        }
 
-                       /* A chunk was here doing something strange
-                        * with psh etc. It is deleted, because it was
-                        * evident non-sense.                    --ANK
-                        */
-
                        copy = min(seglen, mss_now);
 
                        /* Determine how large of a buffer to allocate.  */
-                       tmp = MAX_TCP_HEADER + 15;
+                       tmp = MAX_TCP_HEADER + 15 + tp->mss_cache;
                        if (copy < mss_now && !(flags & MSG_OOB)) {
-                               tmp += mss_now;
-
                                /* What is happening here is that we want to
                                 * tack on later members of the users iovec
                                 * if possible into a single frame.  When we
-                                * leave this loop our caller checks to see if
+                                * leave this loop our we check to see if
                                 * we can send queued frames onto the wire.
-                                * See tcp_v[46]_sendmsg() for this.
                                 */
                                queue_it = 1;
                        } else {
-                               tmp += copy;
                                queue_it = 0;
                        }
 
-                       if (tcp_memory_free(sk)) {
-                               skb = alloc_skb(tmp, GFP_KERNEL);
-                               if (skb == NULL)
-                                       goto do_oom;
-                               skb_set_owner_w(skb, sk);
-                       } else {
+                       skb = NULL;
+                       if (tcp_memory_free(sk))
+                               skb = tcp_alloc_skb(sk, tmp, GFP_KERNEL);
+                       if (skb == NULL) {
                                /* If we didn't get any memory, we need to sleep. */
                                set_bit(SOCK_ASYNC_NOSPACE, &sk->socket->flags);
                                set_bit(SOCK_NOSPACE, &sk->socket->flags);
@@ -987,11 +1053,18 @@ int tcp_sendmsg(struct sock *sk, struct msghdr *msg, int size)
                        seglen -= copy;
 
                        /* Prepare control bits for TCP header creation engine. */
-                       TCP_SKB_CB(skb)->flags = (TCPCB_FLAG_ACK |
-                                                 ((PSH_NEEDED) ?
-                                                  TCPCB_FLAG_PSH : 0));
+                       TCP_SKB_CB(skb)->flags = TCPCB_FLAG_ACK;
+                       if (PSH_NEEDED ||
+                           after(tp->write_seq+copy, tp->pushed_seq+(tp->max_window>>1))) {
+                               TCP_SKB_CB(skb)->flags = TCPCB_FLAG_ACK|TCPCB_FLAG_PSH;
+                               tp->pushed_seq = tp->write_seq + copy;
+                       } else {
+                               TCP_SKB_CB(skb)->flags = TCPCB_FLAG_ACK;
+                       }
                        TCP_SKB_CB(skb)->sacked = 0;
                        if (flags & MSG_OOB) {
+                               /* Funny. 8) This makes URG fully meaningless.
+                                * Well, OK. It does not contradict to anything yet. */
                                TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_URG;
                                TCP_SKB_CB(skb)->urg_ptr = copy;
                        } else
@@ -1041,15 +1114,12 @@ do_shutdown:
                err = -EPIPE;
        }
        goto out;
-do_oom:
-       err = copied ? : -ENOBUFS;
-       goto out;
 do_interrupted:
        if(copied)
                err = copied;
        goto out;
 do_fault:
-       kfree_skb(skb);
+       __kfree_skb(skb);
 do_fault2:
        err = -EFAULT;
        goto out;
@@ -1072,7 +1142,7 @@ static int tcp_recv_urg(struct sock * sk, long timeo,
        if (sk->urginline || !tp->urg_data || tp->urg_data == TCP_URG_READ)
                return -EINVAL; /* Yes this is right ! */
 
-       if (sk->done)
+       if (sk->state==TCP_CLOSE && !sk->done)
                return -ENOTCONN;
 
        if (tp->urg_data & TCP_URG_VALID) {
@@ -1095,7 +1165,6 @@ static int tcp_recv_urg(struct sock * sk, long timeo,
                return err ? -EFAULT : len;
        }
 
-       /* Do not set sk->done, it is set only by normal data receive */
        if (sk->state == TCP_CLOSE || (sk->shutdown & RCV_SHUTDOWN))
                return 0;
 
@@ -1117,8 +1186,6 @@ static int tcp_recv_urg(struct sock * sk, long timeo,
 static inline void tcp_eat_skb(struct sock *sk, struct sk_buff * skb)
 {
        __skb_unlink(skb, &sk->receive_queue);
-       BUG_TRAP(atomic_read(&skb->users) == 1);
-       /* Well, if I missed something then punishment will be terrible oops. */
        __kfree_skb(skb);
 }
 
@@ -1143,34 +1210,19 @@ static void cleanup_rbuf(struct sock *sk, int copied)
                tcp_eat_skb(sk, skb);
        }
 
-       if (tp->ack.pending) {
+       if (tcp_ack_scheduled(tp)) {
                   /* Delayed ACKs frequently hit locked sockets during bulk receive. */
                if (tp->ack.blocked
-#ifdef TCP_MORE_COARSE_ACKS
                    /* Once-per-two-segments ACK was not sent by tcp_input.c */
                    || tp->rcv_nxt - tp->rcv_wup > tp->ack.rcv_mss
-#endif
                    /*
-                    * If this read emptied read buffer, we send ACK when:
-                    *
-                    * -- ATO estimator diverged. In this case it is useless
-                    * to delay ACK, it will miss in any case.
-                    *
-                    * -- The second condition is triggered when we did not
-                    * ACK 8 segments not depending of their size.
-                    * Linux senders allocate full-sized frame even for one byte
-                    * packets, so that default queue for MTU=8K can hold
-                    * only 8 packets. Note, that no other workarounds
-                    * but counting packets are possible. If sender selected
-                    * a small sndbuf or have larger mtu lockup will still
-                    * occur. Well, not lockup, but 10-20msec gap.
-                    * It is essentially dead lockup for 1Gib ethernet
-                    * and loopback :-). The value 8 covers all reasonable
-                    * cases and we may receive packet of any size
-                    * with maximal possible rate now.
+                    * If this read emptied read buffer, we send ACK, if
+                    * connection is not bidirectional, user drained
+                    * receive buffer and there was a small segment
+                    * in queue.
                     */
                    || (copied > 0 &&
-                       (tp->ack.ato >= TCP_DELACK_MAX || tp->ack.rcv_segs > 7) &&
+                       tp->ack.rcv_small > tp->ack.rcv_thresh &&
                        !tp->ack.pingpong &&
                        atomic_read(&sk->rmem_alloc) == 0)) {
                        time_to_ack = 1;
@@ -1185,15 +1237,19 @@ static void cleanup_rbuf(struct sock *sk, int copied)
         */
        if(copied > 0 && !time_to_ack && !(sk->shutdown&RCV_SHUTDOWN)) {
                __u32 rcv_window_now = tcp_receive_window(tp);
-               __u32 new_window = __tcp_select_window(sk);
 
-               /* Send ACK now, if this read freed lots of space
-                * in our buffer. Certainly, new_window is new window.
-                * We can advertise it now, if it is not less than current one.
-                * "Lots" means "at least twice" here.
-                */
-               if(new_window && new_window >= 2*rcv_window_now)
-                       time_to_ack = 1;
+               /* Optimize, __tcp_select_window() is not cheap. */
+               if (2*rcv_window_now <= tp->window_clamp) {
+                       __u32 new_window = __tcp_select_window(sk);
+
+                       /* Send ACK now, if this read freed lots of space
+                        * in our buffer. Certainly, new_window is new window.
+                        * We can advertise it now, if it is not less than current one.
+                        * "Lots" means "at least twice" here.
+                        */
+                       if(new_window && new_window >= 2*rcv_window_now)
+                               time_to_ack = 1;
+               }
        }
        if (time_to_ack)
                tcp_send_ack(sk);
@@ -1345,23 +1401,25 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg,
                            !timeo)
                                break;
                } else {
+                       if (sk->done)
+                               break;
+
                        if (sk->err) {
                                copied = sock_error(sk);
                                break;
                        }
 
-                       if (sk->shutdown & RCV_SHUTDOWN) {
-                               if (!(flags&MSG_PEEK))
-                                       sk->done = 1;
+                       if (sk->shutdown & RCV_SHUTDOWN)
                                break;
-                       }
 
                        if (sk->state == TCP_CLOSE) {
-                               if (sk->done) {
+                               if (!sk->done) {
+                                       /* This occurs when user tries to read
+                                        * from never connected socket.
+                                        */
                                        copied = -ENOTCONN;
                                        break;
-                               } else if (!(flags&MSG_PEEK))
-                                       sk->done = 1;
+                               }
                                break;
                        }
 
@@ -1629,14 +1687,20 @@ static inline int closing(struct sock * sk)
 static __inline__ void tcp_kill_sk_queues(struct sock *sk)
 {
        /* First the read buffer. */
-       skb_queue_purge(&sk->receive_queue);
+       __skb_queue_purge(&sk->receive_queue);
 
        /* Next, the error queue. */
-       skb_queue_purge(&sk->error_queue);
+       __skb_queue_purge(&sk->error_queue);
 
        /* Next, the write queue. */
        BUG_TRAP(skb_queue_empty(&sk->write_queue));
 
+       /* Account for returned memory. */
+       tcp_mem_reclaim(sk);
+
+       BUG_TRAP(sk->wmem_queued == 0);
+       BUG_TRAP(sk->forward_alloc == 0);
+
        /* It is _impossible_ for the backlog to contain anything
         * when we get here.  All user references to this socket
         * have gone away, only the net layer knows can touch it.
@@ -1706,9 +1770,11 @@ void tcp_close(struct sock *sk, long timeout)
        while((skb=__skb_dequeue(&sk->receive_queue))!=NULL) {
                u32 len = TCP_SKB_CB(skb)->end_seq - TCP_SKB_CB(skb)->seq - skb->h.th->fin;
                data_was_unread += len;
-               kfree_skb(skb);
+               __kfree_skb(skb);
        }
 
+       tcp_mem_reclaim(sk);
+
        /* As outlined in draft-ietf-tcpimpl-prob-03.txt, section
         * 3.10, we send a RST here because data was lost.  To
         * witness the awful effects of the old behavior of always
@@ -1720,11 +1786,13 @@ void tcp_close(struct sock *sk, long timeout)
         */
        if(data_was_unread != 0) {
                /* Unread data was tossed, zap the connection. */
+               NET_INC_STATS_USER(TCPAbortOnClose);
                tcp_set_state(sk, TCP_CLOSE);
                tcp_send_active_reset(sk, GFP_KERNEL);
        } else if (sk->linger && sk->lingertime==0) {
                /* Check zero linger _after_ checking for unread data. */
                sk->prot->disconnect(sk, 0);
+               NET_INC_STATS_USER(TCPAbortOnData);
        } else if (tcp_close_state(sk)) {
                /* We FIN if the application ate all the data before
                 * zapping the connection.
@@ -1807,6 +1875,7 @@ adjudge_to_death:
                if (tp->linger2 < 0) {
                        tcp_set_state(sk, TCP_CLOSE);
                        tcp_send_active_reset(sk, GFP_ATOMIC);
+                       NET_INC_STATS_BH(TCPAbortOnLinger);
                } else {
                        int tmo = tcp_fin_time(tp);
 
@@ -1819,12 +1888,17 @@ adjudge_to_death:
                        }
                }
        }
-       if (sk->state != TCP_CLOSE &&
-           atomic_read(&tcp_orphan_count) > sysctl_tcp_max_orphans) {
-               if (net_ratelimit())
-                       printk(KERN_INFO "TCP: too many of orphaned sockets\n");
-               tcp_set_state(sk, TCP_CLOSE);
-               tcp_send_active_reset(sk, GFP_ATOMIC);
+       if (sk->state != TCP_CLOSE) {
+               tcp_mem_reclaim(sk);
+               if (atomic_read(&tcp_orphan_count) > sysctl_tcp_max_orphans ||
+                   (sk->wmem_queued > SOCK_MIN_SNDBUF &&
+                    atomic_read(&tcp_memory_allocated) > sysctl_tcp_mem[2])) {
+                       if (net_ratelimit())
+                               printk(KERN_INFO "TCP: too many of orphaned sockets\n");
+                       tcp_set_state(sk, TCP_CLOSE);
+                       tcp_send_active_reset(sk, GFP_ATOMIC);
+                       NET_INC_STATS_BH(TCPAbortOnMemory);
+               }
        }
        atomic_inc(&tcp_orphan_count);
 
@@ -1873,7 +1947,7 @@ int tcp_disconnect(struct sock *sk, int flags)
 
        tcp_clear_xmit_timers(sk);
        __skb_queue_purge(&sk->receive_queue);
-       __skb_queue_purge(&sk->write_queue);
+       tcp_writequeue_purge(sk);
        __skb_queue_purge(&tp->out_of_order_queue);
 
        sk->dport = 0;
@@ -1887,25 +1961,21 @@ int tcp_disconnect(struct sock *sk, int flags)
 
        sk->shutdown = 0;
        sk->done = 0;
-       sk->write_space = tcp_write_space;
        tp->srtt = 0;
-       if (sysctl_tcp_tw_recycle) {
-               if ((tp->write_seq += 2) == 0)
-                       tp->write_seq = 1;
-       } else {
-               tp->write_seq = 0;
-       }
+       if ((tp->write_seq += tp->max_window+2) == 0)
+               tp->write_seq = 1;
        tp->backoff = 0;
        tp->snd_cwnd = 2;
        tp->probes_out = 0;
        tp->packets_out = 0;
-       tp->high_seq = 0;
        tp->snd_ssthresh = 0x7fffffff;
        tp->snd_cwnd_cnt = 0;
-       tp->dup_acks = 0;
+       tp->ca_state = TCP_CA_Open;
+       tcp_clear_retrans(tp);
        tcp_delack_init(tp);
-       tp->send_head = tp->retrans_head = NULL;
+       tp->send_head = NULL;
        tp->saw_tstamp = 0;
+       tcp_sack_reset(tp);
        __sk_dst_reset(sk);
 
        BUG_TRAP(!sk->num || sk->prev);
@@ -1916,8 +1986,7 @@ int tcp_disconnect(struct sock *sk, int flags)
 
 /*
  *     Wait for an incoming connection, avoid race
- *     conditions. This must be called with the socket locked,
- *     and without the kernel lock held.
+ *     conditions. This must be called with the socket locked.
  */
 static int wait_for_connect(struct sock * sk, long timeo)
 {
@@ -1965,8 +2034,6 @@ static int wait_for_connect(struct sock * sk, long timeo)
 
 /*
  *     This will accept the next outstanding connection.
- *
- *     Be careful about race conditions here - this is subtle.
  */
 
 struct sock *tcp_accept(struct sock *sk, int flags, int *err)
@@ -2152,7 +2219,7 @@ int tcp_setsockopt(struct sock *sk, int level, int optname, char *optval,
                        tp->window_clamp = 0;
                } else {
                        tp->window_clamp = val<SOCK_MIN_RCVBUF/2 ?
-                               SOCK_MIN_SNDBUF : val;
+                               SOCK_MIN_RCVBUF/2 : val;
                }
                break;
 
@@ -2318,6 +2385,21 @@ void __init tcp_init(void)
        }
        tcp_port_rover = sysctl_local_port_range[0] - 1;
 
+       sysctl_tcp_mem[0] = 64<<order;
+       sysctl_tcp_mem[1] = 200<<order;
+       sysctl_tcp_mem[2] = 256<<order;
+       if (sysctl_tcp_mem[2] - sysctl_tcp_mem[1] > 512)
+               sysctl_tcp_mem[1] = sysctl_tcp_mem[2] - 512;
+       if (sysctl_tcp_mem[1] - sysctl_tcp_mem[0] > 512)
+               sysctl_tcp_mem[0] = sysctl_tcp_mem[1] - 512;
+
+       if (order < 3) {
+               sysctl_tcp_wmem[2] = 64*1024;
+               sysctl_tcp_rmem[0] = PAGE_SIZE;
+               sysctl_tcp_rmem[1] = 43689;
+               sysctl_tcp_rmem[2] = 2*43689;
+       }
+
        printk("TCP: Hash tables configured (established %d bind %d)\n",
               tcp_ehash_size<<1, tcp_bhash_size);
 }
index f062cb2fb1427bbb07def7c33b53a3e879e35ce9..d6b2a9d915ed5feb216a39d0342702ce2fa6dd08 100644 (file)
@@ -5,7 +5,7 @@
  *
  *             Implementation of the Transmission Control Protocol(TCP).
  *
- * Version:    $Id: tcp_input.c,v 1.193 2000/04/20 14:41:16 davem Exp $
+ * Version:    $Id: tcp_input.c,v 1.195 2000/08/10 01:21:14 davem Exp $
  *
  * Authors:    Ross Biro, <bir7@leland.Stanford.Edu>
  *             Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
@@ -55,6 +55,7 @@
  *                                     work without delayed acks. 
  *             Andi Kleen:             Process packets with PSH set in the
  *                                     fast path.
+ *             J Hadi Salim:           ECN support
  */
 
 #include <linux/config.h>
 #include <net/inet_common.h>
 #include <linux/ipsec.h>
 
-#ifdef CONFIG_SYSCTL
-#define SYNC_INIT 0 /* let the user enable it */
-#else
-#define SYNC_INIT 1
-#endif
 
 /* These are on by default so the code paths get tested.
  * For the final 2.2 this may be undone at our discretion. -DaveM
 int sysctl_tcp_timestamps = 1;
 int sysctl_tcp_window_scaling = 1;
 int sysctl_tcp_sack = 1;
-
-int sysctl_tcp_syncookies = SYNC_INIT; 
-int sysctl_tcp_stdurg;
-int sysctl_tcp_rfc1337;
-int sysctl_tcp_tw_recycle = 1;
-int sysctl_tcp_abort_on_overflow = 0;
+int sysctl_tcp_fack = 1;
+int sysctl_tcp_reordering = TCP_FASTRETRANS_THRESH;
+int sysctl_tcp_ecn = 1;
+int sysctl_tcp_dsack = 1;
+int sysctl_tcp_app_win = 31;
+int sysctl_tcp_adv_win_scale = 2;
+
+int sysctl_tcp_stdurg = 0;
+int sysctl_tcp_rfc1337 = 0;
 int sysctl_tcp_max_orphans = NR_FILE;
-int sysctl_tcp_max_tw_buckets = NR_FILE*2;
 
-static int prune_queue(struct sock *sk);
+#define FLAG_DATA              0x01 /* Incoming frame contained data.          */
+#define FLAG_WIN_UPDATE                0x02 /* Incoming ACK was a window update.       */
+#define FLAG_DATA_ACKED                0x04 /* This ACK acknowledged new data.         */
+#define FLAG_RETRANS_DATA_ACKED        0x08 /* "" "" some of which was retransmitted.  */
+#define FLAG_SYN_ACKED         0x10 /* This ACK acknowledged SYN.              */
+#define FLAG_DATA_SACKED       0x20 /* New SACK.                               */
+#define FLAG_ECE               0x40 /* ECE in this ACK                         */
+#define FLAG_DATA_LOST         0x80 /* SACK detected data lossage.             */
+#define FLAG_SLOWPATH          0x100 /* Do not skip RFC checks for window update.*/
+
+#define FLAG_ACKED             (FLAG_DATA_ACKED|FLAG_SYN_ACKED)
+#define FLAG_NOT_DUP           (FLAG_DATA|FLAG_WIN_UPDATE|FLAG_ACKED)
+#define FLAG_CA_ALERT          (FLAG_DATA_SACKED|FLAG_ECE)
+#define FLAG_FORWARD_PROGRESS  (FLAG_ACKED|FLAG_DATA_SACKED)
 
-/* 
- * Adapt the MSS value used to make delayed ack decision to the 
+#define IsReno(tp) ((tp)->sack_ok == 0)
+#define IsFack(tp) ((tp)->sack_ok & 2)
+
+
+
+/* Adapt the MSS value used to make delayed ack decision to the 
  * real world.
- *
- * The constant 536 hasn't any good meaning.  In IPv4 world
- * MTU may be smaller, though it contradicts to RFC1122, which
- * states that MSS must be at least 536.
- * We use the constant to do not ACK each second
- * packet in a stream of tiny size packets.
- * It means that super-low mtu links will be aggressively delacked.
- * Seems, it is even good. If they have so low mtu, they are weirdly
- * slow.
- *
- * AK: BTW it may be useful to add an option to lock the rcv_mss.
- *     this way the beowulf people wouldn't need ugly patches to get the
- *     ack frequencies they want and it would be an elegant way to tune delack.
  */ 
 static __inline__ void tcp_measure_rcv_mss(struct tcp_opt *tp, struct sk_buff *skb)
 {
@@ -118,43 +120,56 @@ static __inline__ void tcp_measure_rcv_mss(struct tcp_opt *tp, struct sk_buff *s
        if (len >= tp->ack.rcv_mss) {
                tp->ack.rcv_mss = len;
        } else {
+               tp->ack.rcv_small++;
+
                /* Otherwise, we make more careful check taking into account,
                 * that SACKs block is variable.
                 *
                 * "len" is invariant segment length, including TCP header.
                 */
                len = skb->tail - skb->h.raw;
-               if (len >= TCP_MIN_RCVMSS + sizeof(struct tcphdr)) {
+               if (len >= TCP_MIN_RCVMSS + sizeof(struct tcphdr) ||
+                   /* If PSH is not set, packet should be
+                    * full sized, provided peer TCP is not badly broken.
+                    * This observation (if it is correct 8)) allows
+                    * to handle super-low mtu links fairly.
+                    */
+#define TCP_REMNANT (TCP_FLAG_FIN|TCP_FLAG_URG|TCP_FLAG_SYN|TCP_FLAG_PSH)
+                   (len >= TCP_MIN_MSS + sizeof(struct tcphdr) &&
+                    !(tcp_flag_word(skb->h.th)&TCP_REMNANT))) {
                        /* Subtract also invariant (if peer is RFC compliant),
                         * tcp header plus fixed timestamp option length.
                         * Resulting "len" is MSS free of SACK jitter.
                         */
                        len -= tp->tcp_header_len;
-                       if (len == lss)
+                       if (len == lss) {
                                tp->ack.rcv_mss = len;
+                               tp->ack.rcv_small = 0;
+                               tp->ack.rcv_thresh = 0;
+                       }
                        tp->ack.last_seg_size = len;
                }
        }
 }
 
-
-static __inline__ void tcp_enter_quickack_mode(struct tcp_opt *tp)
+static void tcp_incr_quickack(struct tcp_opt *tp)
 {
-       unsigned quickacks = tcp_receive_window(tp)/(2*tp->ack.rcv_mss);
+       unsigned quickacks = tp->rcv_wnd/(2*tp->ack.rcv_mss);
 
-       tp->ack.quick = max(min(quickacks, 127), 1);
+       if (quickacks==0)
+               quickacks=2;
+       if (quickacks > tp->ack.quick)
+               tp->ack.quick = min(quickacks, TCP_MAX_QUICKACKS);
+}
 
-       if (!tp->tstamp_ok && tp->ack.quick>2) {
-               /* Quick ACKs are _dangerous_, if RTTM is not used.
-                * See comment in tcp_init_metrics(). We still help
-                * them to overcome the most difficult, initial
-                * phase of slow start.
-                */
-               tp->ack.quick = 2;
-       }
+void tcp_enter_quickack_mode(struct tcp_opt *tp)
+{
+       tcp_incr_quickack(tp);
+       tp->ack.pingpong = 0;
+       tp->ack.ato = TCP_ATO_MIN;
 }
 
-/* Send ACKs quickly, if "quick" count is not ehausted
+/* Send ACKs quickly, if "quick" count is not exhausted
  * and the session is not interactive.
  */
 
@@ -163,6 +178,173 @@ static __inline__ int tcp_in_quickack_mode(struct tcp_opt *tp)
        return (tp->ack.quick && !tp->ack.pingpong);
 }
 
+/* Buffer size and advertised window tuning.
+ *
+ * 1. Tuning sk->sndbuf, when connection enters established state.
+ */
+
+static void tcp_fixup_sndbuf(struct sock *sk)
+{
+       struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
+       int sndmem = tp->mss_clamp+MAX_TCP_HEADER+16+sizeof(struct sk_buff);
+
+       if (sk->sndbuf < 3*sndmem)
+               sk->sndbuf = min(3*sndmem, sysctl_tcp_wmem[2]);
+}
+
+/* 2. Tuning advertised window (window_clamp, rcv_ssthresh)
+ *
+ * All tcp_full_space() is split to two parts: "network" buffer, allocated
+ * forward and advertised in receiver window (tp->rcv_wnd) and
+ * "application buffer", required to isolate scheduling/application
+ * latencies from network.
+ * window_clamp is maximal advertised window. It can be less than
+ * tcp_full_space(), in this case tcp_full_space() - window_clamp
+ * is reserved for "application" buffer. The less window_clamp is
+ * the smoother our behaviour from viewpoint of network, but the lower
+ * throughput and the higher sensitivity of the connection to losses. 8)
+ *
+ * rcv_ssthresh is more strict window_clamp used at "slow start"
+ * phase to predict further behaviour of this connection.
+ * It is used for two goals:
+ * - to enforce header prediction at sender, even when application
+ *   requires some significant "application buffer". It is check #1.
+ * - to prevent pruning of receive queue because of misprediction
+ *   of receiver window. Check #2.
+ *
+ * The scheme does not work when sender sends good segments opening
+ * window and then starts to feed us spagetti. But it should work
+ * in common situations. Otherwise, we have to rely on queue collapsing.
+ */
+
+/* Slow part of check#2. */
+static int
+__tcp_grow_window(struct sock *sk, struct tcp_opt *tp, struct sk_buff *skb)
+{
+       /* Optimize this! */
+       int truesize = tcp_win_from_space(skb->truesize)/2;
+       int window = tcp_full_space(sk)/2;
+
+       while (tp->rcv_ssthresh <= window) {
+               if (truesize <= skb->len)
+                       return 2*tp->ack.rcv_mss;
+
+               truesize >>= 1;
+               window >>= 1;
+       }
+       return 0;
+}
+
+static __inline__ void
+tcp_grow_window(struct sock *sk, struct tcp_opt *tp, struct sk_buff *skb)
+{
+       /* Check #1 */
+       if (tp->rcv_ssthresh < tp->window_clamp &&
+           (int)tp->rcv_ssthresh < tcp_space(sk) &&
+           !tcp_memory_pressure) {
+               int incr;
+
+               /* Check #2. Increase window, if skb with such overhead
+                * will fit to rcvbuf in future.
+                */
+               if (tcp_win_from_space(skb->truesize) <= skb->len)
+                       incr = 2*tp->advmss;
+               else
+                       incr = __tcp_grow_window(sk, tp, skb);
+
+               if (incr) {
+                       tp->rcv_ssthresh = min(tp->rcv_ssthresh + incr, tp->window_clamp);
+                       tp->ack.quick |= 1;
+               }
+       }
+}
+
+/* 3. Tuning rcvbuf, when connection enters established state. */
+
+static void tcp_fixup_rcvbuf(struct sock *sk)
+{
+       struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
+       int rcvmem = tp->advmss+MAX_TCP_HEADER+16+sizeof(struct sk_buff);
+
+       /* Try to select rcvbuf so that 4 mss-sized segments
+        * will fit to window and correspoding skbs will fit to our rcvbuf.
+        * (was 3; 4 is minimum to allow fast retransmit to work.)
+        */
+       while (tcp_win_from_space(rcvmem) < tp->advmss)
+               rcvmem += 128;
+       if (sk->rcvbuf < 4*rcvmem)
+               sk->rcvbuf = min(4*rcvmem, sysctl_tcp_rmem[2]);
+}
+
+/* 4. Try to fixup all. It is made iimediately after connection enters
+ *    established state.
+ */
+static void tcp_init_buffer_space(struct sock *sk)
+{
+       struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
+       int maxwin;
+
+       if (!(sk->userlocks&SOCK_RCVBUF_LOCK))
+               tcp_fixup_rcvbuf(sk);
+       if (!(sk->userlocks&SOCK_SNDBUF_LOCK))
+               tcp_fixup_sndbuf(sk);
+
+       maxwin = tcp_full_space(sk);
+
+       if (tp->window_clamp >= maxwin) {
+               tp->window_clamp = maxwin;
+
+               if (sysctl_tcp_app_win && maxwin>4*tp->advmss)
+                       tp->window_clamp = max(maxwin-(maxwin>>sysctl_tcp_app_win), 4*tp->advmss);
+       }
+
+       /* Force reservation of one segment. */
+       if (sysctl_tcp_app_win &&
+           tp->window_clamp > 2*tp->advmss &&
+           tp->window_clamp + tp->advmss > maxwin)
+               tp->window_clamp = max(2*tp->advmss, maxwin-tp->advmss);
+
+       tp->rcv_ssthresh = min(tp->rcv_ssthresh, tp->window_clamp);
+       tp->snd_cwnd_stamp = tcp_time_stamp;
+}
+
+/* 5. Recalculate window clamp after socket hit its memory bounds. */
+static void tcp_clamp_window(struct sock *sk, struct tcp_opt *tp)
+{
+       struct sk_buff *skb;
+       int app_win = tp->rcv_nxt - tp->copied_seq;
+       int ofo_win = 0;
+
+       tp->ack.quick = 0;
+
+       skb_queue_walk(&tp->out_of_order_queue, skb) {
+               ofo_win += skb->len;
+       }
+
+       /* If overcommit is due to out of order segments,
+        * do not clamp window. Try to expand rcvbuf instead.
+        */
+       if (ofo_win) {
+               if (sk->rcvbuf < sysctl_tcp_rmem[2] &&
+                   !(sk->userlocks&SOCK_RCVBUF_LOCK) &&
+                   !tcp_memory_pressure &&
+                   atomic_read(&tcp_memory_allocated) < sysctl_tcp_mem[0])
+                       sk->rcvbuf = min(atomic_read(&sk->rmem_alloc), sysctl_tcp_rmem[2]);
+       }
+       if (atomic_read(&sk->rmem_alloc) > sk->rcvbuf) {
+               app_win += ofo_win;
+               if (atomic_read(&sk->rmem_alloc) >= 2*sk->rcvbuf)
+                       app_win >>= 1;
+               if (app_win > tp->ack.rcv_mss)
+                       app_win -= tp->ack.rcv_mss;
+               app_win = max(app_win, 2*tp->advmss);
+
+               if (!ofo_win)
+                       tp->window_clamp = min(tp->window_clamp, app_win);
+               tp->rcv_ssthresh = min(tp->window_clamp, 2*tp->advmss);
+       }
+}
+
 /* There is something which you must keep in mind when you analyze the
  * behavior of the tp->ato delayed ack timeout interval.  When a
  * connection starts up, we want to ack as quickly as possible.  The
@@ -173,14 +355,13 @@ static __inline__ int tcp_in_quickack_mode(struct tcp_opt *tp)
  * each ACK we send, he increments snd_cwnd and transmits more of his
  * queue.  -DaveM
  */
-static void tcp_event_data_recv(struct tcp_opt *tp, struct sk_buff *skb)
+static void tcp_event_data_recv(struct sock *sk, struct tcp_opt *tp, struct sk_buff *skb)
 {
        u32 now;
 
-       tcp_measure_rcv_mss(tp, skb);
+       tcp_schedule_ack(tp);
 
-       tp->ack.pending = 1;
-       tp->ack.rcv_segs++;
+       tcp_measure_rcv_mss(tp, skb);
 
        now = tcp_time_stamp;
 
@@ -188,37 +369,31 @@ static void tcp_event_data_recv(struct tcp_opt *tp, struct sk_buff *skb)
                /* The _first_ data packet received, initialize
                 * delayed ACK engine.
                 */
-
-               /* Help sender leave slow start quickly. */
                tcp_enter_quickack_mode(tp);
-
-               /* Pingpong is off, session is not interactive by default */
-               tp->ack.pingpong = 0;
-
-               /* ATO is minimal */
-               tp->ack.ato = TCP_ATO_MIN;
        } else {
                int m = now - tp->ack.lrcvtime;
 
-               if (m > TCP_ATO_MAX/2) {
-                       /* Do not touch ATO, if interval is out of bounds.
-                        * It will be deflated by delack timer, if our peer
-                        * really sends too rarely.
+               if (m <= TCP_ATO_MIN/2) {
+                       /* The fastest case is the first. */
+                       tp->ack.ato = (tp->ack.ato>>1) + TCP_ATO_MIN/2;
+               } else if (m < tp->ack.ato) {
+                       tp->ack.ato = (tp->ack.ato>>1) + m;
+                       if (tp->ack.ato > tp->rto)
+                               tp->ack.ato = tp->rto;
+               } else if (m > tp->rto) {
+                       /* Too long gap. Apparently sender failed to
+                        * restart window, so that we send ACKs quickly.
                         */
-                       if (m > tp->rto) {
-                               /* Too long gap. Apparently sender falled to
-                                * restart window, so that we send ACKs quickly.
-                                */
-                               tcp_enter_quickack_mode(tp);
-                       }
-               } else {
-                       if (m <= 0)
-                               m = TCP_ATO_MIN/2;
-                       if (m <= tp->ack.ato)
-                               tp->ack.ato = (tp->ack.ato >> 1) + m;
+                       tcp_incr_quickack(tp);
+                       tcp_mem_reclaim(sk);
                }
        }
        tp->ack.lrcvtime = now;
+
+       TCP_ECN_check_ce(tp, skb);
+
+       if (skb->len >= 128)
+               tcp_grow_window(sk, tp, skb);
 }
 
 /* Called to compute a smoothed rtt estimate. The data fed to this
@@ -230,7 +405,6 @@ static void tcp_event_data_recv(struct tcp_opt *tp, struct sk_buff *skb)
  * To save cycles in the RFC 1323 implementation it was better to break
  * it up into three procedures. -- erics
  */
-
 static __inline__ void tcp_rtt_estimator(struct tcp_opt *tp, __u32 mrtt)
 {
        long m = mrtt; /* RTT */
@@ -243,6 +417,13 @@ static __inline__ void tcp_rtt_estimator(struct tcp_opt *tp, __u32 mrtt)
         *
         *      On a 1990 paper the rto value is changed to:
         *      RTO = rtt + 4 * mdev
+        *
+        * Funny. This algorithm seems to be very broken.
+        * These formulae increase RTO, when it should be decreased, increase
+        * too slowly, when it should be increased quickly, decrease too quickly
+        * etc. I guess in BSD RTO takes ONE value, so that it absolutely
+        * does not matter how to _calculate_ it. Seems, it was a trap
+        * that VJ failed to avoid. 8)
         */
        if(m == 0)
                m = 1;
@@ -263,16 +444,27 @@ static __inline__ void tcp_rtt_estimator(struct tcp_opt *tp, __u32 mrtt)
 /* Calculate rto without backoff.  This is the second half of Van Jacobson's
  * routine referred to above.
  */
-
 static __inline__ void tcp_set_rto(struct tcp_opt *tp)
 {
        tp->rto = (tp->srtt >> 3) + tp->mdev;
        /* I am not enough educated to understand this magic.
         * However, it smells bad. snd_cwnd>31 is common case.
         */
+       /* OK, I found comment in 2.0 source tree, it deserves
+        * to be reproduced:
+        * ====
+        * Note: Jacobson's algorithm is fine on BSD which has a 1/2 second
+        * granularity clock, but with our 1/100 second granularity clock we
+        * become too sensitive to minor changes in the round trip time.
+        * We add in two compensating factors. First we multiply by 5/4.
+        * For large congestion windows this allows us to tolerate burst
+        * traffic delaying up to 1/4 of our packets. We also add in
+        * a rtt / cong_window term. For small congestion windows this allows
+        * a single packet delay, but has negligible effect
+        * on the compensation for large windows.
+        */
        tp->rto += (tp->rto >> 2) + (tp->rto >> (tp->snd_cwnd-1));
 }
 
 /* Keep the rto between HZ/5 and 120*HZ. 120*HZ is the upper bound
  * on packet lifetime in the internet. We need the HZ/5 lower
@@ -292,11 +484,12 @@ static __inline__ void tcp_bound_rto(struct tcp_opt *tp)
                tp->rto = TCP_RTO_MAX;
 }
 
+
 /* Save metrics learned by this TCP session.
    This function is called only, when TCP finishes sucessfully
    i.e. when it enters TIME-WAIT or goes from LAST-ACK to CLOSE.
  */
-static void tcp_update_metrics(struct sock *sk)
+void tcp_update_metrics(struct sock *sk)
 {
        struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
        struct dst_entry *dst = __sk_dst_get(sk);
@@ -344,19 +537,20 @@ static void tcp_update_metrics(struct sock *sk)
                                dst->rttvar -= (dst->rttvar - m)>>2;
                }
 
-               if (tp->snd_ssthresh == 0x7FFFFFFF) {
+               if (tp->snd_ssthresh >= 0xFFFF) {
                        /* Slow start still did not finish. */
                        if (dst->ssthresh &&
                            !(dst->mxlock&(1<<RTAX_SSTHRESH)) &&
-                           tp->snd_cwnd > dst->ssthresh)
-                               dst->ssthresh = tp->snd_cwnd;
+                           (tp->snd_cwnd>>1) > dst->ssthresh)
+                               dst->ssthresh = (tp->snd_cwnd>>1);
                        if (!(dst->mxlock&(1<<RTAX_CWND)) &&
                            tp->snd_cwnd > dst->cwnd)
                                dst->cwnd = tp->snd_cwnd;
-               } else if (tp->snd_cwnd >= tp->snd_ssthresh && !tp->high_seq) {
+               } else if (tp->snd_cwnd > tp->snd_ssthresh &&
+                          tp->ca_state == TCP_CA_Open) {
                        /* Cong. avoidance phase, cwnd is reliable. */
                        if (!(dst->mxlock&(1<<RTAX_SSTHRESH)))
-                               dst->ssthresh = tp->snd_cwnd;
+                               dst->ssthresh = max(tp->snd_cwnd>>1, tp->snd_ssthresh);
                        if (!(dst->mxlock&(1<<RTAX_CWND)))
                                dst->cwnd = (dst->cwnd + tp->snd_cwnd)>>1;
                } else {
@@ -370,9 +564,37 @@ static void tcp_update_metrics(struct sock *sk)
                            tp->snd_ssthresh > dst->ssthresh)
                                dst->ssthresh = tp->snd_ssthresh;
                }
+
+               if (!(dst->mxlock&(1<<RTAX_REORDERING))) {
+                       if (dst->reordering < tp->reordering &&
+                           tp->reordering != sysctl_tcp_reordering)
+                               dst->reordering = tp->reordering;
+               }
        }
 }
 
+/* Increase initial CWND conservatively: if estimated
+ * RTT is low enough (<20msec) or if we have some preset ssthresh.
+ *
+ * Numbers are taken from RFC 2414.
+ */
+__u32 tcp_init_cwnd(struct tcp_opt *tp)
+{
+       __u32 cwnd;
+
+       if (tp->mss_cache > 1460)
+               return 2;
+
+       cwnd = (tp->mss_cache > 1095) ? 3 : 4;
+
+       if (!tp->srtt || (tp->snd_ssthresh >= 0xFFFF && tp->srtt > ((HZ/50)<<3)))
+               cwnd = 2;
+       else if (cwnd > tp->snd_ssthresh)
+               cwnd = tp->snd_ssthresh;
+
+       return min(cwnd, tp->snd_cwnd_clamp);
+}
+
 /* Initialize metrics on socket. */
 
 static void tcp_init_metrics(struct sock *sk)
@@ -392,6 +614,10 @@ static void tcp_init_metrics(struct sock *sk)
                if (tp->snd_ssthresh > tp->snd_cwnd_clamp)
                        tp->snd_ssthresh = tp->snd_cwnd_clamp;
        }
+       if (dst->reordering && tp->reordering != dst->reordering) {
+               tp->sack_ok &= ~2;
+               tp->reordering = dst->reordering;
+       }
 
        if (dst->rtt == 0)
                goto reset;
@@ -422,9 +648,9 @@ static void tcp_init_metrics(struct sock *sk)
        if (tp->rto < TCP_TIMEOUT_INIT && !tp->saw_tstamp)
                goto reset;
        tp->snd_cwnd = tcp_init_cwnd(tp);
+       tp->snd_cwnd_stamp = tcp_time_stamp;
        return;
 
-
 reset:
        /* Play conservative. If timestamps are not
         * supported, TCP will fail to recalculate correct
@@ -437,402 +663,964 @@ reset:
        }
 }
 
-/* WARNING: this must not be called if tp->saw_tstamp was false. */
-extern __inline__ void
-tcp_replace_ts_recent(struct sock *sk, struct tcp_opt *tp, u32 seq)
+static void tcp_update_reordering(struct tcp_opt *tp, int metric, int ts)
 {
-       if (!after(seq, tp->rcv_wup)) {
-               /* PAWS bug workaround wrt. ACK frames, the PAWS discard
-                * extra check below makes sure this can only happen
-                * for pure ACK frames.  -DaveM
-                *
-                * Not only, also it occurs for expired timestamps
-                * and RSTs with bad timestamp option. --ANK
-                */
-
-               if((s32)(tp->rcv_tsval - tp->ts_recent) >= 0 ||
-                  xtime.tv_sec >= tp->ts_recent_stamp + TCP_PAWS_24DAYS) {
-                       tp->ts_recent = tp->rcv_tsval;
-                       tp->ts_recent_stamp = xtime.tv_sec;
-               }
+       if (metric > tp->reordering) {
+               tp->reordering = min(TCP_MAX_REORDERING, metric);
+
+               /* This exciting event is worth remembering. 8) */
+               if (ts)
+                       NET_INC_STATS_BH(TCPTSReorder);
+               else if (IsReno(tp))
+                       NET_INC_STATS_BH(TCPRenoReorder);
+               else if (IsFack(tp))
+                       NET_INC_STATS_BH(TCPFACKReorder);
+               else
+                       NET_INC_STATS_BH(TCPSACKReorder);
+#if FASTRETRANS_DEBUG > 1
+               printk(KERN_DEBUG "Disorder%d %d %u f%u s%u rr%d\n",
+                      tp->sack_ok, tp->ca_state,
+                      tp->reordering, tp->fackets_out, tp->sacked_out,
+                      tp->undo_marker ? tp->undo_retrans : 0);
+#endif
+               /* Disable FACK yet. */
+               tp->sack_ok &= ~2;
        }
 }
 
-extern __inline__ int tcp_paws_discard(struct tcp_opt *tp, struct sk_buff *skb)
+/* This procedure tags the retransmission queue when SACKs arrive.
+ *
+ * We have three tag bits: SACKED(S), RETRANS(R) and LOST(L).
+ * Packets in queue with these bits set are counted in variables
+ * sacked_out, retrans_out and lost_out, correspondingly.
+ *
+ * Valid combinations are:
+ * Tag  InFlight       Description
+ * 0   1               - orig segment is in flight.
+ * S   0               - nothing flies, orig reached receiver.
+ * L   0               - nothing flies, orig lost by net.
+ * R   2               - both orig and retransmit are in flight.
+ * L|R 1               - orig is lost, retransmit is in flight.
+ * S|R  1              - orig reached receiver, retrans is still in flight.
+ * (L|S|R is logically valid, it could occur when L|R is sacked,
+ *  but it is equivalent to plain S and code short-circuits it to S.
+ *  L|S is logically invalid, it would mean -1 packet in flight 8))
+ *
+ * These 6 states form finite state machine, controlled by the following events:
+ * 1. New ACK (+SACK) arrives. (tcp_sacktag_write_queue())
+ * 2. Retransmission. (tcp_retransmit_skb(), tcp_xmit_retransmit_queue())
+ * 3. Loss detection event of one of three flavors:
+ *     A. Scoreboard estimator decided the packet is lost.
+ *        A'. Reno "three dupacks" marks head of queue lost.
+ *        A''. Its FACK modification, head until snd.fack is lost.
+ *     B. SACK arrives sacking data transmitted after never retransmitted
+ *        hole was sent out.
+ *     C. SACK arrives sacking SND.NXT at the moment, when the
+ *        segment was retransmitted.
+ * 4. D-SACK added new rule: D-SACK changes any tag to S.
+ *
+ * It is pleasant to note, that state diagram turns out to be commutative,
+ * so that we are allowed not to be bothered by order of our actions,
+ * when multiple events arrive simultaneously. (see the function below).
+ *
+ * Reordering detection.
+ * --------------------
+ * Reordering metric is maximal distance, which a packet can be displaced
+ * in packet stream. With SACKs we can estimate it:
+ *
+ * 1. SACK fills old hole and the corresponding segment was not
+ *    ever retransmitted -> reordering. Alas, we cannot use it
+ *    when segment was retransmitted.
+ * 2. The last flaw is solved with D-SACK. D-SACK arrives
+ *    for retransmitted and already SACKed segment -> reordering.
+ * Both of these heuristics are not used in Loss state, when we cannot
+ * account for retransmits accurately.
+ */
+static int
+tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_una)
 {
-       return ((s32)(tp->rcv_tsval - tp->ts_recent) < 0 &&
-               xtime.tv_sec < tp->ts_recent_stamp + TCP_PAWS_24DAYS
+       struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
+       unsigned char *ptr = ack_skb->h.raw + TCP_SKB_CB(ack_skb)->sacked;
+       struct tcp_sack_block *sp = (struct tcp_sack_block *)(ptr+2);
+       int num_sacks = (ptr[1] - TCPOLEN_SACK_BASE)>>3;
+       int reord = tp->packets_out;
+       int prior_fackets;
+       u32 lost_retrans = 0;
+       int flag = 0;
+       int i;
 
-                /* Sorry, PAWS as specified is broken wrt. pure-ACKs -DaveM
+       if (!tp->sacked_out)
+               tp->fackets_out = 0;
+       prior_fackets = tp->fackets_out;
 
-                   I cannot see quitely as all the idea behind PAWS
-                   is destroyed 8)
+       for (i=0; i<num_sacks; i++, sp++) {
+               struct sk_buff *skb;
+               __u32 start_seq = ntohl(sp->start_seq);
+               __u32 end_seq = ntohl(sp->end_seq);
+               int fack_count = 0;
+               int dup_sack = 0;
+
+               /* Check for D-SACK. */
+               if (i == 0) {
+                       u32 ack = TCP_SKB_CB(ack_skb)->ack_seq;
+
+                       if (before(start_seq, ack)) {
+                               dup_sack = 1;
+                               NET_INC_STATS_BH(TCPDSACKRecv);
+                       } else if (num_sacks > 1 &&
+                                  !after(end_seq, ntohl(sp[1].end_seq)) &&
+                                  !before(start_seq, ntohl(sp[1].start_seq))) {
+                               dup_sack = 1;
+                               NET_INC_STATS_BH(TCPDSACKOfoRecv);
+                       }
 
-                   The problem is only in reordering duplicate ACKs.
-                   Hence, we can check this rare case more carefully.
+                       /* D-SACK for already forgotten data...
+                        * Do dumb counting. */
+                       if (dup_sack &&
+                           !after(end_seq, prior_snd_una) &&
+                           after(end_seq, tp->undo_marker))
+                               tp->undo_retrans--;
 
-                   1. Check that it is really duplicate ACK (ack==snd_una)
-                   2. Give it some small "replay" window (~RTO)
+                       /* Eliminate too old ACKs, but take into
+                        * account more or less fresh ones, they can
+                        * contain valid SACK info.
+                        */
+                       if (before(ack, prior_snd_una-tp->max_window))
+                               return 0;
+               }
 
-                   We do not know units of foreign ts values, but make conservative
-                   assumption that they are >=1ms. It solves problem
-                   noted in Dave's mail to tcpimpl and does not harm PAWS. --ANK
-                 */
-                && (TCP_SKB_CB(skb)->seq != TCP_SKB_CB(skb)->end_seq ||
-                    TCP_SKB_CB(skb)->ack_seq != tp->snd_una ||
-                    !skb->h.th->ack ||
-                    (s32)(tp->ts_recent - tp->rcv_tsval) > (tp->rto*1024)/HZ));
-}
+               /* Event "B" in the comment above. */
+               if (after(end_seq, tp->high_seq))
+                       flag |= FLAG_DATA_LOST;
 
+               for_retrans_queue(skb, sk, tp) {
+                       u8 sacked = TCP_SKB_CB(skb)->sacked;
+                       int in_sack;
 
-static int __tcp_sequence(struct tcp_opt *tp, u32 seq, u32 end_seq)
-{
-       u32 end_window = tp->rcv_wup + tp->rcv_wnd;
-#ifdef TCP_FORMAL_WINDOW
-       u32 rcv_wnd = tcp_receive_window(tp);
-#else
-       u32 rcv_wnd = tp->rcv_wnd;
-#endif
+                       /* The retransmission queue is always in order, so
+                        * we can short-circuit the walk early.
+                        */
+                       if(!before(TCP_SKB_CB(skb)->seq, end_seq))
+                               break;
 
-       if (rcv_wnd &&
-           after(end_seq, tp->rcv_nxt) &&
-           before(seq, end_window))
-               return 1;
-       if (seq != end_window)
-               return 0;
-       return (seq == end_seq);
-}
+                       fack_count++;
 
-/* This functions checks to see if the tcp header is actually acceptable. */
-extern __inline__ int tcp_sequence(struct tcp_opt *tp, u32 seq, u32 end_seq)
-{
-#ifdef TCP_FORMAL_WINDOW
-       u32 rcv_wnd = tcp_receive_window(tp);
-#else
-       u32 rcv_wnd = tp->rcv_wnd;
-#endif
-       if (seq == tp->rcv_nxt)
-               return (rcv_wnd || (end_seq == seq));
+                       in_sack = !after(start_seq, TCP_SKB_CB(skb)->seq) &&
+                               !before(end_seq, TCP_SKB_CB(skb)->end_seq);
+
+                       /* Account D-SACK for retransmitted packet. */
+                       if ((dup_sack && in_sack) &&
+                           (sacked & TCPCB_RETRANS) &&
+                           after(TCP_SKB_CB(skb)->end_seq, tp->undo_marker))
+                               tp->undo_retrans--;
+
+                       /* The frame is ACKed. */
+                       if (!after(TCP_SKB_CB(skb)->end_seq, tp->snd_una)) {
+                               if (sacked&TCPCB_RETRANS) {
+                                       if ((dup_sack && in_sack) &&
+                                           (sacked&TCPCB_SACKED_ACKED))
+                                               reord = min(fack_count, reord);
+                               } else {
+                                       /* If it was in a hole, we detected reordering. */
+                                       if (fack_count < prior_fackets &&
+                                           !(sacked&TCPCB_SACKED_ACKED))
+                                               reord = min(fack_count, reord);
+                               }
 
-       return __tcp_sequence(tp, seq, end_seq);
-}
+                               /* Nothing to do; acked frame is about to be dropped. */
+                               continue;
+                       }
 
-/* When we get a reset we do this. */
-static void tcp_reset(struct sock *sk)
-{
-       /* We want the right error as BSD sees it (and indeed as we do). */
-       switch (sk->state) {
-               case TCP_SYN_SENT:
-                       sk->err = ECONNREFUSED;
-                       break;
-               case TCP_CLOSE_WAIT:
-                       sk->err = EPIPE;
-                       break;
-               case TCP_CLOSE:
-                       return;
-               default:
-                       sk->err = ECONNRESET;
-       }
+                       if ((sacked&TCPCB_SACKED_RETRANS) &&
+                           after(end_seq, TCP_SKB_CB(skb)->ack_seq) &&
+                           (!lost_retrans || after(end_seq, lost_retrans)))
+                               lost_retrans = end_seq;
 
-       if (!sk->dead)
-               sk->error_report(sk);
+                       if (!in_sack)
+                               continue;
 
-       tcp_done(sk);
-}
+                       if (!(sacked&TCPCB_SACKED_ACKED)) {
+                               if (sacked & TCPCB_SACKED_RETRANS) {
+                                       /* If the segment is not tagged as lost,
+                                        * we do not clear RETRANS, believing
+                                        * that retransmission is still in flight.
+                                        */
+                                       if (sacked & TCPCB_LOST) {
+                                               TCP_SKB_CB(skb)->sacked &= ~(TCPCB_LOST|TCPCB_SACKED_RETRANS);
+                                               tp->lost_out--;
+                                               tp->retrans_out--;
+                                       }
+                               } else {
+                                       /* New sack for not retransmitted frame,
+                                        * which was in hole. It is reordering.
+                                        */
+                                       if (!(sacked & TCPCB_RETRANS) &&
+                                           fack_count < prior_fackets)
+                                               reord = min(fack_count, reord);
 
-/* This tags the retransmission queue when SACKs arrive. */
-static void tcp_sacktag_write_queue(struct sock *sk, struct tcp_sack_block *sp, int nsacks)
-{
-       struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
-       int i = nsacks;
+                                       if (sacked & TCPCB_LOST) {
+                                               TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST;
+                                               tp->lost_out--;
+                                       }
+                               }
 
-       while(i--) {
-               struct sk_buff *skb = skb_peek(&sk->write_queue);
-               __u32 start_seq = ntohl(sp->start_seq);
-               __u32 end_seq = ntohl(sp->end_seq);
-               int fack_count = 0;
+                               TCP_SKB_CB(skb)->sacked |= TCPCB_SACKED_ACKED;
+                               flag |= FLAG_DATA_SACKED;
+                               tp->sacked_out++;
 
-               while((skb != NULL) &&
-                     (skb != tp->send_head) &&
-                     (skb != (struct sk_buff *)&sk->write_queue)) {
-                       /* The retransmission queue is always in order, so
-                        * we can short-circuit the walk early.
-                        */
-                       if(after(TCP_SKB_CB(skb)->seq, end_seq))
-                               break;
+                               if (fack_count > tp->fackets_out)
+                                       tp->fackets_out = fack_count;
+                       } else {
+                               if (dup_sack && (sacked&TCPCB_RETRANS))
+                                       reord = min(fack_count, reord);
+                       }
 
-                       /* We play conservative, we don't allow SACKS to partially
-                        * tag a sequence space.
+                       /* D-SACK. We can detect redundant retransmission
+                        * in S|R and plain R frames and clear it.
+                        * undo_retrans is decreased above, L|R frames
+                        * are accounted above as well.
                         */
-                       fack_count++;
-                       if(!after(start_seq, TCP_SKB_CB(skb)->seq) &&
-                          !before(end_seq, TCP_SKB_CB(skb)->end_seq)) {
-                               /* If this was a retransmitted frame, account for it. */
-                               if((TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS) &&
-                                  tp->retrans_out)
-                                       tp->retrans_out--;
-                               TCP_SKB_CB(skb)->sacked |= TCPCB_SACKED_ACKED;
+                       if (dup_sack &&
+                           (TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_RETRANS)) {
+                               TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS;
+                               tp->retrans_out--;
+                       }
+               }
+       }
 
-                               /* RULE: All new SACKs will either decrease retrans_out
-                                *       or advance fackets_out.
-                                */
-                               if(fack_count > tp->fackets_out)
-                                       tp->fackets_out = fack_count;
+       /* Check for lost retransmit. This superb idea is
+        * borrowed from "ratehalving". Event "C".
+        * Later note: FACK people cheated me again 8),
+        * we have to account for reordering! Ugly,
+        * but should help.
+        */
+       if (lost_retrans && tp->ca_state == TCP_CA_Recovery) {
+               struct sk_buff *skb;
+
+               for_retrans_queue(skb, sk, tp) {
+                       if (after(TCP_SKB_CB(skb)->seq, lost_retrans))
+                               break;
+                       if (!after(TCP_SKB_CB(skb)->end_seq, tp->snd_una))
+                               continue;
+                       if ((TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_RETRANS) &&
+                           after(lost_retrans, TCP_SKB_CB(skb)->ack_seq) &&
+                           (IsFack(tp) ||
+                            !before(lost_retrans, TCP_SKB_CB(skb)->ack_seq+tp->reordering*tp->mss_cache))) {
+                               TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS;
+                               tp->retrans_out--;
+
+                               if (!(TCP_SKB_CB(skb)->sacked&(TCPCB_LOST|TCPCB_SACKED_ACKED))) {
+                                       tp->lost_out++;
+                                       TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
+                                       flag |= FLAG_DATA_SACKED;
+                                       NET_INC_STATS_BH(TCPLostRetransmit);
+                               }
                        }
-                       skb = skb->next;
                }
-               sp++; /* Move on to the next SACK block. */
        }
+
+       tp->left_out = tp->sacked_out + tp->lost_out;
+
+       if (reord < tp->fackets_out && tp->ca_state != TCP_CA_Loss)
+               tcp_update_reordering(tp, (tp->fackets_out+1)-reord, 0);
+
+#if FASTRETRANS_DEBUG > 0
+       BUG_TRAP((int)tp->sacked_out >= 0);
+       BUG_TRAP((int)tp->lost_out >= 0);
+       BUG_TRAP((int)tp->retrans_out >= 0);
+       BUG_TRAP((int)tcp_packets_in_flight(tp) >= 0);
+#endif
+       return flag;
 }
 
-/* Look for tcp options. Normally only called on SYN and SYNACK packets.
- * But, this can also be called on packets in the established flow when
- * the fast version below fails.
+void tcp_clear_retrans(struct tcp_opt *tp)
+{
+       tp->left_out = 0;
+       tp->retrans_out = 0;
+
+       tp->fackets_out = 0;
+       tp->sacked_out = 0;
+       tp->lost_out = 0;
+
+       tp->undo_marker = 0;
+       tp->undo_retrans = 0;
+}
+
+/* Enter Loss state. If "how" is not zero, forget all SACK information
+ * and reset tags completely, otherwise preserve SACKs. If receiver
+ * dropped its ofo queue, we will know this due to reneging detection.
  */
-void tcp_parse_options(struct sock *sk, struct tcphdr *th, struct tcp_opt *tp, int no_fancy)
+void tcp_enter_loss(struct sock *sk, int how)
 {
-       unsigned char *ptr;
-       int length=(th->doff*4)-sizeof(struct tcphdr);
+       struct tcp_opt *tp = &sk->tp_pinfo.af_tcp;
+       struct sk_buff *skb;
+       int cnt = 0;
+
+       /* Reduce ssthresh if it has not yet been made inside this window. */
+       if (tp->ca_state <= TCP_CA_Disorder ||
+           tp->snd_una == tp->high_seq ||
+           (tp->ca_state == TCP_CA_Loss && !tp->retransmits)) {
+               tp->prior_ssthresh = tcp_current_ssthresh(tp);
+               tp->snd_ssthresh = tcp_recalc_ssthresh(tp);
+       }
+       tp->snd_cwnd = 1;
+       tp->snd_cwnd_cnt = 0;
+       tp->snd_cwnd_stamp = tcp_time_stamp;
+
+       tcp_clear_retrans(tp);
+
+       /* Push undo marker, if it was plain RTO and nothing
+        * was retransmitted. */
+       if (!how)
+               tp->undo_marker = tp->snd_una;
+
+       for_retrans_queue(skb, sk, tp) {
+               cnt++;
+               if (TCP_SKB_CB(skb)->sacked&TCPCB_RETRANS)
+                       tp->undo_marker = 0;
+               TCP_SKB_CB(skb)->sacked &= (~TCPCB_TAGBITS)|TCPCB_SACKED_ACKED;
+               if (!(TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_ACKED) || how) {
+                       TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_ACKED;
+                       TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
+                       tp->lost_out++;
+               } else {
+                       tp->sacked_out++;
+                       tp->fackets_out = cnt;
+               }
+       }
+       tp->left_out = tp->sacked_out + tp->lost_out;
 
-       ptr = (unsigned char *)(th + 1);
-       tp->saw_tstamp = 0;
+       tp->reordering = min(tp->reordering, sysctl_tcp_reordering);
+       tp->ca_state = TCP_CA_Loss;
+       tp->high_seq = tp->snd_nxt;
+       TCP_ECN_queue_cwr(tp);
+}
 
-       while(length>0) {
-               int opcode=*ptr++;
-               int opsize;
+static int tcp_check_sack_reneging(struct sock *sk, struct tcp_opt *tp)
+{
+       struct sk_buff *skb;
 
-               switch (opcode) {
-                       case TCPOPT_EOL:
-                               return;
-                       case TCPOPT_NOP:        /* Ref: RFC 793 section 3.1 */
-                               length--;
-                               continue;
-                       default:
-                               opsize=*ptr++;
-                               if (opsize < 2) /* "silly options" */
-                                       return;
-                               if (opsize > length)
-                                       break;  /* don't parse partial options */
-                               switch(opcode) {
-                               case TCPOPT_MSS:
-                                       if(opsize==TCPOLEN_MSS && th->syn) {
-                                               u16 in_mss = ntohs(*(__u16 *)ptr);
-                                               if (in_mss) {
-                                                       if (tp->user_mss && tp->user_mss < in_mss)
-                                                               in_mss = tp->user_mss;
-                                                       tp->mss_clamp = in_mss;
-                                               }
-                                       }
-                                       break;
-                               case TCPOPT_WINDOW:
-                                       if(opsize==TCPOLEN_WINDOW && th->syn)
-                                               if (!no_fancy && sysctl_tcp_window_scaling) {
-                                                       tp->wscale_ok = 1;
-                                                       tp->snd_wscale = *(__u8 *)ptr;
-                                                       if(tp->snd_wscale > 14) {
-                                                               if(net_ratelimit())
-                                                                       printk("tcp_parse_options: Illegal window "
-                                                                              "scaling value %d >14 received.",
-                                                                              tp->snd_wscale);
-                                                               tp->snd_wscale = 14;
-                                                       }
-                                               }
-                                       break;
-                               case TCPOPT_TIMESTAMP:
-                                       if(opsize==TCPOLEN_TIMESTAMP) {
-                                               if (sysctl_tcp_timestamps && !no_fancy) {
-                                                       tp->tstamp_ok = 1;
-                                                       tp->saw_tstamp = 1;
-                                                       tp->rcv_tsval = ntohl(*(__u32 *)ptr);
-                                                       tp->rcv_tsecr = ntohl(*(__u32 *)(ptr+4));
-                                               }
-                                       }
-                                       break;
-                               case TCPOPT_SACK_PERM:
-                                       if(opsize==TCPOLEN_SACK_PERM && th->syn) {
-                                               if (sysctl_tcp_sack && !no_fancy) {
-                                                       tp->sack_ok = 1;
-                                                       tp->num_sacks = 0;
-                                               }
-                                       }
-                                       break;
+       /* If ACK arrived pointing to a remembered SACK,
+        * it means that our remembered SACKs do not reflect
+        * real state of receiver i.e.
+        * receiver _host_ is heavily congested (or buggy).
+        * Do processing similar to RTO timeout.
+        */
+       if ((skb = skb_peek(&sk->write_queue)) != NULL &&
+           (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) {
+               NET_INC_STATS_BH(TCPSACKReneging);
 
-                               case TCPOPT_SACK:
-                                       if((opsize >= (TCPOLEN_SACK_BASE + TCPOLEN_SACK_PERBLOCK)) &&
-                                          sysctl_tcp_sack && (sk != NULL) && !th->syn) {
-                                               int sack_bytes = opsize - TCPOLEN_SACK_BASE;
+               tcp_enter_loss(sk, 1);
+               tp->retransmits++;
+               tcp_retransmit_skb(sk, skb_peek(&sk->write_queue));
+               tcp_reset_xmit_timer(sk, TCP_TIME_RETRANS, tp->rto);
+               return 1;
+       }
+       return 0;
+}
+
+static inline int tcp_fackets_out(struct tcp_opt *tp)
+{
+       return IsReno(tp) ? tp->sacked_out+1 : tp->fackets_out;
+}
 
-                                               if(!(sack_bytes % TCPOLEN_SACK_PERBLOCK)) {
-                                                       int num_sacks = sack_bytes >> 3;
-                                                       struct tcp_sack_block *sackp;
 
-                                                       sackp = (struct tcp_sack_block *)ptr;
-                                                       tcp_sacktag_write_queue(sk, sackp, num_sacks);
-                                               }
-                                       }
-                               };
-                               ptr+=opsize-2;
-                               length-=opsize;
-               };
+/* Linux NewReno/SACK/FACK/ECN state machine.
+ * --------------------------------------
+ *
+ * "Open"      Normal state, no dubious events, fast path.
+ * "Disorder"   In all the respects it is "Open",
+ *             but requires a bit more attention. It is entered when
+ *             we see some SACKs or dupacks. It is split off from "Open"
+ *             mainly to move some processing from fast path to slow one.
+ * "CWR"       CWND was reduced due to some Congestion Notification event.
+ *             It can be ECN, ICMP source quench, local device congestion.
+ * "Recovery"  CWND was reduced, we are fast-retransmitting.
+ * "Loss"      CWND was reduced due to RTO timeout or SACK reneging.
+ *
+ * tcp_fastretrans_alert() is entered:
+ * - each incoming ACK, if state is not "Open"
+ * - when arrived ACK is unusual, namely:
+ *     * SACK
+ *     * Duplicate ACK.
+ *     * ECN ECE.
+ *
+ * Counting packets in flight is pretty simple.
+ *
+ *     in_flight = packets_out - left_out + retrans_out
+ *
+ *     packets_out is SND.NXT-SND.UNA counted in packets.
+ *
+ *     retrans_out is number of retransmitted segments.
+ *
+ *     left_out is number of segments left network, but not ACKed yet.
+ *
+ *             left_out = sacked_out + lost_out
+ *
+ *     sacked_out: Packets, which arrived to receiver out of order
+ *                and hence not ACKed. With SACKs this number is simply
+ *                amount of SACKed data. Even without SACKs
+ *                it is easy to give pretty reliable estimate of this number,
+ *                counting duplicate ACKs.
+ *
+ *       lost_out: Packets lost by network. TCP has no explicit
+ *                "loss notification" feedback from network (for now).
+ *                It means that this number can be only _guessed_.
+ *                Actually, it is the heuristics to predict lossage that
+ *                distinguishes different algorithms.
+ *
+ *     F.e. after RTO, when all the queue is considered as lost,
+ *     lost_out = packets_out and in_flight = retrans_out.
+ *
+ *             Essentially, we have now two algorithms counting
+ *             lost packets.
+ *
+ *             FACK: It is the simplest heuristics. As soon as we decided
+ *             that something is lost, we decide that _all_ not SACKed
+ *             packets until the most forward SACK are lost. I.e.
+ *             lost_out = fackets_out - sacked_out and left_out = fackets_out.
+ *             It is absolutely correct estimate, if network does not reorder
+ *             packets. And it loses any connection to reality when reordering
+ *             takes place. We use FACK by default until reordering
+ *             is suspected on the path to this destination.
+ *
+ *             NewReno: when Recovery is entered, we assume that one segment
+ *             is lost (classic Reno). While we are in Recovery and
+ *             a partial ACK arrives, we assume that one more packet
+ *             is lost (NewReno). These heuristics are the same in NewReno
+ *             and SACK.
+ *
+ *  Imagine, that's all! Forget about all this shamanism about CWND inflation
+ *  deflation etc. CWND is real congestion window, never inflated, changes
+ *  only according to classic VJ rules.
+ *
+ * Really tricky (and requiring careful tuning) part of algorithm
+ * is hidden in functions tcp_time_to_recover() and tcp_xmit_retransmit_queue().
+ * The first determines the moment _when_ we should reduce CWND and,
+ * hence, slow down forward transmission. In fact, it determines the moment
+ * when we decide that hole is caused by loss, rather than by a reorder.
+ *
+ * tcp_xmit_retransmit_queue() decides, _what_ we should retransmit to fill
+ * holes, caused by lost packets.
+ *
+ * And the most logically complicated part of algorithm is undo
+ * heuristics. We detect false retransmits due to both too early
+ * fast retransmit (reordering) and underestimated RTO, analyzing
+ * timestamps and D-SACKs. When we detect that some segments were
+ * retransmitted by mistake and CWND reduction was wrong, we undo
+ * window reduction and abort recovery phase. This logic is hidden
+ * inside several functions named tcp_try_undo_<something>.
+ */
+
+/* This function decides, when we should leave Disordered state
+ * and enter Recovery phase, reducing congestion window.
+ *
+ * Main question: may we further continue forward transmission
+ * with the same cwnd?
+ */
+static int
+tcp_time_to_recover(struct sock *sk, struct tcp_opt *tp)
+{
+       /* Trick#1: The loss is proven. */
+       if (tp->lost_out)
+               return 1;
+
+       /* Not-A-Trick#2 : Classic rule... */
+       if (tcp_fackets_out(tp) > tp->reordering)
+               return 1;
+
+       /* Trick#3: It is still not OK... But will it be useful to delay
+        * recovery more?
+        */
+       if (tp->packets_out <= tp->reordering &&
+           tp->sacked_out >= max(tp->packets_out/2, sysctl_tcp_reordering) &&
+           !tcp_may_send_now(sk, tp)) {
+               /* We have nothing to send. This connection is limited
+                * either by receiver window or by application.
+                */
+               return 1;
        }
+
+       return 0;
 }
 
-/* Fast parse options. This hopes to only see timestamps.
- * If it is wrong it falls back on tcp_parse_options().
+/* If we receive more dupacks than we expected counting segments
+ * in assumption of absent reordering, interpret this as reordering.
+ * The only other reason could be a bug in the receiver's TCP.
  */
-static __inline__ int tcp_fast_parse_options(struct sock *sk, struct tcphdr *th, struct tcp_opt *tp)
+static void tcp_check_reno_reordering(struct tcp_opt *tp, int addend)
 {
-       /* If we didn't send out any options ignore them all. */
-       if (tp->tcp_header_len == sizeof(struct tcphdr))
-               return 0;
-       if (th->doff == sizeof(struct tcphdr)>>2) {
-               tp->saw_tstamp = 0;
-               return 0;
-       } else if (th->doff == (sizeof(struct tcphdr)>>2)+(TCPOLEN_TSTAMP_ALIGNED>>2)) {
-               __u32 *ptr = (__u32 *)(th + 1);
-               if (*ptr == __constant_ntohl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16)
-                                            | (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP)) {
-                       tp->saw_tstamp = 1;
-                       ++ptr;
-                       tp->rcv_tsval = ntohl(*ptr);
-                       ++ptr;
-                       tp->rcv_tsecr = ntohl(*ptr);
-                       return 1;
+       if (tp->sacked_out + 1 > tp->packets_out) {
+               tp->sacked_out = tp->packets_out ? tp->packets_out - 1 : 0;
+               tcp_update_reordering(tp, tp->packets_out+addend, 0);
+       }
+}
+
+/* Emulate SACKs for SACKless connection: account for a new dupack. */
+
+static void tcp_add_reno_sack(struct tcp_opt *tp)
+{
+       ++tp->sacked_out;
+       tcp_check_reno_reordering(tp, 0);
+       tp->left_out = tp->sacked_out + tp->lost_out;
+}
+
+/* Account for ACK, ACKing some data in Reno Recovery phase. */
+
+static void tcp_remove_reno_sacks(struct sock *sk, struct tcp_opt *tp, int acked)
+{
+       if (acked > 0) {
+               /* One ACK ate one lost packet. Must eat! */
+               BUG_TRAP(tp->lost_out == 0);
+
+               /* The rest eat duplicate ACKs. */
+               if (acked-1 >= tp->sacked_out)
+                       tp->sacked_out = 0;
+               else
+                       tp->sacked_out -= acked-1;
+       }
+       tcp_check_reno_reordering(tp, acked);
+       tp->left_out = tp->sacked_out + tp->lost_out;
+}
+
+static inline void tcp_reset_reno_sack(struct tcp_opt *tp)
+{
+       tp->sacked_out = 0;
+       tp->left_out = tp->lost_out;
+}
+
+/* Mark head of queue up as lost. */
+static void
+tcp_mark_head_lost(struct sock *sk, struct tcp_opt *tp, int packets, u32 high_seq)
+{
+       struct sk_buff *skb;
+       int cnt = packets;
+
+       BUG_TRAP(cnt <= tp->packets_out);
+
+       for_retrans_queue(skb, sk, tp) {
+               if (--cnt < 0 || after(TCP_SKB_CB(skb)->end_seq, high_seq))
+                       break;
+               if (!(TCP_SKB_CB(skb)->sacked&TCPCB_TAGBITS)) {
+                       TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
+                       tp->lost_out++;
                }
        }
-       tcp_parse_options(sk, th, tp, 0);
-       return 1;
+       tp->left_out = tp->sacked_out + tp->lost_out;
 }
 
-#define FLAG_DATA              0x01 /* Incoming frame contained data.          */
-#define FLAG_WIN_UPDATE                0x02 /* Incoming ACK was a window update.       */
-#define FLAG_DATA_ACKED                0x04 /* This ACK acknowledged new data.         */
-#define FLAG_RETRANS_DATA_ACKED        0x08 /* "" "" some of which was retransmitted.  */
-#define FLAG_SYN_ACKED         0x10 /* This ACK acknowledged new data.         */
+/* Account newly detected lost packet(s) */
 
-static __inline__ void clear_fast_retransmit(struct tcp_opt *tp)
+static void tcp_update_scoreboard(struct sock *sk, struct tcp_opt *tp)
 {
-       if (tp->dup_acks > 3)
-               tp->snd_cwnd = (tp->snd_ssthresh);
+       if (IsFack(tp)) {
+               int lost = tp->fackets_out - tp->reordering;
+               if (lost <= 0)
+                       lost = 1;
+               tcp_mark_head_lost(sk, tp, lost, tp->high_seq);
+       } else {
+               tcp_mark_head_lost(sk, tp, 1, tp->high_seq);
+       }
+}
 
-       tp->dup_acks = 0;
+/* CWND moderation, preventing bursts due to too big ACKs
+ * in dubious situations.
+ */
+static __inline__ void tcp_moderate_cwnd(struct tcp_opt *tp)
+{
+       tp->snd_cwnd = min(tp->snd_cwnd,
+                          tcp_packets_in_flight(tp)+tcp_max_burst(tp));
+       tp->snd_cwnd_stamp = tcp_time_stamp;
 }
 
-/* NOTE: This code assumes that tp->dup_acks gets cleared when a
- * retransmit timer fires.
+/* Decrease cwnd each second ack. */
+
+static void tcp_cwnd_down(struct tcp_opt *tp)
+{
+       int decr = tp->snd_cwnd_cnt + 1;
+
+       tp->snd_cwnd_cnt = decr&1;
+       decr >>= 1;
+
+       if (decr && tp->snd_cwnd > tp->snd_ssthresh/2)
+               tp->snd_cwnd -= decr;
+
+       tp->snd_cwnd = min(tp->snd_cwnd, tcp_packets_in_flight(tp)+1);
+       tp->snd_cwnd_stamp = tcp_time_stamp;
+}
+
+/* Nothing was retransmitted or returned timestamp is less
+ * than timestamp of the first retransmission.
  */
-static void tcp_fast_retrans(struct sock *sk, u32 ack, int not_dup)
+static __inline__ int tcp_packet_delayed(struct tcp_opt *tp)
+{
+       return !tp->retrans_stamp ||
+               (tp->saw_tstamp &&
+                (__s32)(tp->rcv_tsecr - tp->retrans_stamp) < 0);
+}
+
+/* Undo procedures. */
+
+#if FASTRETRANS_DEBUG > 1
+static void DBGUNDO(struct sock *sk, struct tcp_opt *tp, const char *msg)
+{
+       printk(KERN_DEBUG "Undo %s %u.%u.%u.%u/%u c%u l%u ss%u/%u p%u\n",
+              msg,
+              NIPQUAD(sk->daddr), ntohs(sk->dport),
+              tp->snd_cwnd, tp->left_out,
+              tp->snd_ssthresh, tp->prior_ssthresh, tp->packets_out);
+}
+#else
+#define DBGUNDO(x...) do { } while (0)
+#endif
+
+static void tcp_undo_cwr(struct tcp_opt *tp, int undo)
+{
+       if (tp->prior_ssthresh) {
+               tp->snd_cwnd = max(tp->snd_cwnd, tp->snd_ssthresh<<1);
+               if (undo && tp->prior_ssthresh > tp->snd_ssthresh)
+                       tp->snd_ssthresh = tp->prior_ssthresh;
+       } else {
+               tp->snd_cwnd = max(tp->snd_cwnd, tp->snd_ssthresh);
+       }
+       tcp_moderate_cwnd(tp);
+       tp->snd_cwnd_stamp = tcp_time_stamp;
+}
+
+static inline int tcp_may_undo(struct tcp_opt *tp)
+{
+       return tp->undo_marker &&
+               (!tp->undo_retrans || tcp_packet_delayed(tp));
+}
+
+/* People celebrate: "We love our President!" */
+static int tcp_try_undo_recovery(struct sock *sk, struct tcp_opt *tp)
+{
+       if (tcp_may_undo(tp)) {
+               /* Happy end! We did not retransmit anything
+                * or our original transmission succeeded.
+                */
+               DBGUNDO(sk, tp, tp->ca_state == TCP_CA_Loss ? "loss" : "retrans");
+               tcp_undo_cwr(tp, 1);
+               if (tp->ca_state == TCP_CA_Loss)
+                       NET_INC_STATS_BH(TCPLossUndo);
+               else
+                       NET_INC_STATS_BH(TCPFullUndo);
+               tp->undo_marker = 0;
+       }
+       if (tp->snd_una == tp->high_seq && IsReno(tp)) {
+               /* Hold old state until something *above* high_seq
+                * is ACKed. For Reno it is MUST to prevent false
+                * fast retransmits (RFC2582). SACK TCP is safe. */
+               tcp_moderate_cwnd(tp);
+               return 1;
+       }
+       tp->ca_state = TCP_CA_Open;
+       return 0;
+}
+
+/* Try to undo cwnd reduction, because D-SACKs acked all retransmitted data */
+static void tcp_try_undo_dsack(struct sock *sk, struct tcp_opt *tp)
+{
+       if (tp->undo_marker && !tp->undo_retrans) {
+               DBGUNDO(sk, tp, "D-SACK");
+               tcp_undo_cwr(tp, 1);
+               tp->undo_marker = 0;
+               NET_INC_STATS_BH(TCPDSACKUndo);
+       }
+}
+
+/* Undo during fast recovery after partial ACK. */
+
+static int tcp_try_undo_partial(struct sock *sk, struct tcp_opt *tp, int acked)
+{
+       /* Partial ACK arrived. Force Hoe's retransmit. */
+       int failed = IsReno(tp) || tp->fackets_out>tp->reordering;
+
+       if (tcp_may_undo(tp)) {
+               /* Plain luck! Hole is filled with delayed
+                * packet, rather than with a retransmit.
+                */
+               if (tp->retrans_out == 0)
+                       tp->retrans_stamp = 0;
+
+               tcp_update_reordering(tp, tcp_fackets_out(tp)+acked, 1);
+
+               DBGUNDO(sk, tp, "Hoe");
+               tcp_undo_cwr(tp, 0);
+               NET_INC_STATS_BH(TCPPartialUndo);
+
+               /* So... Do not make Hoe's retransmit yet.
+                * If the first packet was delayed, the rest
+                * ones are most probably delayed as well.
+                */
+               failed = 0;
+       }
+       return failed;
+}
+
+/* Undo during loss recovery after partial ACK. */
+static int tcp_try_undo_loss(struct sock *sk, struct tcp_opt *tp)
+{
+       if (tcp_may_undo(tp)) {
+               struct sk_buff *skb;
+               for_retrans_queue(skb, sk, tp) {
+                       TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST;
+               }
+               DBGUNDO(sk, tp, "partial loss");
+               tp->lost_out = 0;
+               tp->left_out = tp->sacked_out;
+               tcp_undo_cwr(tp, 1);
+               NET_INC_STATS_BH(TCPLossUndo);
+               tp->retransmits = 0;
+               tp->undo_marker = 0;
+               if (!IsReno(tp)) {
+                       tp->ca_state = TCP_CA_Open;
+                       tp->backoff = 0;
+               }
+               return 1;
+       }
+       return 0;
+}
+
+static __inline__ void tcp_complete_cwr(struct tcp_opt *tp)
+{
+       tp->snd_cwnd = min(tp->snd_cwnd, tp->snd_ssthresh);
+       tp->snd_cwnd_stamp = tcp_time_stamp;
+}
+
+static void tcp_try_to_open(struct sock *sk, struct tcp_opt *tp, int flag)
+{
+       tp->left_out = tp->sacked_out;
+
+       if (tp->retrans_out == 0)
+               tp->retrans_stamp = 0;
+
+       if (flag&FLAG_ECE) {
+               tcp_enter_cwr(tp);
+       } else if (tp->ca_state != TCP_CA_CWR) {
+               int state = TCP_CA_Open;
+
+               if (tp->left_out ||
+                   tp->retrans_out ||
+                   tp->undo_marker)
+                       state = TCP_CA_Disorder;
+
+               if (tp->ca_state != state) {
+                       tp->ca_state = state;
+                       tp->high_seq = tp->snd_nxt;
+               }
+       }
+       tcp_moderate_cwnd(tp);
+}
+
+/* Process an event, which can update packets-in-flight not trivially.
+ * Main goal of this function is to calculate new estimate for left_out,
+ * taking into account both packets sitting in receiver's buffer and
+ * packets lost by network.
+ *
+ * Besides that it does CWND reduction, when packet loss is detected
+ * and changes state of machine.
+ *
+ * It does _not_ decide what to send, it is made in function
+ * tcp_xmit_retransmit_queue().
+ */
+static void
+tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una,
+                     int prior_packets, int flag)
 {
        struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
+       int is_dupack = (tp->snd_una == prior_snd_una && !(flag&FLAG_NOT_DUP));
+
+       /* Some technical things:
+        * 1. Reno does not count dupacks (sacked_out) automatically. */
+       if (!tp->packets_out)
+               tp->sacked_out = 0;
+        /* 2. SACK counts snd_fack in packets inaccurately. */
+       if (tp->sacked_out == 0)
+               tp->fackets_out = 0;
+
+        /* Now state machine starts.
+        * A. ECE, hence prohibit cwnd undoing, the reduction is required. */
+       if (flag&FLAG_ECE)
+               tp->prior_ssthresh = 0;
+
+       /* B. In all the states check for reneging SACKs. */
+       if (tp->sacked_out && tcp_check_sack_reneging(sk, tp))
+               return;
 
-       /* Note: If not_dup is set this implies we got a
-        * data carrying packet or a window update.
-        * This carries no new information about possible
-        * lost packets, so we have to ignore it for the purposes
-        * of counting duplicate acks. Ideally this does not imply we
-        * should stop our fast retransmit phase, more acks may come
-        * later without data to help us. Unfortunately this would make
-        * the code below much more complex. For now if I see such
-        * a packet I clear the fast retransmit phase.
-        */
-       if (ack == tp->snd_una && tp->packets_out && (not_dup == 0)) {
-               /* This is the standard reno style fast retransmit branch. */
-
-                /* 1. When the third duplicate ack is received, set ssthresh 
-                 * to one half the current congestion window, but no less 
-                 * than two segments. Retransmit the missing segment.
-                 */
-               if (tp->high_seq == 0 || after(ack, tp->high_seq)) {
-                       tp->dup_acks++;
-                       if ((tp->fackets_out > 3) || (tp->dup_acks == 3)) {
-                               __tcp_enter_cong_avoid(tp);
-                               /* ... and account for 3 ACKs, which are
-                                * already received to this time.
-                                */
-                                tp->snd_cwnd += 3;
-
-                               if(!tp->fackets_out)
-                                       tcp_retransmit_skb(sk,
-                                                          skb_peek(&sk->write_queue));
-                               else
-                                       tcp_fack_retransmit(sk);
-                                tcp_reset_xmit_timer(sk, TCP_TIME_RETRANS, tp->rto);
+       /* C. Process data loss notification, provided it is valid. */
+       if ((flag&FLAG_DATA_LOST) &&
+           before(tp->snd_una, tp->high_seq) &&
+           tp->ca_state != TCP_CA_Open &&
+           tp->fackets_out > tp->reordering) {
+               tcp_mark_head_lost(sk, tp, tp->fackets_out-tp->reordering, tp->high_seq);
+               NET_INC_STATS_BH(TCPLoss);
+       }
+
+       /* D. Synchronize left_out to current state. */
+       tp->left_out = tp->sacked_out + tp->lost_out;
+
+       /* E. Check state exit conditions. State can be terminated
+        *    when high_seq is ACKed. */
+       if (tp->ca_state == TCP_CA_Open) {
+               BUG_TRAP(tp->retrans_out == 0);
+               tp->retrans_stamp = 0;
+       } else if (!before(tp->snd_una, tp->high_seq)) {
+               switch (tp->ca_state) {
+               case TCP_CA_Loss:
+                       tp->retransmits = 0;
+                       if (tcp_try_undo_recovery(sk, tp))
+                               return;
+                       tp->backoff = 0;
+                       break;
+
+               case TCP_CA_CWR:
+                       /* CWR is to be held until something *above* high_seq
+                        * is ACKed for CWR bit to reach receiver. */
+                       if (tp->snd_una != tp->high_seq) {
+                               tcp_complete_cwr(tp);
+                               tp->ca_state = TCP_CA_Open;
                        }
-               } else if (++tp->dup_acks > 3) {
-                       /* 2. Each time another duplicate ACK arrives, increment 
-                        * cwnd by the segment size. [...] Transmit a packet...
-                        *
-                        * Packet transmission will be done on normal flow processing
-                        * since we're not in "retransmit mode".  We do not use
-                        * duplicate ACKs to artificially inflate the congestion
-                        * window when doing FACK.
-                        */
-                       if(!tp->fackets_out) {
-                               tp->snd_cwnd++;
-                       } else {
-                               /* Fill any further holes which may have
-                                * appeared.
-                                *
-                                * We may want to change this to run every
-                                * further multiple-of-3 dup ack increments,
-                                * to be more robust against out-of-order
-                                * packet delivery.  -DaveM
-                                */
-                               tcp_fack_retransmit(sk);
+                       break;
+
+               case TCP_CA_Disorder:
+                       tcp_try_undo_dsack(sk, tp);
+                       if (IsReno(tp) || !tp->undo_marker) {
+                               tp->undo_marker = 0;
+                               tp->ca_state = TCP_CA_Open;
                        }
+                       break;
+
+               case TCP_CA_Recovery:
+                       if (IsReno(tp))
+                               tcp_reset_reno_sack(tp);
+                       if (tcp_try_undo_recovery(sk, tp))
+                               return;
+                       tcp_complete_cwr(tp);
+                       break;
                }
-       } else if (tp->high_seq != 0) {
-               /* In this branch we deal with clearing the Floyd style
-                * block on duplicate fast retransmits, and if requested
-                * we do Hoe style secondary fast retransmits.
-                */
-               if (!before(ack, tp->high_seq) || (not_dup & FLAG_DATA) != 0) {
-                       /* Once we have acked all the packets up to high_seq
-                        * we are done this fast retransmit phase.
-                        * Alternatively data arrived. In this case we
-                        * Have to abort the fast retransmit attempt.
-                        * Note that we do want to accept a window
-                        * update since this is expected with Hoe's algorithm.
-                        */
-                       clear_fast_retransmit(tp);
+       }
 
-                       /* After we have cleared up to high_seq we can
-                        * clear the Floyd style block.
-                        */
-                       if (!before(ack, tp->high_seq)) {
-                               tp->high_seq = 0;
-                               tp->fackets_out = 0;
-                       }
-               } else if (tp->dup_acks >= 3) {
-                       if (!tp->fackets_out) {
-                               /* Hoe Style. We didn't ack the whole
-                                * window. Take this as a cue that
-                                * another packet was lost and retransmit it.
-                                * Don't muck with the congestion window here.
-                                * Note that we have to be careful not to
-                                * act if this was a window update and it
-                                * didn't ack new data, since this does
-                                * not indicate a packet left the system.
-                                * We can test this by just checking
-                                * if ack changed from snd_una, since
-                                * the only way to get here without advancing
-                                * from snd_una is if this was a window update.
-                                */
-                               if (ack != tp->snd_una && before(ack, tp->high_seq)) {
-                                       tcp_retransmit_skb(sk,
-                                                          skb_peek(&sk->write_queue));
-                                       tcp_reset_xmit_timer(sk, TCP_TIME_RETRANS, tp->rto);
-                               }
-                       } else {
-                               /* FACK style, fill any remaining holes in
-                                * receiver's queue.
-                                */
-                               tcp_fack_retransmit(sk);
-                       }
+       /* F. Process state. */
+       switch (tp->ca_state) {
+       case TCP_CA_Recovery:
+               if (prior_snd_una == tp->snd_una) {
+                       if (IsReno(tp) && is_dupack)
+                               tcp_add_reno_sack(tp);
+               } else {
+                       int acked = prior_packets - tp->packets_out;
+                       if (IsReno(tp))
+                               tcp_remove_reno_sacks(sk, tp, acked);
+                       is_dupack = tcp_try_undo_partial(sk, tp, acked);
+               }
+               break;
+       case TCP_CA_Loss:
+               if (flag & FLAG_ACKED)
+                       tcp_reset_xmit_timer(sk, TCP_TIME_RETRANS, tp->rto);
+               if (!tcp_try_undo_loss(sk, tp)) {
+                       tcp_moderate_cwnd(tp);
+                       tcp_xmit_retransmit_queue(sk);
+                       return;
+               }
+               if (tp->ca_state != TCP_CA_Open)
+                       return;
+               /* Loss is undone; fall through to processing in Open state. */
+       default:
+               if (IsReno(tp)) {
+                       if (tp->snd_una != prior_snd_una)
+                               tcp_reset_reno_sack(tp);
+                       if (is_dupack)
+                               tcp_add_reno_sack(tp);
+               }
+
+               if (tp->ca_state == TCP_CA_Disorder)
+                       tcp_try_undo_dsack(sk, tp);
+
+               if (!tcp_time_to_recover(sk, tp)) {
+                       tcp_try_to_open(sk, tp, flag);
+                       return;
+               }
+
+               /* Otherwise enter Recovery state */
+
+               if (IsReno(tp))
+                       NET_INC_STATS_BH(TCPRenoRecovery);
+               else
+                       NET_INC_STATS_BH(TCPSackRecovery);
+
+               tp->high_seq = tp->snd_nxt;
+               tp->prior_ssthresh = 0;
+               tp->undo_marker = tp->snd_una;
+               tp->undo_retrans = tp->retrans_out;
+
+               if (tp->ca_state < TCP_CA_CWR) {
+                       if (!(flag&FLAG_ECE))
+                               tp->prior_ssthresh = tcp_current_ssthresh(tp);
+                       tp->snd_ssthresh = tcp_recalc_ssthresh(tp);
+                       TCP_ECN_queue_cwr(tp);
                }
+
+               tp->snd_cwnd_cnt = 0;
+               tp->ca_state = TCP_CA_Recovery;
+       }
+
+       if (is_dupack)
+               tcp_update_scoreboard(sk, tp);
+       tcp_cwnd_down(tp);
+       tcp_xmit_retransmit_queue(sk);
+}
+
+/* Read draft-ietf-tcplw-high-performance before mucking
+ * with this code. (Supersedes RFC1323)
+ */
+static void tcp_ack_saw_tstamp(struct tcp_opt *tp)
+{
+       __u32 seq_rtt;
+
+       /* RTTM Rule: A TSecr value received in a segment is used to
+        * update the averaged RTT measurement only if the segment
+        * acknowledges some new data, i.e., only if it advances the
+        * left edge of the send window.
+        *
+        * See draft-ietf-tcplw-high-performance-00, section 3.3.
+        * 1998/04/10 Andrey V. Savochkin <saw@msu.ru>
+        */
+       seq_rtt = tcp_time_stamp - tp->rcv_tsecr;
+       tcp_rtt_estimator(tp, seq_rtt);
+       tcp_set_rto(tp);
+       tp->rto <<= tp->backoff;
+       tcp_bound_rto(tp);
+}
+
+static void tcp_ack_no_tstamp(struct tcp_opt *tp, u32 seq_rtt, int flag)
+{
+       /* We don't have a timestamp. Can only use
+        * packets that are not retransmitted to determine
+        * rtt estimates. Also, we must not reset the
+        * backoff for rto until we get a non-retransmitted
+        * packet. This allows us to deal with a situation
+        * where the network delay has increased suddenly.
+        * I.e. Karn's algorithm. (SIGCOMM '87, p5.)
+        */
+
+       if (!tp->retransmits && !(flag & FLAG_RETRANS_DATA_ACKED)) {
+               tp->backoff = 0;
+               tcp_rtt_estimator(tp, seq_rtt);
+               tcp_set_rto(tp);
+               tcp_bound_rto(tp);
        }
 }
 
+static __inline__ void
+tcp_ack_update_rtt(struct tcp_opt *tp, int flag, u32 seq_rtt)
+{
+       if (tp->saw_tstamp)
+               tcp_ack_saw_tstamp(tp);
+       else
+               tcp_ack_no_tstamp(tp, seq_rtt, flag);
+}
+
 /* This is Jacobson's slow start and congestion avoidance. 
  * SIGCOMM '88, p. 328.
  */
@@ -855,31 +1643,38 @@ static __inline__ void tcp_cong_avoid(struct tcp_opt *tp)
         }
 }
 
+static __inline__ void tcp_ack_packets_out(struct sock *sk, struct tcp_opt *tp)
+{
+       if (tp->packets_out==0) {
+               tcp_clear_xmit_timer(sk, TCP_TIME_RETRANS);
+       } else {
+               struct sk_buff *skb = skb_peek(&sk->write_queue);
+               __u32 when = tp->rto - (tcp_time_stamp - TCP_SKB_CB(skb)->when);
+
+               if ((__s32)when <= 0)
+                       when = TCP_RTO_MIN;
+               tcp_reset_xmit_timer(sk, TCP_TIME_RETRANS, when);
+       }
+}
+
 /* Remove acknowledged frames from the retransmission queue. */
-static int tcp_clean_rtx_queue(struct sock *sk, __u32 ack,
-                              __u32 *seq, __u32 *seq_rtt)
+static int tcp_clean_rtx_queue(struct sock *sk)
 {
        struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
        struct sk_buff *skb;
        __u32 now = tcp_time_stamp;
        int acked = 0;
-
-       /* If we are retransmitting, and this ACK clears up to
-        * the retransmit head, or further, then clear our state.
-        */
-       if (tp->retrans_head != NULL &&
-           !before(ack, TCP_SKB_CB(tp->retrans_head)->end_seq))
-               tp->retrans_head = NULL;
+       __u32 seq_rtt = 0; /* F..g gcc... */
 
        while((skb=skb_peek(&sk->write_queue)) && (skb != tp->send_head)) {
                struct tcp_skb_cb *scb = TCP_SKB_CB(skb); 
                __u8 sacked = scb->sacked;
-               
+
                /* If our packet is before the ack sequence we can
                 * discard it as it's confirmed to have arrived at
                 * the other end.
                 */
-               if (after(scb->end_seq, ack))
+               if (after(scb->end_seq, tp->snd_una))
                        break;
 
                /* Initial outgoing SYN's get put onto the write_queue
@@ -889,711 +1684,482 @@ static int tcp_clean_rtx_queue(struct sock *sk, __u32 ack,
                 * connection startup slow start one packet too
                 * quickly.  This is severely frowned upon behavior.
                 */
-               if((sacked & TCPCB_SACKED_RETRANS) && tp->retrans_out)
-                       tp->retrans_out--;
                if(!(scb->flags & TCPCB_FLAG_SYN)) {
                        acked |= FLAG_DATA_ACKED;
-                       if(sacked & TCPCB_SACKED_RETRANS)
-                               acked |= FLAG_RETRANS_DATA_ACKED;
-                       if(tp->fackets_out)
-                               tp->fackets_out--;
                } else {
                        acked |= FLAG_SYN_ACKED;
-                       /* This is pure paranoia. */
-                       tp->retrans_head = NULL;
                }
+
+               if (sacked) {
+                       if(sacked & TCPCB_RETRANS) {
+                               if(sacked & TCPCB_SACKED_RETRANS)
+                                       tp->retrans_out--;
+                               acked |= FLAG_RETRANS_DATA_ACKED;
+                       }
+                       if(sacked & TCPCB_SACKED_ACKED)
+                               tp->sacked_out--;
+                       if(sacked & TCPCB_LOST)
+                               tp->lost_out--;
+               }
+               if(tp->fackets_out)
+                       tp->fackets_out--;
                tp->packets_out--;
-               *seq = scb->seq;
-               *seq_rtt = now - scb->when;
+               seq_rtt = now - scb->when;
                __skb_unlink(skb, skb->list);
-               kfree_skb(skb);
+               tcp_free_skb(sk, skb);
+       }
+
+       if (acked&FLAG_ACKED) {
+               tcp_ack_update_rtt(tp, acked, seq_rtt);
+               tcp_ack_packets_out(sk, tp);
+       }
+
+#if FASTRETRANS_DEBUG > 0
+       BUG_TRAP((int)tp->sacked_out >= 0);
+       BUG_TRAP((int)tp->lost_out >= 0);
+       BUG_TRAP((int)tp->retrans_out >= 0);
+       if (tp->packets_out==0 && tp->sack_ok) {
+               if (tp->lost_out) {
+                       printk(KERN_DEBUG "Leak l=%u %d\n", tp->lost_out, tp->ca_state);
+                       tp->lost_out = 0;
+               }
+               if (tp->sacked_out) {
+                       printk(KERN_DEBUG "Leak s=%u %d\n", tp->sacked_out, tp->ca_state);
+                       tp->sacked_out = 0;
+               }
+               if (tp->retrans_out) {
+                       printk(KERN_DEBUG "Leak r=%u %d\n", tp->retrans_out, tp->ca_state);
+                       tp->retrans_out = 0;
+               }
        }
+#endif
        return acked;
 }
 
-static void tcp_ack_probe(struct sock *sk, __u32 ack)
+static void tcp_ack_probe(struct sock *sk)
 {
        struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
-       
+
        /* Was it a usable window open? */
 
-       if (tp->send_head != NULL) {
-               if (!after(TCP_SKB_CB(tp->send_head)->end_seq, ack + tp->snd_wnd)) {
-                       tp->backoff = 0;
-                       tcp_clear_xmit_timer(sk, TCP_TIME_PROBE0);
-                       /* If packets_out==0, socket must be waked up by
-                        * subsequent tcp_data_snd_check(). This function is
-                        * not for random using!
-                        */
-               } else if (!tp->packets_out) {
-                       tcp_reset_xmit_timer(sk, TCP_TIME_PROBE0,
-                                            min(tp->rto << tp->backoff, TCP_RTO_MAX));
-               }
+       if (!after(TCP_SKB_CB(tp->send_head)->end_seq, tp->snd_una + tp->snd_wnd)) {
+               tp->backoff = 0;
+               tcp_clear_xmit_timer(sk, TCP_TIME_PROBE0);
+               /* Socket must be waked up by subsequent tcp_data_snd_check().
+                * This function is not for random using!
+                */
+       } else {
+               tcp_reset_xmit_timer(sk, TCP_TIME_PROBE0,
+                                    min(tp->rto << tp->backoff, TCP_RTO_MAX));
        }
 }
 
-/* Should we open up the congestion window? */
-static __inline__ int should_advance_cwnd(struct tcp_opt *tp, int flag)
+static __inline__ int tcp_ack_is_dubious(struct tcp_opt *tp, int flag)
 {
-       /* Data must have been acked. */
-       if ((flag & FLAG_DATA_ACKED) == 0)
-               return 0;
-
-       /* Some of the data acked was retransmitted somehow? */
-       if ((flag & FLAG_RETRANS_DATA_ACKED) != 0) {
-               /* We advance in all cases except during
-                * non-FACK fast retransmit/recovery.
-                */
-               if (tp->fackets_out != 0 ||
-                   tp->retransmits != 0)
-                       return 1;
+       return (!(flag & FLAG_NOT_DUP) || (flag & FLAG_CA_ALERT) ||
+               tp->ca_state != TCP_CA_Open);
+}
 
-               /* Non-FACK fast retransmit does it's own
-                * congestion window management, don't get
-                * in the way.
-                */
-               return 0;
-       }
+static __inline__ int tcp_may_raise_cwnd(struct tcp_opt *tp, int flag)
+{
+       return (!(flag & FLAG_ECE) || tp->snd_cwnd < tp->snd_ssthresh) &&
+               !((1<<tp->ca_state)&(TCPF_CA_Recovery|TCPF_CA_CWR));
+}
 
-       /* New non-retransmitted data acked, always advance.  */
-       return 1;
+/* Check that window update is acceptable.
+ * The function assumes that snd_una<=ack<=snd_next.
+ */
+static __inline__ int
+tcp_may_update_window(struct tcp_opt *tp, u32 ack, u32 ack_seq, u32 nwin)
+{
+       return (after(ack, tp->snd_una) ||
+               after(ack_seq, tp->snd_wl1) ||
+               (ack_seq == tp->snd_wl1 && nwin > tp->snd_wnd));
 }
 
-/* Read draft-ietf-tcplw-high-performance before mucking
- * with this code. (Superceeds RFC1323)
+/* Update our send window.
+ *
+ * Window update algorithm, described in RFC793/RFC1122 (used in linux-2.2
+ * and in FreeBSD. NetBSD's one is even worse.) is wrong.
  */
-static void tcp_ack_saw_tstamp(struct sock *sk, struct tcp_opt *tp,
-                              u32 seq, u32 ack, int flag)
+static int tcp_ack_update_window(struct sock *sk, struct tcp_opt *tp,
+                                struct sk_buff *skb, u32 ack, u32 ack_seq)
 {
-       __u32 seq_rtt;
+       int flag = 0;
+       u32 nwin = ntohs(skb->h.th->window) << tp->snd_wscale;
 
-       /* RTTM Rule: A TSecr value received in a segment is used to
-        * update the averaged RTT measurement only if the segment
-        * acknowledges some new data, i.e., only if it advances the
-        * left edge of the send window.
-        *
-        * See draft-ietf-tcplw-high-performance-00, section 3.3.
-        * 1998/04/10 Andrey V. Savochkin <saw@msu.ru>
-        */
-       if (!(flag & (FLAG_DATA_ACKED|FLAG_SYN_ACKED)))
-               return;
+       if (tcp_may_update_window(tp, ack, ack_seq, nwin)) {
+               flag |= FLAG_WIN_UPDATE;
+               tcp_update_wl(tp, ack, ack_seq);
 
-       seq_rtt = tcp_time_stamp - tp->rcv_tsecr;
-       tcp_rtt_estimator(tp, seq_rtt);
-       if (tp->retransmits) {
-               if (tp->packets_out == 0) {
-                       tp->retransmits = 0;
-                       tp->fackets_out = 0;
-                       tp->retrans_out = 0;
-                       tp->backoff = 0;
-                       tcp_set_rto(tp);
-               } else {
-                       /* Still retransmitting, use backoff */
-                       tcp_set_rto(tp);
-                       tp->rto = tp->rto << tp->backoff;
+               if (tp->snd_wnd != nwin) {
+                       tp->snd_wnd = nwin;
+
+                       /* Note, it is the only place, where
+                        * fast path is recovered for sending TCP.
+                        */
+                       if (skb_queue_len(&tp->out_of_order_queue) == 0 &&
+#ifdef TCP_FORMAL_WINDOW
+                           tcp_receive_window(tp) &&
+#endif
+                           !tp->urg_data)
+                               tcp_fast_path_on(tp);
+
+                       if (nwin > tp->max_window) {
+                               tp->max_window = nwin;
+                               tcp_sync_mss(sk, tp->pmtu_cookie);
+                       }
                }
-       } else {
-               tcp_set_rto(tp);
        }
 
-       tcp_bound_rto(tp);
-}
-
-static __inline__ void tcp_ack_packets_out(struct sock *sk, struct tcp_opt *tp)
-{
-       struct sk_buff *skb = skb_peek(&sk->write_queue);
+       tp->snd_una = ack;
 
 #ifdef TCP_DEBUG
-       /* It occured in 2.3, because of racy timers. Namely,
-        * retransmit timer did not check packets_out and retransmitted
-        * send_head sometimes and, hence, messed all the write_queue.
-        * Now it is impossible, I bet. --ANK
-        */
-       if (skb == NULL) {
-               printk("Sucks! packets_out=%d, sk=%p, %d\n", tp->packets_out, sk, sk->state);
-               return;
+       if (before(tp->snd_una + tp->snd_wnd, tp->snd_nxt)) {
+               if (net_ratelimit())
+                       printk(KERN_DEBUG "TCP: peer shrinks window. Bad, what else can I say?\n");
        }
 #endif
 
-       /* Some data was ACK'd, if still retransmitting (due to a
-        * timeout), resend more of the retransmit queue.  The
-        * congestion window is handled properly by that code.
-        */
-       if (tp->retransmits) {
-               tcp_xmit_retransmit_queue(sk);
-               tcp_reset_xmit_timer(sk, TCP_TIME_RETRANS, tp->rto);
-       } else {
-               __u32 when = tp->rto - (tcp_time_stamp - TCP_SKB_CB(skb)->when);
-               if ((__s32)when < 0)
-                       when = 1;
-               tcp_reset_xmit_timer(sk, TCP_TIME_RETRANS, when);
-       }
+       return flag;
 }
 
 /* This routine deals with incoming acks, but not outgoing ones. */
-static int tcp_ack(struct sock *sk, struct tcphdr *th, 
-                  u32 ack_seq, u32 ack, int len)
+static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag)
 {
        struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
-       int flag = 0;
-       u32 seq = 0;
-       u32 seq_rtt = 0;
-
-       if(sk->state == TCP_CLOSE)
-               return 1;       /* Dead, can't ack any more so why bother */
+       u32 prior_snd_una = tp->snd_una;
+       u32 ack_seq = TCP_SKB_CB(skb)->seq;
+       u32 ack = TCP_SKB_CB(skb)->ack_seq;
+       u32 prior_in_flight;
+       int prior_packets;
 
        /* If the ack is newer than sent or older than previous acks
         * then we can probably ignore it.
         */
-       if (after(ack, tp->snd_nxt) || before(ack, tp->snd_una))
+       if (after(ack, tp->snd_nxt))
                goto uninteresting_ack;
 
-       /* If there is data set flag 1 */
-       if (len != th->doff*4)
-               flag |= FLAG_DATA;
+       if (before(ack, prior_snd_una))
+               goto old_ack;
 
-       /* Update our send window. */
-
-       /* This is the window update code as per RFC 793
-        * snd_wl{1,2} are used to prevent unordered
-        * segments from shrinking the window 
-        */
-       if (before(tp->snd_wl1, ack_seq) ||
-           (tp->snd_wl1 == ack_seq && !after(tp->snd_wl2, ack))) {
-               u32 nwin = ntohs(th->window) << tp->snd_wscale;
+       if (!(flag&FLAG_SLOWPATH) && after(ack, prior_snd_una)) {
+               /* Window is constant, pure forward advance.
+                * No more checks are required.
+                * Note, we use the fact that SND.UNA>=SND.WL2.
+                */
+               tcp_update_wl(tp, ack, ack_seq);
+               tp->snd_una = ack;
+               flag |= FLAG_WIN_UPDATE;
 
-               if ((tp->snd_wl2 != ack) || (nwin > tp->snd_wnd)) {
-                       flag |= FLAG_WIN_UPDATE;
-                       if (tp->snd_wnd != nwin) {
-                               tp->snd_wnd = nwin;
+               NET_INC_STATS_BH(TCPHPAcks);
+       } else {
+               if (ack_seq != TCP_SKB_CB(skb)->end_seq)
+                       flag |= FLAG_DATA;
+               else
+                       NET_INC_STATS_BH(TCPPureAcks);
 
-                               /* Note, it is the only place, where
-                                * fast path is recovered for sending TCP.
-                                */
-                               if (skb_queue_len(&tp->out_of_order_queue) == 0 &&
-#ifdef TCP_FORMAL_WINDOW
-                                   tcp_receive_window(tp) &&
-#endif
-                                   !tp->urg_data)
-                                       tcp_fast_path_on(tp);
+               flag |= tcp_ack_update_window(sk, tp, skb, ack, ack_seq);
 
-                               if (nwin > tp->max_window) {
-                                       tp->max_window = nwin;
-                                       tcp_sync_mss(sk, tp->pmtu_cookie);
-                               }
-                       }
+               if (TCP_SKB_CB(skb)->sacked)
+                       flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una);
 
-                       tp->snd_wl1 = ack_seq;
-                       tp->snd_wl2 = ack;
-               }
+               if (TCP_ECN_rcv_ecn_echo(tp, skb->h.th))
+                       flag |= FLAG_ECE;
        }
 
-       /* BEWARE! From this place and until return from this function
-        * snd_nxt and snd_wnd are out of sync. All the routines, called
-        * from here must get "ack" as argument or they should not depend
-        * on right edge of window. It is _UGLY_. It cries to be fixed. --ANK
-        */
-
        /* We passed data and got it acked, remove any soft error
         * log. Something worked...
         */
        sk->err_soft = 0;
-       tp->probes_out = 0;
        tp->rcv_tstamp = tcp_time_stamp;
+       if ((prior_packets = tp->packets_out) == 0)
+               goto no_queue;
 
-       /* See if we can take anything off of the retransmit queue. */
-       flag |= tcp_clean_rtx_queue(sk, ack, &seq, &seq_rtt);
-
-       /* If this ack opens up a zero window, clear backoff.  It was
-        * being used to time the probes, and is probably far higher than
-        * it needs to be for normal retransmission.
-        */
-       if (tcp_timer_is_set(sk, TCP_TIME_PROBE0))
-               tcp_ack_probe(sk, ack);
-
-       /* We must do this here, before code below clears out important
-        * state contained in tp->fackets_out and tp->retransmits.  -DaveM
-        */
-       if (should_advance_cwnd(tp, flag))
-               tcp_cong_avoid(tp);
-
-       /* If we have a timestamp, we always do rtt estimates. */
-       if (tp->saw_tstamp) {
-               tcp_ack_saw_tstamp(sk, tp, seq, ack, flag);
-       } else {
-               /* If we were retransmiting don't count rtt estimate. */
-               if (tp->retransmits) {
-                       if (tp->packets_out == 0) {
-                               tp->retransmits = 0;
-                               tp->fackets_out = 0;
-                               tp->retrans_out = 0;
-                       }
-               } else {
-                       /* We don't have a timestamp. Can only use
-                        * packets that are not retransmitted to determine
-                        * rtt estimates. Also, we must not reset the
-                        * backoff for rto until we get a non-retransmitted
-                        * packet. This allows us to deal with a situation
-                        * where the network delay has increased suddenly.
-                        * I.e. Karn's algorithm. (SIGCOMM '87, p5.)
-                        */
-                       if (flag & (FLAG_DATA_ACKED|FLAG_SYN_ACKED)) {
-                               if(!(flag & FLAG_RETRANS_DATA_ACKED)) {
-                                       tp->backoff = 0;
-                                       tcp_rtt_estimator(tp, seq_rtt);
-                                       tcp_set_rto(tp);
-                                       tcp_bound_rto(tp);
-                               }
-                       }
-               }
-       }
+       prior_in_flight = tcp_packets_in_flight(tp);
 
-       if (tp->packets_out) {
-               if (flag & FLAG_DATA_ACKED)
-                       tcp_ack_packets_out(sk, tp);
+       /* See if we can take anything off of the retransmit queue. */
+       flag |= tcp_clean_rtx_queue(sk);
+
+       if (tcp_ack_is_dubious(tp, flag)) {
+               /* Advance CWND, if state allows this. */
+               if ((flag&FLAG_DATA_ACKED) && prior_in_flight >= tp->snd_cwnd &&
+                   tcp_may_raise_cwnd(tp, flag))
+                       tcp_cong_avoid(tp);
+               tcp_fastretrans_alert(sk, prior_snd_una, prior_packets, flag);
        } else {
-               tcp_clear_xmit_timer(sk, TCP_TIME_RETRANS);
+               if ((flag&FLAG_DATA_ACKED) && prior_in_flight >= tp->snd_cwnd)
+                       tcp_cong_avoid(tp);
        }
 
-       flag &= (FLAG_DATA | FLAG_WIN_UPDATE);
-       if ((ack == tp->snd_una && tp->packets_out && flag == 0) ||
-           (tp->high_seq != 0)) {
-               tcp_fast_retrans(sk, ack, flag);
-       } else {
-               /* Clear any aborted fast retransmit starts. */
-               tp->dup_acks = 0;
-       }
-       /* It is not a brain fart, I thought a bit now. 8)
-        *
-        * Forward progress is indicated, if:
-        *   1. the ack acknowledges new data.
-        *   2. or the ack is duplicate, but it is caused by new segment
-        *      arrival. This case is filtered by:
-        *      - it contains no data, syn or fin.
-        *      - it does not update window.
-        *   3. or new SACK. It is difficult to check, so that we ignore it.
-        *
-        * Forward progress is also indicated by arrival new data,
-        * which was caused by window open from our side. This case is more
-        * difficult and it is made (alas, incorrectly) in tcp_data_queue().
-        *                                              --ANK (990513)
-        */
-       if (ack != tp->snd_una || (flag == 0 && !th->fin))
+       if ((flag & FLAG_FORWARD_PROGRESS) || !(flag&FLAG_NOT_DUP))
                dst_confirm(sk->dst_cache);
 
-       if (ack != tp->snd_una)
-               tp->sorry = 1;
-
-       /* Remember the highest ack received. */
-       tp->snd_una = ack;
        return 1;
 
-uninteresting_ack:
-       SOCK_DEBUG(sk, "Ack ignored %u %u\n", ack, tp->snd_nxt);
-       return 0;
-}
-
-int tcp_paws_check(struct tcp_opt *tp, int rst)
-{
-       if ((s32)(tp->rcv_tsval - tp->ts_recent) >= 0)
-               return 0;
-       if (xtime.tv_sec >= tp->ts_recent_stamp + TCP_PAWS_24DAYS)
-               return 0;
+no_queue:
+       tp->probes_out = 0;
 
-       /* RST segments are not recommended to carry timestamp,
-          and, if they do, it is recommended to ignore PAWS because
-          "their cleanup function should take precedence over timestamps."
-          Certainly, it is mistake. It is necessary to understand the reasons
-          of this constraint to relax it: if peer reboots, clock may go
-          out-of-sync and half-open connections will not be reset.
-          Actually, the problem would be not existing if all
-          the implementations followed draft about maintaining clock
-          via reboots. Linux-2.2 DOES NOT!
-
-          However, we can relax time bounds for RST segments to MSL.
+       /* If this ack opens up a zero window, clear backoff.  It was
+        * being used to time the probes, and is probably far higher than
+        * it needs to be for normal retransmission.
         */
-       if (rst && xtime.tv_sec >= tp->ts_recent_stamp + TCP_PAWS_MSL)
-               return 0;
+       if (tp->send_head)
+               tcp_ack_probe(sk);
        return 1;
-}
-
-static __inline__ int tcp_in_window(u32 seq, u32 end_seq, u32 s_win, u32 e_win)
-{
-       if (seq == s_win)
-               return 1;
-       if (after(end_seq, s_win) && before(seq, e_win))
-               return 1;
-       return (seq == e_win && seq == end_seq);
-}
 
-/* New-style handling of TIME_WAIT sockets. */
+old_ack:
+       if (TCP_SKB_CB(skb)->sacked)
+               tcp_sacktag_write_queue(sk, skb, prior_snd_una);
 
-/* Must be called with locally disabled BHs. */
-void tcp_timewait_kill(struct tcp_tw_bucket *tw)
-{
-       struct tcp_ehash_bucket *ehead;
-       struct tcp_bind_hashbucket *bhead;
-       struct tcp_bind_bucket *tb;
+uninteresting_ack:
+       SOCK_DEBUG(sk, "Ack %u out of %u:%u\n", ack, tp->snd_una, tp->snd_nxt);
+       return 0;
+}
 
-       /* Unlink from established hashes. */
-       ehead = &tcp_ehash[tw->hashent];
-       write_lock(&ehead->lock);
-       if (!tw->pprev) {
-               write_unlock(&ehead->lock);
-               return;
-       }
-       if(tw->next)
-               tw->next->pprev = tw->pprev;
-       *(tw->pprev) = tw->next;
-       tw->pprev = NULL;
-       write_unlock(&ehead->lock);
-
-       /* Disassociate with bind bucket. */
-       bhead = &tcp_bhash[tcp_bhashfn(tw->num)];
-       spin_lock(&bhead->lock);
-       if ((tb = tw->tb) != NULL) {
-               if(tw->bind_next)
-                       tw->bind_next->bind_pprev = tw->bind_pprev;
-               *(tw->bind_pprev) = tw->bind_next;
-               tw->tb = NULL;
-               if (tb->owners == NULL) {
-                       if (tb->next)
-                               tb->next->pprev = tb->pprev;
-                       *(tb->pprev) = tb->next;
-                       kmem_cache_free(tcp_bucket_cachep, tb);
-               }
-       }
-       spin_unlock(&bhead->lock);
 
-#ifdef INET_REFCNT_DEBUG
-       if (atomic_read(&tw->refcnt) != 1) {
-               printk(KERN_DEBUG "tw_bucket %p refcnt=%d\n", tw, atomic_read(&tw->refcnt));
-       }
-#endif
-       tcp_tw_put(tw);
-}
-
-/* 
- * * Main purpose of TIME-WAIT state is to close connection gracefully,
- *   when one of ends sits in LAST-ACK or CLOSING retransmitting FIN
- *   (and, probably, tail of data) and one or more our ACKs are lost.
- * * What is TIME-WAIT timeout? It is associated with maximal packet
- *   lifetime in the internet, which results in wrong conclusion, that
- *   it is set to catch "old duplicate segments" wandering out of their path.
- *   It is not quite correct. This timeout is calculated so that it exceeds
- *   maximal retransmision timeout enough to allow to lose one (or more)
- *   segments sent by peer and our ACKs. This time may be calculated from RTO.
- * * When TIME-WAIT socket receives RST, it means that another end
- *   finally closed and we are allowed to kill TIME-WAIT too.
- * * Second purpose of TIME-WAIT is catching old duplicate segments.
- *   Well, certainly it is pure paranoia, but if we load TIME-WAIT
- *   with this semantics, we MUST NOT kill TIME-WAIT state with RSTs.
- * * If we invented some more clever way to catch duplicates
- *   (f.e. based on PAWS), we could truncate TIME-WAIT to several RTOs.
- *
- * The algorithm below is based on FORMAL INTERPRETATION of RFCs.
- * When you compare it to RFCs, please, read section SEGMENT ARRIVES
- * from the very beginning.
- *
- * NOTE. With recycling (and later with fin-wait-2) TW bucket
- * is _not_ stateless. It means, that strictly speaking we must
- * spinlock it. I do not want! Well, probability of misbehaviour
- * is ridiculously low and, seems, we could use some mb() tricks
- * to avoid misread sequence numbers, states etc.  --ANK
+/* Look for tcp options. Normally only called on SYN and SYNACK packets.
+ * But, this can also be called on packets in the established flow when
+ * the fast version below fails.
  */
-enum tcp_tw_status
-tcp_timewait_state_process(struct tcp_tw_bucket *tw, struct sk_buff *skb,
-                          struct tcphdr *th, unsigned len)
+void tcp_parse_options(struct sk_buff *skb, struct tcp_opt *tp)
 {
-       struct tcp_opt tp;
-       int paws_reject = 0;
-
-       tp.saw_tstamp = 0;
-       if (th->doff > (sizeof(struct tcphdr)>>2) && tw->ts_recent_stamp) {
-               tcp_parse_options(NULL, th, &tp, 0);
-
-               if (tp.saw_tstamp) {
-                       tp.ts_recent = tw->ts_recent;
-                       tp.ts_recent_stamp = tw->ts_recent_stamp;
-                       paws_reject = tcp_paws_check(&tp, th->rst);
-               }
-       }
-
-       if (tw->substate == TCP_FIN_WAIT2) {
-               /* Just repeat all the checks of tcp_rcv_state_process() */
-
-               /* Out of window, send ACK */
-               if (paws_reject ||
-                   !tcp_in_window(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq,
-                                  tw->rcv_nxt, tw->rcv_nxt + tw->rcv_wnd))
-                       return TCP_TW_ACK;
-
-               if (th->rst)
-                       goto kill;
-
-               if (th->syn && TCP_SKB_CB(skb)->seq != tw->syn_seq)
-                       goto kill_with_rst;
-
-               /* Dup ACK? */
-               if (!after(TCP_SKB_CB(skb)->end_seq, tw->rcv_nxt) ||
-                   TCP_SKB_CB(skb)->end_seq == TCP_SKB_CB(skb)->seq) {
-                       tcp_tw_put(tw);
-                       return TCP_TW_SUCCESS;
-               }
-
-               /* New data or FIN. If new data arrive after half-duplex close,
-                * reset.
-                */
-               if (!th->fin || TCP_SKB_CB(skb)->end_seq != tw->rcv_nxt+1) {
-kill_with_rst:
-                       tcp_tw_deschedule(tw);
-                       tcp_timewait_kill(tw);
-                       tcp_tw_put(tw);
-                       return TCP_TW_RST;
-               }
-
-               /* FIN arrived, enter true time-wait state. */
-               tw->substate = TCP_TIME_WAIT;
-               tw->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
-               if (tp.saw_tstamp) {
-                       tw->ts_recent_stamp = xtime.tv_sec;
-                       tw->ts_recent = tp.rcv_tsval;
-               }
-
-               /* I am shamed, but failed to make it more elegant.
-                * Yes, it is direct reference to IP, which is impossible
-                * to generalize to IPv6. Taking into account that IPv6
-                * do not undertsnad recycling in any case, it not
-                * a big problem in practice. --ANK */
-               if (tw->family == AF_INET &&
-                   sysctl_tcp_tw_recycle && tw->ts_recent_stamp &&
-                   tcp_v4_tw_remember_stamp(tw))
-                       tcp_tw_schedule(tw, tw->timeout);
-               else
-                       tcp_tw_schedule(tw, TCP_TIMEWAIT_LEN);
-               return TCP_TW_ACK;
-       }
-
-       /*
-        *      Now real TIME-WAIT state.
-        *
-        *      RFC 1122:
-        *      "When a connection is [...] on TIME-WAIT state [...]
-        *      [a TCP] MAY accept a new SYN from the remote TCP to
-        *      reopen the connection directly, if it:
-        *      
-        *      (1)  assigns its initial sequence number for the new
-        *      connection to be larger than the largest sequence
-        *      number it used on the previous connection incarnation,
-        *      and
-        *
-        *      (2)  returns to TIME-WAIT state if the SYN turns out 
-        *      to be an old duplicate".
-        */
+       unsigned char *ptr;
+       struct tcphdr *th = skb->h.th;
+       int length=(th->doff*4)-sizeof(struct tcphdr);
 
-       if (!paws_reject &&
-           (TCP_SKB_CB(skb)->seq == TCP_SKB_CB(skb)->end_seq &&
-            TCP_SKB_CB(skb)->seq == tw->rcv_nxt)) {
-               /* In window segment, it may be only reset or bare ack. */
+       ptr = (unsigned char *)(th + 1);
+       tp->saw_tstamp = 0;
 
-               if (th->rst) {
-                       /* This is TIME_WAIT assasination, in two flavors.
-                        * Oh well... nobody has a sufficient solution to this
-                        * protocol bug yet.
-                        */
-                       if (sysctl_tcp_rfc1337 == 0) {
-kill:
-                               tcp_tw_deschedule(tw);
-                               tcp_timewait_kill(tw);
-                               tcp_tw_put(tw);
-                               return TCP_TW_SUCCESS;
-                       }
-               }
-               tcp_tw_schedule(tw, TCP_TIMEWAIT_LEN);
+       while(length>0) {
+               int opcode=*ptr++;
+               int opsize;
 
-               if (tp.saw_tstamp) {
-                       tw->ts_recent = tp.rcv_tsval;
-                       tw->ts_recent_stamp = xtime.tv_sec;
-               }
+               switch (opcode) {
+                       case TCPOPT_EOL:
+                               return;
+                       case TCPOPT_NOP:        /* Ref: RFC 793 section 3.1 */
+                               length--;
+                               continue;
+                       default:
+                               opsize=*ptr++;
+                               if (opsize < 2) /* "silly options" */
+                                       return;
+                               if (opsize > length)
+                                       break;  /* don't parse partial options */
+                               switch(opcode) {
+                               case TCPOPT_MSS:
+                                       if(opsize==TCPOLEN_MSS && th->syn) {
+                                               u16 in_mss = ntohs(*(__u16 *)ptr);
+                                               if (in_mss) {
+                                                       if (tp->user_mss && tp->user_mss < in_mss)
+                                                               in_mss = tp->user_mss;
+                                                       tp->mss_clamp = in_mss;
+                                               }
+                                       }
+                                       break;
+                               case TCPOPT_WINDOW:
+                                       if(opsize==TCPOLEN_WINDOW && th->syn)
+                                               if (sysctl_tcp_window_scaling) {
+                                                       tp->wscale_ok = 1;
+                                                       tp->snd_wscale = *(__u8 *)ptr;
+                                                       if(tp->snd_wscale > 14) {
+                                                               if(net_ratelimit())
+                                                                       printk("tcp_parse_options: Illegal window "
+                                                                              "scaling value %d >14 received.",
+                                                                              tp->snd_wscale);
+                                                               tp->snd_wscale = 14;
+                                                       }
+                                               }
+                                       break;
+                               case TCPOPT_TIMESTAMP:
+                                       if(opsize==TCPOLEN_TIMESTAMP) {
+                                               if (sysctl_tcp_timestamps) {
+                                                       tp->tstamp_ok = 1;
+                                                       tp->saw_tstamp = 1;
+                                                       tp->rcv_tsval = ntohl(*(__u32 *)ptr);
+                                                       tp->rcv_tsecr = ntohl(*(__u32 *)(ptr+4));
+                                               }
+                                       }
+                                       break;
+                               case TCPOPT_SACK_PERM:
+                                       if(opsize==TCPOLEN_SACK_PERM && th->syn) {
+                                               if (sysctl_tcp_sack) {
+                                                       tp->sack_ok = 1;
+                                                       tcp_sack_reset(tp);
+                                               }
+                                       }
+                                       break;
 
-               tcp_tw_put(tw);
-               return TCP_TW_SUCCESS;
+                               case TCPOPT_SACK:
+                                       if((opsize >= (TCPOLEN_SACK_BASE + TCPOLEN_SACK_PERBLOCK)) &&
+                                          !((opsize - TCPOLEN_SACK_BASE) % TCPOLEN_SACK_PERBLOCK) &&
+                                          tp->sack_ok) {
+                                               TCP_SKB_CB(skb)->sacked = (ptr - 2) - (unsigned char *)th;
+                                       }
+                               };
+                               ptr+=opsize-2;
+                               length-=opsize;
+               };
        }
+}
 
-       /* Out of window segment.
-
-          All the segments are ACKed immediately.
-
-          The only exception is new SYN. We accept it, if it is
-          not old duplicate and we are not in danger to be killed
-          by delayed old duplicates. RFC check is that it has
-          newer sequence number works at rates <40Mbit/sec.
-          However, if paws works, it is reliable AND even more,
-          we even may relax silly seq space cutoff.
-
-          RED-PEN: we violate main RFC requirement, if this SYN will appear
-          old duplicate (i.e. we receive RST in reply to SYN-ACK),
-          we must return socket to time-wait state. It is not good,
-          but not fatal yet.
-        */
-
-       if (th->syn && !th->rst && !th->ack && !paws_reject &&
-           (after(TCP_SKB_CB(skb)->seq, tw->rcv_nxt) ||
-            (tp.saw_tstamp && (s32)(tw->ts_recent - tp.rcv_tsval) < 0))) {
-               u32 isn = tw->snd_nxt + 2;
-               if (isn == 0)
-                       isn++;
-               TCP_SKB_CB(skb)->when = isn;
-               return TCP_TW_SYN;
+/* Fast parse options. This hopes to only see timestamps.
+ * If it is wrong it falls back on tcp_parse_options().
+ */
+static __inline__ int tcp_fast_parse_options(struct sk_buff *skb, struct tcphdr *th, struct tcp_opt *tp)
+{
+       if (th->doff == sizeof(struct tcphdr)>>2) {
+               tp->saw_tstamp = 0;
+               return 0;
+       } else if (th->doff == (sizeof(struct tcphdr)>>2)+(TCPOLEN_TSTAMP_ALIGNED>>2)) {
+               __u32 *ptr = (__u32 *)(th + 1);
+               if (*ptr == __constant_ntohl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16)
+                                            | (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP)) {
+                       tp->saw_tstamp = 1;
+                       ++ptr;
+                       tp->rcv_tsval = ntohl(*ptr);
+                       ++ptr;
+                       tp->rcv_tsecr = ntohl(*ptr);
+                       return 1;
+               }
        }
+       tcp_parse_options(skb, tp);
+       return 1;
+}
 
-       if (paws_reject)
-               NET_INC_STATS_BH(PAWSEstabRejected);
+extern __inline__ void
+tcp_store_ts_recent(struct tcp_opt *tp)
+{
+       tp->ts_recent = tp->rcv_tsval;
+       tp->ts_recent_stamp = xtime.tv_sec;
+}
 
-       if(!th->rst) {
-               /* In this case we must reset the TIMEWAIT timer.
+extern __inline__ void
+tcp_replace_ts_recent(struct tcp_opt *tp, u32 seq)
+{
+       if (tp->saw_tstamp && !after(seq, tp->rcv_wup)) {
+               /* PAWS bug workaround wrt. ACK frames, the PAWS discard
+                * extra check below makes sure this can only happen
+                * for pure ACK frames.  -DaveM
                 *
-                * If it is ACKless SYN it may be both old duplicate
-                * and new good SYN with random sequence number <rcv_nxt.
-                * Do not reschedule in the last case.
+                * Not only, also it occurs for expired timestamps.
                 */
-               if (paws_reject || th->ack)
-                       tcp_tw_schedule(tw, TCP_TIMEWAIT_LEN);
 
-               /* Send ACK. Note, we do not put the bucket,
-                * it will be released by caller.
-                */
-               return TCP_TW_ACK;
+               if((s32)(tp->rcv_tsval - tp->ts_recent) >= 0 ||
+                  xtime.tv_sec >= tp->ts_recent_stamp + TCP_PAWS_24DAYS)
+                       tcp_store_ts_recent(tp);
        }
-       tcp_tw_put(tw);
-       return TCP_TW_SUCCESS;
 }
 
-/* Enter the time wait state.  This is called with locally disabled BH.
- * Essentially we whip up a timewait bucket, copy the
- * relevant info into it from the SK, and mess with hash chains
- * and list linkage.
+/* Sorry, PAWS as specified is broken wrt. pure-ACKs -DaveM
+ *
+ * It is not fatal. If this ACK does _not_ change critical state (seqs, window)
+ * it can pass through stack. So, the following predicate verifies that
+ * this segment is not used for anything but congestion avoidance or
+ * fast retransmit. Moreover, we even are able to eliminate most of such
+ * second order effects, if we apply some small "replay" window (~RTO)
+ * to timestamp space.
+ *
+ * All these measures still do not guarantee that we reject wrapped ACKs
+ * on networks with high bandwidth, when sequence space is recycled fastly,
+ * but it guarantees that such events will be very rare and do not affect
+ * connection seriously. This doesn't look nice, but alas, PAWS is really
+ * buggy extension.
+ *
+ * [ Later note. Even worse! It is buggy for segments _with_ data. RFC
+ * states that events when retransmit arrives after original data are rare.
+ * It is a blatant lie. VJ forgot about fast retransmit! 8)8) It is
+ * the biggest problem on large power networks even with minor reordering.
+ * OK, let's give it small replay window. If peer clock is even 1hz, it is safe
+ * up to bandwidth of 18Gigabit/sec. 8) ]
  */
-static void __tcp_tw_hashdance(struct sock *sk, struct tcp_tw_bucket *tw)
+
+static int tcp_disordered_ack(struct tcp_opt *tp, struct sk_buff *skb)
 {
-       struct tcp_ehash_bucket *ehead = &tcp_ehash[sk->hashent];
-       struct tcp_bind_hashbucket *bhead;
-       struct sock **head, *sktw;
+       struct tcphdr *th = skb->h.th;
+       u32 seq = TCP_SKB_CB(skb)->seq;
+       u32 ack = TCP_SKB_CB(skb)->ack_seq;
 
-       write_lock(&ehead->lock);
+       return (/* 1. Pure ACK with correct sequence number. */
+               (th->ack && seq == TCP_SKB_CB(skb)->end_seq && seq == tp->rcv_nxt) &&
 
-       /* Step 1: Remove SK from established hash. */
-       if (sk->pprev) {
-               if(sk->next)
-                       sk->next->pprev = sk->pprev;
-               *sk->pprev = sk->next;
-               sk->pprev = NULL;
-               sock_prot_dec_use(sk->prot);
-       }
+               /* 2. ... and duplicate ACK. */
+               ack == tp->snd_una &&
 
-       /* Step 2: Hash TW into TIMEWAIT half of established hash table. */
-       head = &(ehead + tcp_ehash_size)->chain;
-       sktw = (struct sock *)tw;
-       if((sktw->next = *head) != NULL)
-               (*head)->pprev = &sktw->next;
-       *head = sktw;
-       sktw->pprev = head;
-       atomic_inc(&tw->refcnt);
+               /* 3. ... and does not update window. */
+               !tcp_may_update_window(tp, ack, seq, ntohs(th->window)<<tp->snd_wscale) &&
 
-       write_unlock(&ehead->lock);
+               /* 4. ... and sits in replay window. */
+               (s32)(tp->ts_recent - tp->rcv_tsval) <= (tp->rto*1024)/HZ);
+}
 
-       /* Step 3: Put TW into bind hash. Original socket stays there too.
-          Note, that any socket with sk->num!=0 MUST be bound in binding
-          cache, even if it is closed.
-        */
-       bhead = &tcp_bhash[tcp_bhashfn(sk->num)];
-       spin_lock(&bhead->lock);
-       tw->tb = (struct tcp_bind_bucket *)sk->prev;
-       BUG_TRAP(sk->prev!=NULL);
-       if ((tw->bind_next = tw->tb->owners) != NULL)
-               tw->tb->owners->bind_pprev = &tw->bind_next;
-       tw->tb->owners = (struct sock*)tw;
-       tw->bind_pprev = &tw->tb->owners;
-       spin_unlock(&bhead->lock);
-}
-
-/* 
- * Move a socket to time-wait or dead fin-wait-2 state.
- */ 
-void tcp_time_wait(struct sock *sk, int state, int timeo)
+extern __inline__ int tcp_paws_discard(struct tcp_opt *tp, struct sk_buff *skb)
 {
-       struct tcp_tw_bucket *tw = NULL;
-       struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
-       int recycle_ok = 0;
-
-       if (sysctl_tcp_tw_recycle && tp->ts_recent_stamp)
-               recycle_ok = tp->af_specific->remember_stamp(sk);
-
-       if (tcp_tw_count < sysctl_tcp_max_tw_buckets)
-               tw = kmem_cache_alloc(tcp_timewait_cachep, SLAB_ATOMIC);
-
-       if(tw != NULL) {
-               int rto = (tp->rto<<2) - (tp->rto>>1);
-
-               /* Give us an identity. */
-               tw->daddr       = sk->daddr;
-               tw->rcv_saddr   = sk->rcv_saddr;
-               tw->bound_dev_if= sk->bound_dev_if;
-               tw->num         = sk->num;
-               tw->state       = TCP_TIME_WAIT;
-               tw->substate    = state;
-               tw->sport       = sk->sport;
-               tw->dport       = sk->dport;
-               tw->family      = sk->family;
-               tw->reuse       = sk->reuse;
-               tw->rcv_wscale  = tp->rcv_wscale;
-               atomic_set(&tw->refcnt, 0);
-
-               tw->hashent     = sk->hashent;
-               tw->rcv_nxt     = tp->rcv_nxt;
-               tw->snd_nxt     = tp->snd_nxt;
-               tw->rcv_wnd     = tcp_receive_window(tp);
-               tw->syn_seq     = tp->syn_seq;
-               tw->ts_recent   = tp->ts_recent;
-               tw->ts_recent_stamp= tp->ts_recent_stamp;
-               tw->pprev_death = NULL;
-
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
-               if(tw->family == PF_INET6) {
-                       memcpy(&tw->v6_daddr,
-                              &sk->net_pinfo.af_inet6.daddr,
-                              sizeof(struct in6_addr));
-                       memcpy(&tw->v6_rcv_saddr,
-                              &sk->net_pinfo.af_inet6.rcv_saddr,
-                              sizeof(struct in6_addr));
-               }
+       return ((s32)(tp->ts_recent - tp->rcv_tsval) > TCP_PAWS_WINDOW &&
+               xtime.tv_sec < tp->ts_recent_stamp + TCP_PAWS_24DAYS &&
+               !tcp_disordered_ack(tp, skb));
+}
+
+static int __tcp_sequence(struct tcp_opt *tp, u32 seq, u32 end_seq)
+{
+       u32 end_window = tp->rcv_wup + tp->rcv_wnd;
+#ifdef TCP_FORMAL_WINDOW
+       u32 rcv_wnd = tcp_receive_window(tp);
+#else
+       u32 rcv_wnd = tp->rcv_wnd;
 #endif
-               /* Linkage updates. */
-               __tcp_tw_hashdance(sk, tw);
 
-               /* Get the TIME_WAIT timeout firing. */
-               if (timeo < rto)
-                       timeo = rto;
+       if (rcv_wnd &&
+           after(end_seq, tp->rcv_nxt) &&
+           before(seq, end_window))
+               return 1;
+       if (seq != end_window)
+               return 0;
+       return (seq == end_seq);
+}
 
-               if (recycle_ok) {
-                       tw->timeout = rto;
-               } else {
-                       tw->timeout = TCP_TIMEWAIT_LEN;
-                       if (state == TCP_TIME_WAIT)
-                               timeo = TCP_TIMEWAIT_LEN;
-               }
+/* This functions checks to see if the tcp header is actually acceptable.
+ *
+ * Actually, our check is seriously broken, we must accept RST,ACK,URG
+ * even on zero window effectively trimming data. It is RFC, guys.
+ * But our check is so beautiful, that I do not want to repair it
+ * now. However, taking into account those stupid plans to start to
+ * send some texts with RST, we have to handle at least this case. --ANK
+ */
+extern __inline__ int tcp_sequence(struct tcp_opt *tp, u32 seq, u32 end_seq, int rst)
+{
+#ifdef TCP_FORMAL_WINDOW
+       u32 rcv_wnd = tcp_receive_window(tp);
+#else
+       u32 rcv_wnd = tp->rcv_wnd;
+#endif
+       if (seq == tp->rcv_nxt)
+               return (rcv_wnd || (end_seq == seq) || rst);
 
-               tcp_tw_schedule(tw, timeo);
-       } else {
-               /* Sorry, if we're out of memory, just CLOSE this
-                * socket up.  We've got bigger problems than
-                * non-graceful socket closings.
-                */
-               if (net_ratelimit())
-                       printk(KERN_INFO "TCP: time wait bucket table overflow\n");
+       return __tcp_sequence(tp, seq, end_seq);
+}
+
+/* When we get a reset we do this. */
+static void tcp_reset(struct sock *sk)
+{
+       /* We want the right error as BSD sees it (and indeed as we do). */
+       switch (sk->state) {
+               case TCP_SYN_SENT:
+                       sk->err = ECONNREFUSED;
+                       break;
+               case TCP_CLOSE_WAIT:
+                       sk->err = EPIPE;
+                       break;
+               case TCP_CLOSE:
+                       return;
+               default:
+                       sk->err = ECONNRESET;
        }
 
-       tcp_update_metrics(sk);
+       if (!sk->dead)
+               sk->error_report(sk);
+
        tcp_done(sk);
 }
 
@@ -1611,22 +2177,22 @@ void tcp_time_wait(struct sock *sk, int state, int timeo)
  *
  *     If we are in FINWAIT-2, a received FIN moves us to TIME-WAIT.
  */
 static void tcp_fin(struct sk_buff *skb, struct sock *sk, struct tcphdr *th)
 {
        struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
 
        tp->fin_seq = TCP_SKB_CB(skb)->end_seq;
-       tp->ack.pending = 1;
-       tp->ack.quick = 0;
+       tcp_schedule_ack(tp);
 
        sk->shutdown |= RCV_SHUTDOWN;
+       sk->done = 1;
 
        switch(sk->state) {
                case TCP_SYN_RECV:
                case TCP_ESTABLISHED:
                        /* Move to CLOSE_WAIT */
                        tcp_set_state(sk, TCP_CLOSE_WAIT);
+                       tp->ack.pingpong = 1;
                        break;
 
                case TCP_CLOSE_WAIT:
@@ -1644,6 +2210,7 @@ static void tcp_fin(struct sk_buff *skb, struct sock *sk, struct tcphdr *th)
                         * happens, we must ack the received FIN and
                         * enter the CLOSING state.
                         */
+                       tcp_send_ack(sk);
                        tcp_set_state(sk, TCP_CLOSING);
                        break;
                case TCP_FIN_WAIT2:
@@ -1664,7 +2231,8 @@ static void tcp_fin(struct sk_buff *skb, struct sock *sk, struct tcphdr *th)
         */
        __skb_queue_purge(&tp->out_of_order_queue);
        if (tp->sack_ok)
-               tp->num_sacks = 0;
+               tcp_sack_reset(tp);
+       tcp_mem_reclaim(sk);
 
        if (!sk->dead) {
                sk->state_change(sk);
@@ -1677,51 +2245,90 @@ static void tcp_fin(struct sk_buff *skb, struct sock *sk, struct tcphdr *th)
        }
 }
 
+static __inline__ int
+tcp_sack_extend(struct tcp_sack_block *sp, u32 seq, u32 end_seq)
+{
+       if (!after(seq, sp->end_seq) && !after(sp->start_seq, end_seq)) {
+               if (before(seq, sp->start_seq))
+                       sp->start_seq = seq;
+               if (after(end_seq, sp->end_seq))
+                       sp->end_seq = end_seq;
+               return 1;
+       }
+       return 0;
+}
+
+static __inline__ void tcp_dsack_set(struct tcp_opt *tp, u32 seq, u32 end_seq)
+{
+       if (tp->sack_ok && sysctl_tcp_dsack) {
+               if (before(seq, tp->rcv_nxt))
+                       NET_INC_STATS_BH(TCPDSACKOldSent);
+               else
+                       NET_INC_STATS_BH(TCPDSACKOfoSent);
+
+               tp->dsack = 1;
+               tp->duplicate_sack[0].start_seq = seq;
+               tp->duplicate_sack[0].end_seq = end_seq;
+               tp->eff_sacks = min(tp->num_sacks+1, 4-tp->tstamp_ok);
+       }
+}
+
+static __inline__ void tcp_dsack_extend(struct tcp_opt *tp, u32 seq, u32 end_seq)
+{
+       if (!tp->dsack)
+               tcp_dsack_set(tp, seq, end_seq);
+       else
+               tcp_sack_extend(tp->duplicate_sack, seq, end_seq);
+}
+
+static void tcp_send_dupack(struct sock *sk, struct sk_buff *skb)
+{
+       struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
+
+       if (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq &&
+           before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) {
+               NET_INC_STATS_BH(DelayedACKLost);
+               tcp_enter_quickack_mode(tp);
+
+               if (tp->sack_ok && sysctl_tcp_dsack) {
+                       u32 end_seq = TCP_SKB_CB(skb)->end_seq;
+
+                       if (after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt))
+                               end_seq = tp->rcv_nxt;
+                       tcp_dsack_set(tp, TCP_SKB_CB(skb)->seq, end_seq);
+               }
+       }
+
+       tcp_send_ack(sk);
+}
+
 /* These routines update the SACK block as out-of-order packets arrive or
  * in-order packets close up the sequence space.
  */
-static void tcp_sack_maybe_coalesce(struct tcp_opt *tp, struct tcp_sack_block *sp)
+static void tcp_sack_maybe_coalesce(struct tcp_opt *tp)
 {
-       int this_sack, num_sacks = tp->num_sacks;
-       struct tcp_sack_block *swalk = &tp->selective_acks[0];
+       int this_sack;
+       struct tcp_sack_block *sp = &tp->selective_acks[0];
+       struct tcp_sack_block *swalk = sp+1;
 
-       /* If more than one SACK block, see if the recent change to SP eats into
+       /* See if the recent change to the first SACK eats into
         * or hits the sequence space of other SACK blocks, if so coalesce.
         */
-       if(num_sacks != 1) {
-               for(this_sack = 0; this_sack < num_sacks; this_sack++, swalk++) {
-                       if(swalk == sp)
-                               continue;
+       for (this_sack = 1; this_sack < tp->num_sacks; ) {
+               if (tcp_sack_extend(sp, swalk->start_seq, swalk->end_seq)) {
+                       int i;
 
-                       /* First case, bottom of SP moves into top of the
-                        * sequence space of SWALK.
+                       /* Zap SWALK, by moving every further SACK up by one slot.
+                        * Decrease num_sacks.
                         */
-                       if(between(sp->start_seq, swalk->start_seq, swalk->end_seq)) {
-                               sp->start_seq = swalk->start_seq;
-                               goto coalesce;
-                       }
-                       /* Second case, top of SP moves into bottom of the
-                        * sequence space of SWALK.
-                        */
-                       if(between(sp->end_seq, swalk->start_seq, swalk->end_seq)) {
-                               sp->end_seq = swalk->end_seq;
-                               goto coalesce;
-                       }
+                       tp->num_sacks--;
+                       tp->eff_sacks = min(tp->num_sacks+tp->dsack, 4-tp->tstamp_ok);
+                       for(i=this_sack; i < tp->num_sacks; i++)
+                               sp[i] = sp[i+1];
+                       continue;
                }
+               this_sack++, swalk++;
        }
-       /* SP is the only SACK, or no coalescing cases found. */
-       return;
-
-coalesce:
-       /* Zap SWALK, by moving every further SACK up by one slot.
-        * Decrease num_sacks.
-        */
-       for(; this_sack < num_sacks-1; this_sack++, swalk++) {
-               struct tcp_sack_block *next = (swalk + 1);
-               swalk->start_seq = next->start_seq;
-               swalk->end_seq = next->end_seq;
-       }
-       tp->num_sacks--;
 }
 
 static __inline__ void tcp_sack_swap(struct tcp_sack_block *sack1, struct tcp_sack_block *sack2)
@@ -1737,151 +2344,117 @@ static __inline__ void tcp_sack_swap(struct tcp_sack_block *sack1, struct tcp_sa
        sack2->end_seq = tmp;
 }
 
-static void tcp_sack_new_ofo_skb(struct sock *sk, struct sk_buff *skb)
+static void tcp_sack_new_ofo_skb(struct sock *sk, u32 seq, u32 end_seq)
 {
        struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
        struct tcp_sack_block *sp = &tp->selective_acks[0];
        int cur_sacks = tp->num_sacks;
+       int this_sack;
 
        if (!cur_sacks)
                goto new_sack;
 
-       /* Optimize for the common case, new ofo frames arrive
-        * "in order". ;-)  This also satisfies the requirements
-        * of RFC2018 about ordering of SACKs.
-        */
-       if(sp->end_seq == TCP_SKB_CB(skb)->seq) {
-               sp->end_seq = TCP_SKB_CB(skb)->end_seq;
-               tcp_sack_maybe_coalesce(tp, sp);
-       } else if(sp->start_seq == TCP_SKB_CB(skb)->end_seq) {
-               /* Re-ordered arrival, in this case, can be optimized
-                * as well.
-                */
-               sp->start_seq = TCP_SKB_CB(skb)->seq;
-               tcp_sack_maybe_coalesce(tp, sp);
-       } else {
-               struct tcp_sack_block *swap = sp + 1;
-               int this_sack, max_sacks = (tp->tstamp_ok ? 3 : 4);
-
-               /* Oh well, we have to move things around.
-                * Try to find a SACK we can tack this onto.
-                */
-
-               for(this_sack = 1; this_sack < cur_sacks; this_sack++, swap++) {
-                       if((swap->end_seq == TCP_SKB_CB(skb)->seq) ||
-                          (swap->start_seq == TCP_SKB_CB(skb)->end_seq)) {
-                               if(swap->end_seq == TCP_SKB_CB(skb)->seq)
-                                       swap->end_seq = TCP_SKB_CB(skb)->end_seq;
-                               else
-                                       swap->start_seq = TCP_SKB_CB(skb)->seq;
-                               tcp_sack_swap(sp, swap);
-                               tcp_sack_maybe_coalesce(tp, sp);
-                               return;
-                       }
-               }
-
-               /* Could not find an adjacent existing SACK, build a new one,
-                * put it at the front, and shift everyone else down.  We
-                * always know there is at least one SACK present already here.
-                *
-                * If the sack array is full, forget about the last one.
-                */
-               if (cur_sacks >= max_sacks) {
-                       cur_sacks--;
-                       tp->num_sacks--;
-               }
-               while(cur_sacks >= 1) {
-                       struct tcp_sack_block *this = &tp->selective_acks[cur_sacks];
-                       struct tcp_sack_block *prev = (this - 1);
-                       this->start_seq = prev->start_seq;
-                       this->end_seq = prev->end_seq;
-                       cur_sacks--;
+       for (this_sack=0; this_sack<cur_sacks; this_sack++, sp++) {
+               if (tcp_sack_extend(sp, seq, end_seq)) {
+                       /* Rotate this_sack to the first one. */
+                       for (; this_sack>0; this_sack--, sp--)
+                               tcp_sack_swap(sp, sp-1);
+                       if (cur_sacks > 1)
+                               tcp_sack_maybe_coalesce(tp);
+                       return;
                }
+       }
 
-       new_sack:
-               /* Build the new head SACK, and we're done. */
-               sp->start_seq = TCP_SKB_CB(skb)->seq;
-               sp->end_seq = TCP_SKB_CB(skb)->end_seq;
-               tp->num_sacks++;
+       /* Could not find an adjacent existing SACK, build a new one,
+        * put it at the front, and shift everyone else down.  We
+        * always know there is at least one SACK present already here.
+        *
+        * If the sack array is full, forget about the last one.
+        */
+       if (this_sack >= 4) {
+               this_sack--;
+               tp->num_sacks--;
+               sp--;
        }
+       for(; this_sack > 0; this_sack--, sp--)
+               *sp = *(sp-1);
+
+new_sack:
+       /* Build the new head SACK, and we're done. */
+       sp->start_seq = seq;
+       sp->end_seq = end_seq;
+       tp->num_sacks++;
+       tp->eff_sacks = min(tp->num_sacks+tp->dsack, 4-tp->tstamp_ok);
 }
 
-static void tcp_sack_remove_skb(struct tcp_opt *tp, struct sk_buff *skb)
+/* RCV.NXT advances, some SACKs should be eaten. */
+
+static void tcp_sack_remove(struct tcp_opt *tp)
 {
        struct tcp_sack_block *sp = &tp->selective_acks[0];
        int num_sacks = tp->num_sacks;
        int this_sack;
 
-       /* This is an in order data segment _or_ an out-of-order SKB being
-        * moved to the receive queue, so we know this removed SKB will eat
-        * from the front of a SACK.
-        */
-       for(this_sack = 0; this_sack < num_sacks; this_sack++, sp++) {
-               /* Check if the start of the sack is covered by skb. */
-               if(!before(sp->start_seq, TCP_SKB_CB(skb)->seq) &&
-                  before(sp->start_seq, TCP_SKB_CB(skb)->end_seq))
-                       break;
+       /* Empty ofo queue, hence, all the SACKs are eaten. Clear. */
+       if (skb_queue_len(&tp->out_of_order_queue) == 0) {
+               tp->num_sacks = 0;
+               tp->eff_sacks = tp->dsack;
+               return;
        }
 
-       /* This should only happen if so many SACKs get built that some get
-        * pushed out before we get here, or we eat some in sequence packets
-        * which are before the first SACK block.
-        */
-       if(this_sack >= num_sacks)
-               return;
+       for(this_sack = 0; this_sack < num_sacks; ) {
+               /* Check if the start of the sack is covered by RCV.NXT. */
+               if (!before(tp->rcv_nxt, sp->start_seq)) {
+                       int i;
+
+                       /* RCV.NXT must cover all the block! */
+                       BUG_TRAP(!before(tp->rcv_nxt, sp->end_seq));
 
-       sp->start_seq = TCP_SKB_CB(skb)->end_seq;
-       if(!before(sp->start_seq, sp->end_seq)) {
-               /* Zap this SACK, by moving forward any other SACKS. */
-               for(this_sack += 1; this_sack < num_sacks; this_sack++, sp++) {
-                       struct tcp_sack_block *next = (sp + 1);
-                       sp->start_seq = next->start_seq;
-                       sp->end_seq = next->end_seq;
+                       /* Zap this SACK, by moving forward any other SACKS. */
+                       for (i=this_sack+1; i < num_sacks; i++)
+                               sp[i-1] = sp[i];
+                       num_sacks--;
+                       continue;
                }
-               tp->num_sacks--;
+               this_sack++;
+               sp++;
        }
-}
-
-static void tcp_sack_extend(struct tcp_opt *tp, struct sk_buff *old_skb, struct sk_buff *new_skb)
-{
-       struct tcp_sack_block *sp = &tp->selective_acks[0];
-       int num_sacks = tp->num_sacks;
-       int this_sack;
-
-       for(this_sack = 0; this_sack < num_sacks; this_sack++, sp++) {
-               if(sp->end_seq == TCP_SKB_CB(old_skb)->end_seq)
-                       break;
+       if (num_sacks != tp->num_sacks) {
+               tp->num_sacks = num_sacks;
+               tp->eff_sacks = min(tp->num_sacks+tp->dsack, 4-tp->tstamp_ok);
        }
-       if(this_sack >= num_sacks)
-               return;
-       sp->end_seq = TCP_SKB_CB(new_skb)->end_seq;
 }
 
-
 /* This one checks to see if we can put data from the
  * out_of_order queue into the receive_queue.
  */
 static void tcp_ofo_queue(struct sock *sk)
 {
-       struct sk_buff *skb;
        struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
+       __u32 dsack_high = tp->rcv_nxt;
+       struct sk_buff *skb;
 
-       while ((skb = skb_peek(&tp->out_of_order_queue))) {
+       while ((skb = skb_peek(&tp->out_of_order_queue)) != NULL) {
                if (after(TCP_SKB_CB(skb)->seq, tp->rcv_nxt))
                        break;
 
+               if (before(TCP_SKB_CB(skb)->seq, dsack_high)) {
+                       __u32 dsack = dsack_high;
+                       if (before(TCP_SKB_CB(skb)->end_seq, dsack_high))
+                               dsack_high = TCP_SKB_CB(skb)->end_seq;
+                       tcp_dsack_extend(tp, TCP_SKB_CB(skb)->seq, dsack);
+               }
+
                if (!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt)) {
                        SOCK_DEBUG(sk, "ofo packet was already received \n");
                        __skb_unlink(skb, skb->list);
-                       kfree_skb(skb);
+                       __kfree_skb(skb);
                        continue;
                }
                SOCK_DEBUG(sk, "ofo requeuing : rcv_next %X seq %X - %X\n",
                           tp->rcv_nxt, TCP_SKB_CB(skb)->seq,
                           TCP_SKB_CB(skb)->end_seq);
 
-               if(tp->sack_ok)
-                       tcp_sack_remove_skb(tp, skb);
                __skb_unlink(skb, skb->list);
                __skb_queue_tail(&sk->receive_queue, skb);
                tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
@@ -1892,10 +2465,14 @@ static void tcp_ofo_queue(struct sock *sk)
 
 static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
 {
-       struct sk_buff *skb1;
        struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
        int eaten = 0;
 
+       if (tp->dsack) {
+               tp->dsack = 0;
+               tp->eff_sacks = min(tp->num_sacks, 4-tp->tstamp_ok);
+       }
+
        /*  Queue data for delivery to the user.
         *  Packets in sequence go to the receive queue.
         *  Out of sequence packets to the out_of_order_queue.
@@ -1924,20 +2501,27 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
 
                if (!eaten) {
 queue_and_out:
-                       skb_set_owner_r(skb, sk);
+                       tcp_set_owner_r(skb, sk);
                        __skb_queue_tail(&sk->receive_queue, skb);
                }
-               dst_confirm(sk->dst_cache);
                tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
                if(skb->len)
-                       tcp_event_data_recv(tp, skb); 
+                       tcp_event_data_recv(sk, tp, skb);
                if(skb->h.th->fin)
                        tcp_fin(skb, sk, skb->h.th);
 
-               /* This may have eaten into a SACK block. */
-               if(tp->sack_ok && tp->num_sacks)
-                       tcp_sack_remove_skb(tp, skb);
-               tcp_ofo_queue(sk);
+               if (skb_queue_len(&tp->out_of_order_queue)) {
+                       tcp_ofo_queue(sk);
+
+                       /* RFC2581. 4.2. SHOULD send immediate ACK, when
+                        * gap in queue is filled.
+                        */
+                       if (skb_queue_len(&tp->out_of_order_queue) == 0)
+                               tp->ack.pingpong = 0;
+               }
+
+               if(tp->num_sacks)
+                       tcp_sack_remove(tp);
 
                /* Turn on fast path. */ 
                if (skb_queue_len(&tp->out_of_order_queue) == 0 &&
@@ -1948,24 +2532,28 @@ queue_and_out:
                        tcp_fast_path_on(tp);
 
                if (eaten) {
-                       kfree_skb(skb);
+                       __kfree_skb(skb);
                } else if (!sk->dead)
                        sk->data_ready(sk, 0);
                return;
        }
 
+#ifdef TCP_DEBUG
        /* An old packet, either a retransmit or some packet got lost. */
        if (!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt)) {
                /* A retransmit, 2nd most common case.  Force an imediate ack.
                 * 
                 * It is impossible, seq is checked by top level.
                 */
-               NETDEBUG(printk("retransmit in tcp_data_queue: seq %X\n", TCP_SKB_CB(skb)->seq));
+               printk("BUG: retransmit in tcp_data_queue: seq %X\n", TCP_SKB_CB(skb)->seq);
                tcp_enter_quickack_mode(tp);
-               tp->ack.pending = 1;
-               kfree_skb(skb);
+               tcp_schedule_ack(tp);
+               __kfree_skb(skb);
                return;
        }
+#endif
+
+       tcp_enter_quickack_mode(tp);
 
        if (before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) {
                /* Partial packet, seq < rcv_next < end_seq */
@@ -1973,67 +2561,198 @@ queue_and_out:
                           tp->rcv_nxt, TCP_SKB_CB(skb)->seq,
                           TCP_SKB_CB(skb)->end_seq);
 
+               tcp_dsack_set(tp, TCP_SKB_CB(skb)->seq, tp->rcv_nxt);
                goto queue_and_out;
        }
 
-       /* Ok. This is an out_of_order segment, force an ack. */
-       tp->ack.pending = 1;
+       TCP_ECN_check_ce(tp, skb);
 
        /* Disable header prediction. */
        tp->pred_flags = 0;
-
+       tcp_schedule_ack(tp);
 
        SOCK_DEBUG(sk, "out of order segment: rcv_next %X seq %X - %X\n",
                   tp->rcv_nxt, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq);
 
-       skb_set_owner_r(skb, sk);
+       tcp_set_owner_r(skb, sk);
 
        if (skb_peek(&tp->out_of_order_queue) == NULL) {
                /* Initial out of order segment, build 1 SACK. */
                if(tp->sack_ok) {
                        tp->num_sacks = 1;
+                       tp->dsack = 0;
+                       tp->eff_sacks = 1;
                        tp->selective_acks[0].start_seq = TCP_SKB_CB(skb)->seq;
                        tp->selective_acks[0].end_seq = TCP_SKB_CB(skb)->end_seq;
                }
                __skb_queue_head(&tp->out_of_order_queue,skb);
        } else {
-               for(skb1=tp->out_of_order_queue.prev; ; skb1 = skb1->prev) {
-                       /* Already there. */
-                       if (TCP_SKB_CB(skb)->seq == TCP_SKB_CB(skb1)->seq) {
-                               if (skb->len >= skb1->len) {
-                                       if(tp->sack_ok)
-                                               tcp_sack_extend(tp, skb1, skb);
-                                       __skb_append(skb1, skb);
-                                       __skb_unlink(skb1, skb1->list);
-                                       kfree_skb(skb1);
-                               } else {
-                                       /* A duplicate, smaller than what is in the
-                                        * out-of-order queue right now, toss it.
-                                        */
-                                       kfree_skb(skb);
-                               }
+               struct sk_buff *skb1=tp->out_of_order_queue.prev;
+               u32 seq = TCP_SKB_CB(skb)->seq;
+               u32 end_seq = TCP_SKB_CB(skb)->end_seq;
+
+               if (seq == TCP_SKB_CB(skb1)->end_seq) {
+                       __skb_append(skb1, skb);
+
+                       if (tp->num_sacks == 0 ||
+                           tp->selective_acks[0].end_seq != seq)
+                               goto add_sack;
+
+                       /* Common case: data arrives in order after the hole. */
+                       tp->selective_acks[0].end_seq = end_seq;
+                       return;
+               }
+
+               /* Find place to insert this segment. */
+               do {
+                       if (!after(TCP_SKB_CB(skb1)->seq, seq))
                                break;
+               } while ((skb1=skb1->prev) != (struct sk_buff*)&tp->out_of_order_queue);
+
+               /* Does skb overlap the previous one? */
+               if (skb1 != (struct sk_buff*)&tp->out_of_order_queue &&
+                   before(seq, TCP_SKB_CB(skb1)->end_seq)) {
+                       if (!after(end_seq, TCP_SKB_CB(skb1)->end_seq)) {
+                               /* All the bits are present. Drop. */
+                               __kfree_skb(skb);
+                               tcp_dsack_set(tp, seq, end_seq);
+                               goto add_sack;
                        }
-                       
-                       if (after(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb1)->seq)) {
-                               __skb_append(skb1, skb);
-                               if(tp->sack_ok)
-                                       tcp_sack_new_ofo_skb(sk, skb);
-                               break;
+                       if (after(seq, TCP_SKB_CB(skb1)->seq)) {
+                               /* Partial overlap. */
+                               tcp_dsack_set(tp, seq, TCP_SKB_CB(skb1)->end_seq);
+                       } else {
+                               skb1 = skb1->prev;
                        }
+               }
+               __skb_insert(skb, skb1, skb1->next, &tp->out_of_order_queue);
+               
+               /* And clean segments covered by new one as whole. */
+               while ((skb1 = skb->next) != (struct sk_buff*)&tp->out_of_order_queue &&
+                      after(end_seq, TCP_SKB_CB(skb1)->seq)) {
+                      if (before(end_seq, TCP_SKB_CB(skb1)->end_seq)) {
+                              tcp_dsack_extend(tp, TCP_SKB_CB(skb1)->seq, end_seq);
+                              break;
+                      }
+                      __skb_unlink(skb1, skb1->list);
+                      tcp_dsack_extend(tp, TCP_SKB_CB(skb1)->seq, TCP_SKB_CB(skb1)->end_seq);
+                      __kfree_skb(skb1);
+               }
 
-                        /* See if we've hit the start. If so insert. */
-                       if (skb1 == skb_peek(&tp->out_of_order_queue)) {
-                               __skb_queue_head(&tp->out_of_order_queue,skb);
-                               if(tp->sack_ok)
-                                       tcp_sack_new_ofo_skb(sk, skb);
-                               break;
+add_sack:
+               if (tp->sack_ok)
+                       tcp_sack_new_ofo_skb(sk, seq, end_seq);
+       }
+}
+
+
+static void tcp_collapse_queue(struct sock *sk, struct sk_buff_head *q)
+{
+       struct sk_buff *skb = skb_peek(q);
+       struct sk_buff *skb_next;
+
+       while (skb &&
+              skb != (struct sk_buff *)q &&
+              (skb_next = skb->next) != (struct sk_buff *)q) {
+               struct tcp_skb_cb *scb = TCP_SKB_CB(skb);
+               struct tcp_skb_cb *scb_next = TCP_SKB_CB(skb_next);
+
+               if (scb->end_seq == scb_next->seq &&
+                   skb_tailroom(skb) >= skb_next->len &&
+#define TCP_DONT_COLLAPSE (TCP_FLAG_FIN|TCP_FLAG_URG|TCP_FLAG_SYN)
+                   !(tcp_flag_word(skb->h.th)&TCP_DONT_COLLAPSE) &&
+                   !(tcp_flag_word(skb_next->h.th)&TCP_DONT_COLLAPSE)) {
+                       /* OK to collapse two skbs to one */
+                       memcpy(skb_put(skb, skb_next->len), skb_next->data, skb_next->len);
+                       __skb_unlink(skb_next, skb_next->list);
+                       scb->end_seq = scb_next->end_seq;
+                       __kfree_skb(skb_next);
+                       NET_INC_STATS_BH(TCPRcvCollapsed);
+               } else {
+                       /* Lots of spare tailroom, reallocate this skb to trim it. */
+                       if (tcp_win_from_space(skb->truesize) > skb->len &&
+                           skb_tailroom(skb) > sizeof(struct sk_buff) + 16) {
+                               struct sk_buff *nskb;
+
+                               nskb = skb_copy_expand(skb, skb_headroom(skb), 0, GFP_ATOMIC);
+                               if (nskb) {
+                                       tcp_set_owner_r(nskb, sk);
+                                       memcpy(nskb->data-skb_headroom(skb),
+                                              skb->data-skb_headroom(skb),
+                                              skb_headroom(skb));
+                                       __skb_append(skb, nskb);
+                                       __skb_unlink(skb, skb->list);
+                                       __kfree_skb(skb);
+                               }
                        }
+                       skb = skb_next;
                }
        }
-       return;
 }
 
+/* Clean the out_of_order queue if we can, trying to get
+ * the socket within its memory limits again.
+ *
+ * Return less than zero if we should start dropping frames
+ * until the socket owning process reads some of the data
+ * to stabilize the situation.
+ */
+static int tcp_prune_queue(struct sock *sk)
+{
+       struct tcp_opt *tp = &sk->tp_pinfo.af_tcp; 
+
+       SOCK_DEBUG(sk, "prune_queue: c=%x\n", tp->copied_seq);
+
+       NET_INC_STATS_BH(PruneCalled);
+
+       if (atomic_read(&sk->rmem_alloc) >= sk->rcvbuf)
+               tcp_clamp_window(sk, tp);
+       else if (tcp_memory_pressure)
+               tp->rcv_ssthresh = min(tp->rcv_ssthresh, 4*tp->advmss);
+
+       tcp_collapse_queue(sk, &sk->receive_queue);
+       tcp_collapse_queue(sk, &tp->out_of_order_queue);
+       tcp_mem_reclaim(sk);
+
+       if (atomic_read(&sk->rmem_alloc) <= sk->rcvbuf)
+               return 0;
+
+       /* Collapsing did not help, destructive actions follow.
+        * This should never occur. */
+
+       /* First, purge the out_of_order queue. */
+       if (skb_queue_len(&tp->out_of_order_queue)) {
+               net_statistics[smp_processor_id()*2].OfoPruned += skb_queue_len(&tp->out_of_order_queue);
+               __skb_queue_purge(&tp->out_of_order_queue);
+
+               /* Reset SACK state.  A conforming SACK implementation will
+                * do the same at a timeout based retransmit.  When a connection
+                * is in a sad state like this, we care only about integrity
+                * of the connection not performance.
+                */
+               if(tp->sack_ok)
+                       tcp_sack_reset(tp);
+               tcp_mem_reclaim(sk);
+       }
+
+       if(atomic_read(&sk->rmem_alloc) <= sk->rcvbuf)
+               return 0;
+
+       /* If we are really being abused, tell the caller to silently
+        * drop receive data on the floor.  It will get retransmitted
+        * and hopefully then we'll have sufficient space.
+        */
+       NET_INC_STATS_BH(RcvPruned);
+
+       /* Massive buffer overcommit. */
+       return -1;
+}
+
+static inline int tcp_rmem_schedule(struct sock *sk, struct sk_buff *skb)
+{
+       return (int)skb->truesize <= sk->forward_alloc ||
+               tcp_mem_schedule(sk, skb->truesize, 1);
+}
 
 /*
  *     This routine handles the data.  If there is room in the buffer,
@@ -2053,53 +2772,103 @@ static void tcp_data(struct sk_buff *skb, struct sock *sk, unsigned int len)
         if (skb->len == 0 && !th->fin)
                goto drop;
 
+       TCP_ECN_accept_cwr(tp, skb);
+
        /* 
         *      If our receive queue has grown past its limits shrink it.
         *      Make sure to do this before moving rcv_nxt, otherwise
         *      data might be acked for that we don't have enough room.
         */
-       if (atomic_read(&sk->rmem_alloc) > sk->rcvbuf) { 
-               if (prune_queue(sk) < 0) { 
-                       /* Still not enough room. That can happen when
-                        * skb->true_size differs significantly from skb->len.
-                        */
+       if (atomic_read(&sk->rmem_alloc) > sk->rcvbuf ||
+           !tcp_rmem_schedule(sk, skb)) {
+               if (tcp_prune_queue(sk) < 0 || !tcp_rmem_schedule(sk, skb))
                        goto drop;
-               }
        }
 
        tcp_data_queue(sk, skb);
 
+#ifdef TCP_DEBUG
        if (before(tp->rcv_nxt, tp->copied_seq)) {
                printk(KERN_DEBUG "*** tcp.c:tcp_data bug acked < copied\n");
                tp->rcv_nxt = tp->copied_seq;
        }
+#endif
        return;
 
 drop:
-       kfree_skb(skb);
+       __kfree_skb(skb);
+}
+
+/* RFC2861, slow part. Adjust cwnd, after it was not full during one rto.
+ * As additional protections, we do not touch cwnd in retransmission phases,
+ * and if application hit its sndbuf limit recently.
+ */
+void tcp_cwnd_application_limited(struct sock *sk)
+{
+       struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
+
+       if (tp->ca_state == TCP_CA_Open &&
+           sk->socket && !test_bit(SOCK_NOSPACE, &sk->socket->flags)) {
+               /* Limited by application or receiver window. */
+               u32 win_used = max(tp->snd_cwnd_used, 2);
+               if (win_used < tp->snd_cwnd) {
+                       tp->snd_ssthresh = tcp_current_ssthresh(tp);
+                       tp->snd_cwnd = (tp->snd_cwnd+win_used)>>1;
+               }
+               tp->snd_cwnd_used = 0;
+       }
+       tp->snd_cwnd_stamp = tcp_time_stamp;
 }
 
+
 /* When incoming ACK allowed to free some skb from write_queue,
- * we remember this in flag tp->sorry and wake up socket on the exit
- * from tcp input handler. Probably, handler has already eat this space
- * sending ACK and cloned frames from tcp_write_xmit().
+ * we remember this event in flag tp->queue_shrunk and wake up socket
+ * on the exit from tcp input handler.
  */
-static __inline__ void tcp_new_space(struct sock *sk)
+static void tcp_new_space(struct sock *sk)
 {
        struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
-       struct socket *sock;
 
-       tp->sorry = 0;
+       if (tp->packets_out < tp->snd_cwnd &&
+           !(sk->userlocks&SOCK_SNDBUF_LOCK) &&
+           !tcp_memory_pressure &&
+           atomic_read(&tcp_memory_allocated) < sysctl_tcp_mem[0]) {
+               int sndmem, demanded;
+
+               sndmem = tp->mss_clamp+MAX_TCP_HEADER+16+sizeof(struct sk_buff);
+               demanded = max(tp->snd_cwnd, tp->reordering+1);
+               sndmem *= 2*demanded;
+               if (sndmem > sk->sndbuf)
+                       sk->sndbuf = min(sndmem, sysctl_tcp_wmem[2]);
+               tp->snd_cwnd_stamp = tcp_time_stamp;
+       }
+
+       /* Wakeup users. */
+       if (tcp_wspace(sk) >= tcp_min_write_space(sk)) {
+               struct socket *sock = sk->socket;
 
-       if (sock_wspace(sk) >= tcp_min_write_space(sk) &&
-           (sock = sk->socket) != NULL) {
                clear_bit(SOCK_NOSPACE, &sock->flags);
 
                if (sk->sleep && waitqueue_active(sk->sleep))
                        wake_up_interruptible(sk->sleep);
 
-               if (sock->fasync_list)
+               if (sock->fasync_list && !(sk->shutdown&SEND_SHUTDOWN))
                        sock_wake_async(sock, 2, POLL_OUT);
+
+               /* Satisfy those who hook write_space() callback. */
+               if (sk->write_space != tcp_write_space)
+                       sk->write_space(sk);
+       }
+}
+
+static inline void tcp_check_space(struct sock *sk)
+{
+       struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
+
+       if (tp->queue_shrunk) {
+               tp->queue_shrunk = 0;
+               if (sk->socket && test_bit(SOCK_NOSPACE, &sk->socket->flags))
+                       tcp_new_space(sk);
        }
 }
 
@@ -2118,7 +2887,8 @@ static __inline__ void tcp_data_snd_check(struct sock *sk)
        struct sk_buff *skb = sk->tp_pinfo.af_tcp.send_head;
 
        if (skb != NULL)
-               __tcp_data_snd_check(sk, skb); 
+               __tcp_data_snd_check(sk, skb);
+       tcp_check_space(sk);
 }
 
 /*
@@ -2128,32 +2898,15 @@ static __inline__ void __tcp_ack_snd_check(struct sock *sk, int ofo_possible)
 {
        struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
 
-       /* This also takes care of updating the window.
-        * This if statement needs to be simplified.
-        *
-        * Rules for delaying an ack:
-        *      - delay time <= 0.5 HZ
-        *      - we don't have a window update to send
-        *      - must send at least every 2 full sized packets
-        *      - must send an ACK if we have any out of order data
-        *
-        * With an extra heuristic to handle loss of packet
-        * situations and also helping the sender leave slow
-        * start in an expediant manner.
-        */
-
-           /* More than one full frame received or... */
+           /* More than one full frame received... */
        if (((tp->rcv_nxt - tp->rcv_wup) > tp->ack.rcv_mss
-#ifdef TCP_MORE_COARSE_ACKS
-            /* Avoid to send immediate ACK from input path, if it
-             * does not advance window far enough. tcp_recvmsg() will do this.
+            /* ... and right edge of window advances far enough.
+             * (tcp_recvmsg() will send ACK otherwise). Or...
              */
-            && (!sysctl_tcp_retrans_collapse || __tcp_select_window(sk) >= tp->rcv_wnd)
-#endif
-            ) ||
+            && __tcp_select_window(sk) >= tp->rcv_wnd) ||
            /* We ACK each frame or... */
            tcp_in_quickack_mode(tp) ||
-           /* We have out of order data or */
+           /* We have out of order data. */
            (ofo_possible &&
             skb_peek(&tp->out_of_order_queue) != NULL)) {
                /* Then ack it now */
@@ -2167,14 +2920,13 @@ static __inline__ void __tcp_ack_snd_check(struct sock *sk, int ofo_possible)
 static __inline__ void tcp_ack_snd_check(struct sock *sk)
 {
        struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
-       if (tp->ack.pending == 0) {
+       if (!tcp_ack_scheduled(tp)) {
                /* We sent a data segment already. */
                return;
        }
        __tcp_ack_snd_check(sk, 1);
 }
 
-
 /*
  *     This routine is only called when we have urgent data
  *     signalled. Its the 'slow' part of tcp_urg. It could be
@@ -2248,92 +3000,6 @@ static inline void tcp_urg(struct sock *sk, struct tcphdr *th, unsigned long len
        }
 }
 
-/* Clean the out_of_order queue if we can, trying to get
- * the socket within its memory limits again.
- *
- * Return less than zero if we should start dropping frames
- * until the socket owning process reads some of the data
- * to stabilize the situation.
- */
-static int prune_queue(struct sock *sk)
-{
-       struct tcp_opt *tp = &sk->tp_pinfo.af_tcp; 
-       struct sk_buff *skb;
-       int pruned = 0;
-
-       SOCK_DEBUG(sk, "prune_queue: c=%x\n", tp->copied_seq);
-
-       NET_INC_STATS_BH(PruneCalled);
-
-       /* First, purge the out_of_order queue. */
-       skb = __skb_dequeue_tail(&tp->out_of_order_queue);
-       if(skb != NULL) {
-               /* Free it all. */
-               do {
-                       pruned += skb->len;
-                       net_statistics[smp_processor_id()*2].OfoPruned += skb->len; 
-                       kfree_skb(skb);
-                       skb = __skb_dequeue_tail(&tp->out_of_order_queue);
-               } while(skb != NULL);
-
-               /* Reset SACK state.  A conforming SACK implementation will
-                * do the same at a timeout based retransmit.  When a connection
-                * is in a sad state like this, we care only about integrity
-                * of the connection not performance.
-                */
-               if(tp->sack_ok)
-                       tp->num_sacks = 0;
-       }
-       
-       /* If we are really being abused, tell the caller to silently
-        * drop receive data on the floor.  It will get retransmitted
-        * and hopefully then we'll have sufficient space.
-        *
-        * We used to try to purge the in-order packets too, but that
-        * turns out to be deadly and fraught with races.  Consider:
-        *
-        * 1) If we acked the data, we absolutely cannot drop the
-        *    packet.  This data would then never be retransmitted.
-        * 2) It is possible, with a proper sequence of events involving
-        *    delayed acks and backlog queue handling, to have the user
-        *    read the data before it gets acked.  The previous code
-        *    here got this wrong, and it lead to data corruption.
-        * 3) Too much state changes happen when the FIN arrives, so once
-        *    we've seen that we can't remove any in-order data safely.
-        *
-        * The net result is that removing in-order receive data is too
-        * complex for anyones sanity.  So we don't do it anymore.  But
-        * if we are really having our buffer space abused we stop accepting
-        * new receive data.
-        *
-        * 8) The arguments are interesting, but I even cannot imagine
-        * what kind of arguments could force us to drop NICE, ALREADY
-        * RECEIVED DATA only to get one more packet? --ANK
-        *
-        * FIXME: it should recompute SACK state and only remove enough
-        *        buffers to get into bounds again. The current scheme loses
-        *        badly sometimes on links with large RTT, especially when 
-        *        the driver has high overhead per skb.
-        *        (increasing the rcvbuf is not enough because it inflates the
-        *         the window too, disabling flow control effectively) -AK
-        *
-        *        Mmm... Why not to scale it seprately then? Just replace
-        *        / WINDOW_ADVERTISE_DIVISOR with >> sk->window_advertise_scale
-        *        and adjust it dynamically, when TCP window flow control
-        *        fails?                                                -ANK
-        */
-
-       tp->ack.quick = 0;
-
-       if(atomic_read(&sk->rmem_alloc) < (sk->rcvbuf << 1))
-               return 0;
-
-       NET_INC_STATS_BH(RcvPruned);
-
-       /* Massive buffer overcommit. */
-       return -1;
-}
-
 static int tcp_copy_to_iovec(struct sock *sk, struct sk_buff *skb, int hlen)
 {
        struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
@@ -2454,9 +3120,6 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
         *      We do checksum and copy also but from device to kernel.
         */
 
-       /* RED-PEN. Using static variables to pass function arguments
-        * cannot be good idea...
-        */
        tp->saw_tstamp = 0;
 
        /*      pred_flags is 0xS?10 << 16 + snd_wnd
@@ -2468,7 +3131,7 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
         *      PSH flag is ignored.
         */
 
-       if ((tcp_flag_word(th) & ~(TCP_RESERVED_BITS|TCP_FLAG_PSH)) == tp->pred_flags &&
+       if ((tcp_flag_word(th) & TCP_HP_BITS) == tp->pred_flags &&
                TCP_SKB_CB(skb)->seq == tp->rcv_nxt) {
                int tcp_header_len = tp->tcp_header_len;
 
@@ -2500,10 +3163,8 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
                         * seq == rcv_nxt and rcv_wup <= rcv_nxt.
                         * Hence, check seq<=rcv_wup reduces to:
                         */
-                       if (tp->rcv_nxt == tp->rcv_wup) {
-                               tp->ts_recent = tp->rcv_tsval;
-                               tp->ts_recent_stamp = xtime.tv_sec;
-                       }
+                       if (tp->rcv_nxt == tp->rcv_wup)
+                               tcp_store_ts_recent(tp);
                }
 
                if (len <= tcp_header_len) {
@@ -2512,18 +3173,15 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
                                /* We know that such packets are checksummed
                                 * on entry.
                                 */
-                               tcp_ack(sk, th, TCP_SKB_CB(skb)->seq,
-                                       TCP_SKB_CB(skb)->ack_seq, len); 
-                               kfree_skb(skb); 
+                               tcp_ack(sk, skb, 0);
+                               __kfree_skb(skb); 
                                tcp_data_snd_check(sk);
-                               if (tp->sorry)
-                                       tcp_new_space(sk);
                                return 0;
                        } else { /* Header too small */
                                TCP_INC_STATS_BH(TcpInErrs);
                                goto discard;
                        }
-               } else if (TCP_SKB_CB(skb)->ack_seq == tp->snd_una) {
+               } else {
                        int eaten = 0;
 
                        if (tp->ucopy.task == current &&
@@ -2546,67 +3204,59 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
                                if (tcp_checksum_complete_user(sk, skb))
                                        goto csum_error;
 
-                               if (atomic_read(&sk->rmem_alloc) > sk->rcvbuf)
+                               if ((int)skb->truesize > sk->forward_alloc)
                                        goto step5;
 
                                NET_INC_STATS_BH(TCPHPHits);
 
                                /* Bulk data transfer: receiver */
                                __skb_pull(skb,tcp_header_len);
-
-                               /* DO NOT notify forward progress here.
-                                * It saves dozen of CPU instructions in fast path. --ANK
-                                * And where is it signaled then ? -AK
-                                * Nowhere. 8) --ANK
-                                */
                                __skb_queue_tail(&sk->receive_queue, skb);
-                               skb_set_owner_r(skb, sk);
-
+                               tcp_set_owner_r(skb, sk);
                                tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
-
-                               /* FIN bit check is not done since if FIN is set in
-                                * this frame, the pred_flags won't match up. -DaveM
-                                */
-                               sk->data_ready(sk, 0);
                        }
 
-                       tcp_event_data_recv(tp, skb);
+                       tcp_event_data_recv(sk, tp, skb);
+
+                       if (TCP_SKB_CB(skb)->ack_seq != tp->snd_una) {
+                               /* Well, only one small jumplet in fast path... */
+                               tcp_ack(sk, skb, FLAG_DATA);
+                               tcp_data_snd_check(sk);
+                               if (!tcp_ack_scheduled(tp))
+                                       goto no_ack;
+                       }
 
-#ifdef TCP_MORE_COARSE_ACKS
                        if (eaten) {
                                if (tcp_in_quickack_mode(tp)) {
                                        tcp_send_ack(sk);
                                } else {
                                        tcp_send_delayed_ack(sk);
                                }
-                       } else
-#endif
-                       __tcp_ack_snd_check(sk, 0);
+                       } else {
+                               __tcp_ack_snd_check(sk, 0);
+                       }
 
+no_ack:
                        if (eaten)
-                               kfree_skb(skb);
+                               __kfree_skb(skb);
+                       else
+                               sk->data_ready(sk, 0);
                        return 0;
                }
-               /* Packet is in sequence, flags are trivial;
-                * only ACK is strange. Jump to step 5.
-                */
-               if (tcp_checksum_complete_user(sk, skb))
-                       goto csum_error;
-               goto step5;
        }
 
 slow_path:
-       if (tcp_checksum_complete_user(sk, skb))
+       if (len < (th->doff<<2) || tcp_checksum_complete_user(sk, skb))
                goto csum_error;
 
        /*
         * RFC1323: H1. Apply PAWS check first.
         */
-       if (tcp_fast_parse_options(sk, th, tp) && tp->saw_tstamp &&
+       if (tcp_fast_parse_options(skb, th, tp) && tp->saw_tstamp &&
            tcp_paws_discard(tp, skb)) {
                if (!th->rst) {
                        NET_INC_STATS_BH(PAWSEstabRejected);
-                       tcp_send_ack(sk);
+                       tcp_send_dupack(sk, skb);
                        goto discard;
                }
                /* Resets are accepted even if PAWS failed.
@@ -2620,23 +3270,15 @@ slow_path:
         *      Standard slow path.
         */
 
-       if (!tcp_sequence(tp, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq)) {
+       if (!tcp_sequence(tp, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq, th->rst)) {
                /* RFC793, page 37: "In all states except SYN-SENT, all reset
                 * (RST) segments are validated by checking their SEQ-fields."
                 * And page 69: "If an incoming segment is not acceptable,
                 * an acknowledgment should be sent in reply (unless the RST bit
                 * is set, if so drop the segment and return)".
                 */
-               if (th->rst)
-                       goto discard;
-               if (after(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) {
-                       SOCK_DEBUG(sk, "seq:%d end:%d wup:%d wnd:%d\n",
-                                  TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq,
-                                  tp->rcv_wup, tp->rcv_wnd);
-               }
-               tcp_enter_quickack_mode(tp);
-               tcp_send_ack(sk);
-               NET_INC_STATS_BH(DelayedACKLost);
+               if (!th->rst)
+                       tcp_send_dupack(sk, skb);
                goto discard;
        }
 
@@ -2645,378 +3287,43 @@ slow_path:
                goto discard;
        }
 
-       if (tp->saw_tstamp) {
-               tcp_replace_ts_recent(sk, tp,
-                                     TCP_SKB_CB(skb)->seq);
-       }
+       tcp_replace_ts_recent(tp, TCP_SKB_CB(skb)->seq);
 
        if(th->syn && TCP_SKB_CB(skb)->seq != tp->syn_seq) {
-               SOCK_DEBUG(sk, "syn in established state\n");
                TCP_INC_STATS_BH(TcpInErrs);
+               NET_INC_STATS_BH(TCPAbortOnSyn);
                tcp_reset(sk);
                return 1;
        }
 
 step5:
        if(th->ack)
-               tcp_ack(sk, th, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->ack_seq, len);
-       
+               tcp_ack(sk, skb, FLAG_SLOWPATH);
+
        /* Process urgent data. */
        tcp_urg(sk, th, len);
 
        /* step 7: process the segment text */
        tcp_data(skb, sk, len);
 
-       /* Be careful, tcp_data() may have put this into TIME_WAIT. */
-       if(sk->state != TCP_CLOSE) {
-               tcp_data_snd_check(sk);
-               tcp_ack_snd_check(sk);
-               if (tp->sorry)
-                       tcp_new_space(sk);
-       }
-
+       tcp_data_snd_check(sk);
+       tcp_ack_snd_check(sk);
        return 0;
 
 csum_error:
        TCP_INC_STATS_BH(TcpInErrs);
 
 discard:
-       kfree_skb(skb);
+       __kfree_skb(skb);
        return 0;
 }
 
-
-/* This is not only more efficient than what we used to do, it eliminates
- * a lot of code duplication between IPv4/IPv6 SYN recv processing. -DaveM
- *
- * Actually, we could lots of memory writes here. tp of listening
- * socket contains all necessary default parameters.
- */
-struct sock *tcp_create_openreq_child(struct sock *sk, struct open_request *req, struct sk_buff *skb)
-{
-       struct sock *newsk = sk_alloc(PF_INET, GFP_ATOMIC, 0);
-
-       if(newsk != NULL) {
-               struct tcp_opt *newtp;
-#ifdef CONFIG_FILTER
-               struct sk_filter *filter;
-#endif
-
-               memcpy(newsk, sk, sizeof(*newsk));
-               newsk->state = TCP_SYN_RECV;
-
-               /* SANITY */
-               newsk->pprev = NULL;
-               newsk->prev = NULL;
-
-               /* Clone the TCP header template */
-               newsk->dport = req->rmt_port;
-
-               sock_lock_init(newsk);
-               bh_lock_sock(newsk);
-
-               atomic_set(&newsk->rmem_alloc, 0);
-               skb_queue_head_init(&newsk->receive_queue);
-               atomic_set(&newsk->wmem_alloc, 0);
-               skb_queue_head_init(&newsk->write_queue);
-               atomic_set(&newsk->omem_alloc, 0);
-
-               newsk->done = 0;
-               newsk->proc = 0;
-               newsk->backlog.head = newsk->backlog.tail = NULL;
-               skb_queue_head_init(&newsk->error_queue);
-               newsk->write_space = tcp_write_space;
-#ifdef CONFIG_FILTER
-               if ((filter = newsk->filter) != NULL)
-                       sk_filter_charge(newsk, filter);
-#endif
-
-               /* Now setup tcp_opt */
-               newtp = &(newsk->tp_pinfo.af_tcp);
-               newtp->pred_flags = 0;
-               newtp->rcv_nxt = req->rcv_isn + 1;
-               newtp->snd_nxt = req->snt_isn + 1;
-               newtp->snd_una = req->snt_isn + 1;
-               newtp->snd_sml = req->snt_isn + 1;
-
-               tcp_delack_init(newtp);
-               if (skb->len >= 536)
-                       newtp->ack.last_seg_size = skb->len;
-
-               tcp_prequeue_init(newtp);
-
-               newtp->snd_wl1 = req->rcv_isn;
-               newtp->snd_wl2 = req->snt_isn;
-
-               newtp->retransmits = 0;
-               newtp->backoff = 0;
-               newtp->srtt = 0;
-               newtp->mdev = TCP_TIMEOUT_INIT;
-               newtp->rto = TCP_TIMEOUT_INIT;
-
-               newtp->packets_out = 0;
-               newtp->fackets_out = 0;
-               newtp->retrans_out = 0;
-               newtp->snd_ssthresh = 0x7fffffff;
-
-               /* So many TCP implementations out there (incorrectly) count the
-                * initial SYN frame in their delayed-ACK and congestion control
-                * algorithms that we must have the following bandaid to talk
-                * efficiently to them.  -DaveM
-                */
-               newtp->snd_cwnd = 2;
-               newtp->snd_cwnd_cnt = 0;
-               newtp->high_seq = 0;
-
-               newtp->dup_acks = 0;
-               tcp_init_xmit_timers(newsk);
-               skb_queue_head_init(&newtp->out_of_order_queue);
-               newtp->send_head = newtp->retrans_head = NULL;
-               newtp->rcv_wup = req->rcv_isn + 1;
-               newtp->write_seq = req->snt_isn + 1;
-               newtp->copied_seq = req->rcv_isn + 1;
-
-               newtp->saw_tstamp = 0;
-
-               newtp->probes_out = 0;
-               newtp->num_sacks = 0;
-               newtp->syn_seq = req->rcv_isn;
-               newtp->fin_seq = req->rcv_isn;
-               newtp->urg_data = 0;
-               newtp->listen_opt = NULL;
-               newtp->accept_queue = newtp->accept_queue_tail = NULL;
-               /* Deinitialize syn_wait_lock to trap illegal accesses. */
-               memset(&newtp->syn_wait_lock, 0, sizeof(newtp->syn_wait_lock));
-
-               /* Back to base struct sock members. */
-               newsk->err = 0;
-               newsk->priority = 0;
-               atomic_set(&newsk->refcnt, 1);
-#ifdef INET_REFCNT_DEBUG
-               atomic_inc(&inet_sock_nr);
-#endif
-
-               if (newsk->keepopen)
-                       tcp_reset_keepalive_timer(newsk, keepalive_time_when(newtp));
-               newsk->socket = NULL;
-               newsk->sleep = NULL;
-
-               newtp->tstamp_ok = req->tstamp_ok;
-               if((newtp->sack_ok = req->sack_ok) != 0)
-                       newtp->num_sacks = 0;
-               newtp->window_clamp = req->window_clamp;
-               newtp->rcv_wnd = req->rcv_wnd;
-               newtp->wscale_ok = req->wscale_ok;
-               if (newtp->wscale_ok) {
-                       newtp->snd_wscale = req->snd_wscale;
-                       newtp->rcv_wscale = req->rcv_wscale;
-               } else {
-                       newtp->snd_wscale = newtp->rcv_wscale = 0;
-                       newtp->window_clamp = min(newtp->window_clamp,65535);
-               }
-               newtp->snd_wnd = ntohs(skb->h.th->window) << newtp->snd_wscale;
-               newtp->max_window = newtp->snd_wnd;
-
-               if (newtp->tstamp_ok) {
-                       newtp->ts_recent = req->ts_recent;
-                       newtp->ts_recent_stamp = xtime.tv_sec;
-                       newtp->tcp_header_len = sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED;
-               } else {
-                       newtp->ts_recent_stamp = 0;
-                       newtp->tcp_header_len = sizeof(struct tcphdr);
-               }
-               newtp->mss_clamp = req->mss;
-       }
-       return newsk;
-}
-
-/* 
- *     Process an incoming packet for SYN_RECV sockets represented
- *     as an open_request.
- */
-
-struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb,
-                          struct open_request *req,
-                          struct open_request **prev)
-{
-       struct tcphdr *th = skb->h.th;
-       struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
-       u32 flg = tcp_flag_word(th) & (TCP_FLAG_RST|TCP_FLAG_SYN|TCP_FLAG_ACK);
-       int paws_reject = 0;
-       struct tcp_opt ttp;
-       struct sock *child;
-
-       ttp.saw_tstamp = 0;
-       if (th->doff > (sizeof(struct tcphdr)>>2)) {
-               tcp_parse_options(NULL, th, &ttp, 0);
-
-               if (ttp.saw_tstamp) {
-                       ttp.ts_recent = req->ts_recent;
-                       /* We do not store true stamp, but it is not required,
-                        * it can be estimated (approximately)
-                        * from another data.
-                        */
-                       ttp.ts_recent_stamp = xtime.tv_sec - ((TCP_TIMEOUT_INIT/HZ)<<req->retrans);
-                       paws_reject = tcp_paws_check(&ttp, th->rst);
-               }
-       }
-
-       /* Check for pure retransmited SYN. */
-       if (TCP_SKB_CB(skb)->seq == req->rcv_isn &&
-           flg == TCP_FLAG_SYN &&
-           !paws_reject) {
-               /*
-                * RFC793 draws (Incorrectly! It was fixed in RFC1122)
-                * this case on figure 6 and figure 8, but formal
-                * protocol description says NOTHING.
-                * To be more exact, it says that we should send ACK,
-                * because this segment (at least, if it has no data)
-                * is out of window.
-                *
-                *  CONCLUSION: RFC793 (even with RFC1122) DOES NOT
-                *  describe SYN-RECV state. All the description
-                *  is wrong, we cannot believe to it and should
-                *  rely only on common sense and implementation
-                *  experience.
-                *
-                * Enforce "SYN-ACK" according to figure 8, figure 6
-                * of RFC793, fixed by RFC1122.
-                */
-               req->class->rtx_syn_ack(sk, req, NULL);
-               return NULL;
-       }
-
-       /* Further reproduces section "SEGMENT ARRIVES"
-          for state SYN-RECEIVED of RFC793.
-          It is broken, however, it does not work only
-          when SYNs are crossed, which is impossible in our
-          case.
-
-          But generally, we should (RFC lies!) to accept ACK
-          from SYNACK both here and in tcp_rcv_state_process().
-          tcp_rcv_state_process() does not, hence, we do not too.
-
-          Note that the case is absolutely generic:
-          we cannot optimize anything here without
-          violating protocol. All the checks must be made
-          before attempt to create socket.
-        */
-
-       /* RFC793: "first check sequence number". */
-
-       if (paws_reject || !tcp_in_window(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq,
-                                         req->rcv_isn+1, req->rcv_isn+1+req->rcv_wnd)) {
-               /* Out of window: send ACK and drop. */
-               if (!(flg & TCP_FLAG_RST))
-                       req->class->send_ack(skb, req);
-               if (paws_reject)
-                       NET_INC_STATS_BH(PAWSEstabRejected);
-               return NULL;
-       }
-
-       /* In sequence, PAWS is OK. */
-
-       if (ttp.saw_tstamp && !after(TCP_SKB_CB(skb)->seq, req->rcv_isn+1))
-               req->ts_recent = ttp.rcv_tsval;
-
-       if (TCP_SKB_CB(skb)->seq == req->rcv_isn) {
-               /* Truncate SYN, it is out of window starting
-                  at req->rcv_isn+1. */
-               flg &= ~TCP_FLAG_SYN;
-       }
-
-       /* RFC793: "second check the RST bit" and
-        *         "fourth, check the SYN bit"
-        */
-       if (flg & (TCP_FLAG_RST|TCP_FLAG_SYN))
-               goto embryonic_reset;
-
-       /* RFC793: "fifth check the ACK field" */
-
-       if (!(flg & TCP_FLAG_ACK))
-               return NULL;
-
-       /* Invalid ACK: reset will be sent by listening socket */
-       if (TCP_SKB_CB(skb)->ack_seq != req->snt_isn+1)
-               return sk;
-       /* Also, it would be not so bad idea to check rcv_tsecr, which
-        * is essentially ACK extension and too early or too late values
-        * should cause reset in unsynchronized states.
-        */
-
-       /* If TCP_DEFER_ACCEPT is set, drop bare ACK. */
-       if (tp->defer_accept && TCP_SKB_CB(skb)->end_seq == req->rcv_isn+1) {
-               req->acked = 1;
-               return NULL;
-       }
-
-       /* OK, ACK is valid, create big socket and
-        * feed this segment to it. It will repeat all
-        * the tests. THIS SEGMENT MUST MOVE SOCKET TO
-        * ESTABLISHED STATE. If it will be dropped after
-        * socket is created, wait for troubles.
-        */
-       child = tp->af_specific->syn_recv_sock(sk, skb, req, NULL);
-       if (child == NULL)
-               goto listen_overflow;
-
-       tcp_synq_unlink(tp, req, prev);
-       tcp_synq_removed(sk, req);
-
-       tcp_acceptq_queue(sk, req, child);
-       return child;
-
-listen_overflow:
-       if (!sysctl_tcp_abort_on_overflow) {
-               req->acked = 1;
-               return NULL;
-       }
-
-embryonic_reset:
-       NET_INC_STATS_BH(EmbryonicRsts);
-       if (!(flg & TCP_FLAG_RST))
-               req->class->send_reset(skb);
-
-       tcp_synq_drop(sk, req, prev);
-       return NULL;
-}
-
-/*
- * Queue segment on the new socket if the new socket is active,
- * otherwise we just shortcircuit this and continue with
- * the new socket.
- */
-
-int tcp_child_process(struct sock *parent, struct sock *child,
-                     struct sk_buff *skb)
-{
-       int ret = 0;
-       int state = child->state;
-
-       if (child->lock.users == 0) {
-               ret = tcp_rcv_state_process(child, skb, skb->h.th, skb->len);
-
-               /* Wakeup parent, send SIGIO */
-               if (state == TCP_SYN_RECV && child->state != state)
-                       parent->data_ready(parent, 0);
-       } else {
-               /* Alas, it is possible again, because we do lookup
-                * in main socket hash table and lock on listening
-                * socket does not protect us more.
-                */
-               sk_add_backlog(child, skb);
-       }
-
-       bh_unlock_sock(child);
-       return ret;
-}
-
 static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
                                         struct tcphdr *th, unsigned len)
 {
        struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
 
-       tcp_parse_options(sk, th, tp, 0);
+       tcp_parse_options(skb, tp);
 
        if (th->ack) {
                /* rfc793:
@@ -3027,24 +3334,12 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
                 *        a reset (unless the RST bit is set, if so drop
                 *        the segment and return)"
                 *
-                *  I cite this place to emphasize one essential
-                *  detail, this check is different of one
-                *  in established state: SND.UNA <= SEG.ACK <= SND.NXT.
-                *  SEG_ACK == SND.UNA == ISS is invalid in SYN-SENT,
-                *  because we have no previous data sent before SYN.
-                *                                        --ANK(990513)
-                *
                 *  We do not send data with SYN, so that RFC-correct
                 *  test reduces to:
                 */
                if (TCP_SKB_CB(skb)->ack_seq != tp->snd_nxt)
                        return 1;
 
-               /* Check not from any RFC, but it is evident consequence
-                * of combining PAWS and usual SYN-SENT logic: ACK _is_
-                * checked in SYN-SENT unlike another states, hence
-                * echoed tstamp must be checked too.
-                */
                if (tp->saw_tstamp) {
                        if (tp->rcv_tsecr == 0) {
                                /* Workaround for bug in linux-2.1 and early
@@ -3055,13 +3350,9 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
                                tp->saw_tstamp = 0;
 
                                /* But do not forget to store peer's timestamp! */
-                               if (th->syn) {
-                                       tp->ts_recent = tp->rcv_tsval;
-                                       tp->ts_recent_stamp = xtime.tv_sec;
-                               }
-                       } else if ((__s32)(tp->rcv_tsecr - tcp_time_stamp) > 0 ||
-                                  (__s32)(tp->rcv_tsecr - tp->syn_stamp) < 0) {
-                               NETDEBUG(if (net_ratelimit()) printk(KERN_DEBUG "TCP: synsent reject.\n"));
+                               if (th->syn)
+                                       tcp_store_ts_recent(tp);
+                       } else if (!between(tp->rcv_tsecr, tp->retrans_stamp, tcp_time_stamp)) {
                                NET_INC_STATS_BH(PAWSActiveRejected);
                                return 1;
                        }
@@ -3095,30 +3386,12 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
                 *    are acceptable then ...
                 *    (our SYN has been ACKed), change the connection
                 *    state to ESTABLISHED..."
-                *
-                * Do you see? SYN-less ACKs in SYN-SENT state are
-                * completely ignored.
-                *
-                * The bug causing stalled SYN-SENT sockets
-                * was here: tcp_ack advanced snd_una and canceled
-                * retransmit timer, so that bare ACK received
-                * in SYN-SENT state (even with invalid ack==ISS,
-                * because tcp_ack check is too weak for SYN-SENT)
-                * causes moving socket to invalid semi-SYN-SENT,
-                * semi-ESTABLISHED state and connection hangs.
-                *                                     --ANK (990514)
-                *
-                * Bare ACK is valid, however.
-                * Actually, RFC793 requires to send such ACK
-                * in reply to any out of window packet.
-                * It is wrong, but Linux also send such
-                * useless ACKs sometimes.
-                *                                     --ANK (990724)
                 */
 
+               TCP_ECN_rcv_synack(tp, th);
+
                tp->snd_wl1 = TCP_SKB_CB(skb)->seq;
-               tcp_ack(sk,th, TCP_SKB_CB(skb)->seq,
-                       TCP_SKB_CB(skb)->ack_seq, len);
+               tcp_ack(sk, skb, FLAG_SLOWPATH);
 
                /* Ok.. it's good. Set up sequence numbers and
                 * move to established.
@@ -3130,12 +3403,10 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
                 * never scaled.
                 */
                tp->snd_wnd = ntohs(th->window);
-               tp->snd_wl1 = TCP_SKB_CB(skb)->seq;
-               tp->snd_wl2 = TCP_SKB_CB(skb)->ack_seq;
+               tcp_init_wl(tp, TCP_SKB_CB(skb)->ack_seq, TCP_SKB_CB(skb)->seq);
+               tp->syn_seq = TCP_SKB_CB(skb)->seq;
                tp->fin_seq = TCP_SKB_CB(skb)->seq;
 
-               tcp_set_state(sk, TCP_ESTABLISHED);
-
                if (tp->wscale_ok == 0) {
                        tp->snd_wscale = tp->rcv_wscale = 0;
                        tp->window_clamp = min(tp->window_clamp,65535);
@@ -3144,12 +3415,14 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
                if (tp->tstamp_ok) {
                        tp->tcp_header_len =
                                sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED;
+                       tp->advmss -= TCPOLEN_TSTAMP_ALIGNED;
                } else
                        tp->tcp_header_len = sizeof(struct tcphdr);
-               if (tp->saw_tstamp) {
-                       tp->ts_recent = tp->rcv_tsval;
-                       tp->ts_recent_stamp = xtime.tv_sec;
-               }
+               if (tp->saw_tstamp)
+                       tcp_store_ts_recent(tp);
+               if (tp->sack_ok && sysctl_tcp_fack)
+                       tp->sack_ok |= 2;
+
                tcp_sync_mss(sk, tp->pmtu_cookie);
                tcp_initialize_rcv_mss(sk);
                tcp_init_metrics(sk);
@@ -3158,15 +3431,24 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
                if (sk->keepopen)
                        tcp_reset_keepalive_timer(sk, keepalive_time_when(tp));
 
+               if (tp->snd_wscale == 0)
+                       __tcp_fast_path_on(tp, tp->snd_wnd);
+               else
+                       tp->pred_flags = 0;
+
+               /* Remember, tcp_poll() does not lock socket!
+                * Change state from SYN-SENT only after copied_seq
+                * is initilized. */
                tp->copied_seq = tp->rcv_nxt;
-               __tcp_fast_path_on(tp, tp->snd_wnd);
+               mb();
+               tcp_set_state(sk, TCP_ESTABLISHED);
 
                if(!sk->dead) {
                        sk->state_change(sk);
                        sk_wake_async(sk, 0, POLL_OUT);
                }
 
-               if (tp->write_pending) {
+               if (tp->write_pending || tp->defer_accept) {
                        /* Save one ACK. Data will be ready after
                         * several ticks, if write_pending is set.
                         *
@@ -3174,11 +3456,10 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
                         * look so _wonderfully_ clever, that I was not able
                         * to stand against the temptation 8)     --ANK
                         */
-                       tp->ack.pending = 1;
+                       tcp_schedule_ack(tp);
                        tp->ack.lrcvtime = tcp_time_stamp;
                        tcp_enter_quickack_mode(tp);
-                       tp->ack.ato = TCP_ATO_MIN;
-                       tcp_reset_xmit_timer(sk, TCP_TIME_DACK, TCP_DELACK_MIN);
+                       tcp_reset_xmit_timer(sk, TCP_TIME_DACK, TCP_DELACK_MAX);
                        goto discard;
                } else {
                        tcp_send_ack(sk);
@@ -3204,20 +3485,12 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
 
        if (th->syn) {
                /* We see SYN without ACK. It is attempt of
-                *  simultaneous connect with crossed SYNs.
-                *
-                * The previous version of the code
-                * checked for "connecting to self"
-                * here. that check is done now in
-                * tcp_connect.
-                *
-                * RED-PEN: BTW, it does not. 8)
+                * simultaneous connect with crossed SYNs.
+                * Particularly, it can be connect to self.
                 */
                tcp_set_state(sk, TCP_SYN_RECV);
-               if (tp->saw_tstamp) {
-                       tp->ts_recent = tp->rcv_tsval;
-                       tp->ts_recent_stamp = xtime.tv_sec;
-               }
+               if (tp->saw_tstamp)
+                       tcp_store_ts_recent(tp);
 
                tp->rcv_nxt = TCP_SKB_CB(skb)->seq + 1;
                tp->rcv_wup = TCP_SKB_CB(skb)->seq + 1;
@@ -3232,6 +3505,8 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
                tcp_sync_mss(sk, tp->pmtu_cookie);
                tcp_initialize_rcv_mss(sk);
 
+               TCP_ECN_rcv_syn(tp, th);
+
                tcp_send_synack(sk);
 #if 0
                /* Note, we could accept data and URG from this segment.
@@ -3251,7 +3526,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
         */
 
 discard:
-       kfree_skb(skb);
+       __kfree_skb(skb);
        return 0;
 }
 
@@ -3273,35 +3548,6 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
 
        switch (sk->state) {
        case TCP_CLOSE:
-               /* When state == CLOSED, hash lookup always fails.
-                *
-                * But, there is a back door, the backlog queue.
-                * If we have a sequence of packets in the backlog
-                * during __release_sock() which have a sequence such
-                * that:
-                *      packet X        causes entry to TCP_CLOSE state
-                *      ...
-                *      packet X + N    has FIN bit set
-                *
-                * We report a (luckily) harmless error in this case.
-                * The issue is that backlog queue processing bypasses
-                * any hash lookups (we know which socket packets are for).
-                * The correct behavior here is what 2.0.x did, since
-                * a TCP_CLOSE socket does not exist.  Drop the frame
-                * and send a RST back to the other end.
-                */
-
-               /* 1. The socket may be moved to TIME-WAIT state.
-                  2. While this socket was locked, another socket
-                     with the same identity could be created.
-                  3. To continue?
-
-                  CONCLUSION: discard and only discard!
-
-                  Alternative would be relookup and recurse into tcp_v?_rcv
-                  (not *_do_rcv) to work with timewait and listen states
-                  correctly.
-                */
                goto discard;
 
        case TCP_LISTEN:
@@ -3340,56 +3586,20 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
                goto step6;
        }
 
-       /*   Parse the tcp_options present on this header.
-        *   By this point we really only expect timestamps.
-        *   Note that this really has to be here and not later for PAWS
-        *   (RFC1323) to work.
-        */
-       if (tcp_fast_parse_options(sk, th, tp) && tp->saw_tstamp &&
+       if (tcp_fast_parse_options(skb, th, tp) && tp->saw_tstamp &&
            tcp_paws_discard(tp, skb)) {
                if (!th->rst) {
-                       tcp_send_ack(sk);
+                       NET_INC_STATS_BH(PAWSEstabRejected);
+                       tcp_send_dupack(sk, skb);
                        goto discard;
                }
                /* Reset is accepted even if it did not pass PAWS. */
        }
 
-       /* The silly FIN test here is necessary to see an advancing ACK in
-        * retransmitted FIN frames properly.  Consider the following sequence:
-        *
-        *      host1 --> host2         FIN XSEQ:XSEQ(0) ack YSEQ
-        *      host2 --> host1         FIN YSEQ:YSEQ(0) ack XSEQ
-        *      host1 --> host2         XSEQ:XSEQ(0) ack YSEQ+1
-        *      host2 --> host1         FIN YSEQ:YSEQ(0) ack XSEQ+1     (fails tcp_sequence test)
-        *
-        * At this point the connection will deadlock with host1 believing
-        * that his FIN is never ACK'd, and thus it will retransmit it's FIN
-        * forever.  The following fix is from Taral (taral@taral.net).
-        *
-        * RED-PEN. Seems, the above is not true.
-        * If at least one end is RFC compliant, it will send ACK to
-        * out of window FIN and, hence, move peer to TIME-WAIT.
-        * I comment out this line. --ANK
-        *
-        * RED-PEN. DANGER! tcp_sequence check rejects also SYN-ACKs
-        * received in SYN-RECV. The problem is that description of
-        * segment processing in SYN-RECV state in RFC792 is WRONG.
-        * Correct check would accept ACK from this SYN-ACK, see
-        * figures 6 and 8 (fixed by RFC1122). Compare this
-        * to problem with FIN, they smell similarly. --ANK
-        */
-
        /* step 1: check sequence number */
-       if (!tcp_sequence(tp, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq)
-#if 0
-           && !(th->fin && TCP_SKB_CB(skb)->end_seq == tp->rcv_nxt)
-#endif
-           ) {
-               if (!th->rst) {
-                       NET_INC_STATS_BH(DelayedACKLost);
-                       tcp_enter_quickack_mode(tp);
-                       tcp_send_ack(sk);
-               }
+       if (!tcp_sequence(tp, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq, th->rst)) {
+               if (!th->rst)
+                       tcp_send_dupack(sk, skb);
                goto discard;
        }
 
@@ -3399,10 +3609,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
                goto discard;
        }
 
-       if (tp->saw_tstamp) {
-               tcp_replace_ts_recent(sk, tp,
-                                     TCP_SKB_CB(skb)->seq);
-       }
+       tcp_replace_ts_recent(tp, TCP_SKB_CB(skb)->seq);
 
        /* step 3: check security and precedence [ignored] */
 
@@ -3423,47 +3630,51 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
         */
 
        if (th->syn && TCP_SKB_CB(skb)->seq != tp->syn_seq) {
+               NET_INC_STATS_BH(TCPAbortOnSyn);
                tcp_reset(sk);
                return 1;
        }
 
        /* step 5: check the ACK field */
        if (th->ack) {
-               int acceptable = tcp_ack(sk, th, TCP_SKB_CB(skb)->seq,
-                                        TCP_SKB_CB(skb)->ack_seq, len);
+               int acceptable = tcp_ack(sk, skb, FLAG_SLOWPATH);
 
                switch(sk->state) {
                case TCP_SYN_RECV:
                        if (acceptable) {
-                               tcp_set_state(sk, TCP_ESTABLISHED);
                                tp->copied_seq = tp->rcv_nxt;
+                               mb();
+                               tcp_set_state(sk, TCP_ESTABLISHED);
 
                                /* Note, that this wakeup is only for marginal
                                 * crossed SYN case. Passively open sockets
                                 * are not waked up, because sk->sleep == NULL
                                 * and sk->socket == NULL.
                                 */
-                               if (!sk->dead) {
+                               if (!sk->socket) {
                                        sk->state_change(sk);
                                        sk_wake_async(sk,0,POLL_OUT);
                                }
 
                                tp->snd_una = TCP_SKB_CB(skb)->ack_seq;
                                tp->snd_wnd = ntohs(th->window) << tp->snd_wscale;
-                               tp->snd_wl1 = TCP_SKB_CB(skb)->seq;
-                               tp->snd_wl2 = TCP_SKB_CB(skb)->ack_seq;
+                               tcp_init_wl(tp, TCP_SKB_CB(skb)->ack_seq, TCP_SKB_CB(skb)->seq);
 
                                /* tcp_ack considers this ACK as duplicate
                                 * and does not calculate rtt.
                                 * Fix it at least with timestamps.
                                 */
                                if (tp->saw_tstamp && !tp->srtt)
-                                       tcp_ack_saw_tstamp(sk, tp, 0, 0, FLAG_SYN_ACKED);
+                                       tcp_ack_saw_tstamp(tp);
+
+                               if (tp->tstamp_ok)
+                                       tp->advmss -= TCPOLEN_TSTAMP_ALIGNED;
 
                                tcp_init_metrics(sk);
+                               tcp_initialize_rcv_mss(sk);
+                               tcp_init_buffer_space(sk);
                                tcp_fast_path_on(tp);
                        } else {
-                               SOCK_DEBUG(sk, "bad ack\n");
                                return 1;
                        }
                        break;
@@ -3484,6 +3695,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
                                            (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq &&
                                             after(TCP_SKB_CB(skb)->end_seq - th->fin, tp->rcv_nxt))) {
                                                tcp_done(sk);
+                                               NET_INC_STATS_BH(TCPAbortOnData);
                                                return 1;
                                        }
 
@@ -3543,6 +3755,7 @@ step6:
                if (sk->shutdown & RCV_SHUTDOWN) {
                        if (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq &&
                            after(TCP_SKB_CB(skb)->end_seq - th->fin, tp->rcv_nxt)) {
+                               NET_INC_STATS_BH(TCPAbortOnData);
                                tcp_reset(sk);
                                return 1;
                        }
@@ -3558,13 +3771,11 @@ step6:
        if (sk->state != TCP_CLOSE) {
                tcp_data_snd_check(sk);
                tcp_ack_snd_check(sk);
-               if (tp->sorry)
-                       tcp_new_space(sk);
        }
 
        if (!queued) { 
 discard:
-               kfree_skb(skb);
+               __kfree_skb(skb);
        }
        return 0;
 }
index d9f7dd80d9027f9543cac8426990ab7c55d5d451..85a2e4707013ba54f1053056d4e4e4fbbcf58cd5 100644 (file)
@@ -5,7 +5,7 @@
  *
  *             Implementation of the Transmission Control Protocol(TCP).
  *
- * Version:    $Id: tcp_ipv4.c,v 1.210 2000/07/26 01:04:19 davem Exp $
+ * Version:    $Id: tcp_ipv4.c,v 1.211 2000/08/09 11:59:04 davem Exp $
  *
  *             IPv4 specific functions
  *
@@ -574,9 +574,8 @@ static int tcp_v4_check_established(struct sock *sk)
                           fall back to VJ's scheme and use initial
                           timestamp retrieved from peer table.
                         */
-                       if (tw->substate == TCP_TIME_WAIT &&
-                           sysctl_tcp_tw_recycle && tw->ts_recent_stamp) {
-                               if ((tp->write_seq = tw->snd_nxt + 2) == 0)
+                       if (tw->ts_recent_stamp) {
+                               if ((tp->write_seq = tw->snd_nxt+65535+2) == 0)
                                        tp->write_seq = 1;
                                tp->ts_recent = tw->ts_recent;
                                tp->ts_recent_stamp = tw->ts_recent_stamp;
@@ -691,7 +690,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
                daddr = rt->rt_dst;
 
        err = -ENOBUFS;
-       buff = sock_wmalloc(sk, MAX_TCP_HEADER + 15, 0, GFP_KERNEL);
+       buff = alloc_skb(MAX_TCP_HEADER + 15, GFP_KERNEL);
 
        if (buff == NULL)
                goto failure;
@@ -926,7 +925,7 @@ void tcp_v4_err(struct sk_buff *skb, unsigned char *dp, int len)
                 * we have no reasons to ignore it.
                 */
                if (sk->lock.users == 0)
-                       tcp_enter_cong_avoid(tp);
+                       tcp_enter_cwr(tp);
                goto out;
        case ICMP_PARAMETERPROB:
                err = EPROTO;
@@ -1296,7 +1295,6 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
 {
        struct tcp_opt tp;
        struct open_request *req;
-       struct tcphdr *th = skb->h.th;
        __u32 saddr = skb->nh.iph->saddr;
        __u32 daddr = skb->nh.iph->daddr;
        __u32 isn = TCP_SKB_CB(skb)->when;
@@ -1341,7 +1339,15 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
        tp.mss_clamp = 536;
        tp.user_mss = sk->tp_pinfo.af_tcp.user_mss;
 
-       tcp_parse_options(NULL, th, &tp, want_cookie);
+       tcp_parse_options(skb, &tp);
+
+       if (want_cookie) {
+               tp.sack_ok = 0;
+               tp.wscale_ok = 0;
+               tp.snd_wscale = 0;
+               tp.tstamp_ok = 0;
+               tp.saw_tstamp = 0;
+       }
 
        if (tp.saw_tstamp && tp.rcv_tsval == 0) {
                /* Some OSes (unknown ones, but I see them on web server, which
@@ -1359,6 +1365,8 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
        req->af.v4_req.rmt_addr = saddr;
        req->af.v4_req.opt = tcp_v4_save_options(sk, skb);
        req->class = &or_ipv4;
+       if (!want_cookie)
+               TCP_ECN_create_request(req, skb->h.th);
 
        if (want_cookie) {
 #ifdef CONFIG_SYN_COOKIES
@@ -1384,8 +1392,6 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
                    peer->v4daddr == saddr) {
                        if (xtime.tv_sec < peer->tcp_ts_stamp + TCP_PAWS_MSL &&
                            (s32)(peer->tcp_ts - req->ts_recent) > TCP_PAWS_WINDOW) {
-                               NETDEBUG(printk(KERN_DEBUG "TW_REC: reject openreq %u/%u %u.%u.%u.%u/%u\n", \
-                                       peer->tcp_ts, req->ts_recent, NIPQUAD(saddr), ntohs(skb->h.th->source)));
                                NET_INC_STATS_BH(PAWSPassiveRejected);
                                dst_release(dst);
                                goto drop_and_free;
@@ -1470,10 +1476,8 @@ struct sock * tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
                newtp->ext_header_len = newsk->protinfo.af_inet.opt->optlen;
 
        tcp_sync_mss(newsk, dst->pmtu);
-       tcp_initialize_rcv_mss(newsk);
        newtp->advmss = dst->advmss;
-
-       tcp_init_buffer_space(newsk);
+       tcp_initialize_rcv_mss(newsk);
 
        __tcp_v4_hash(newsk);
        __tcp_inherit_port(sk, newsk);
@@ -1493,33 +1497,30 @@ static struct sock *tcp_v4_hnd_req(struct sock *sk,struct sk_buff *skb)
        struct open_request *req, **prev;
        struct tcphdr *th = skb->h.th;
        struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
+       struct sock *nsk;
 
        /* Find possible connection requests. */
        req = tcp_v4_search_req(tp, skb->nh.iph, th, &prev);
        if (req)
                return tcp_check_req(sk, skb, req, prev);
 
-       if (tp->accept_queue) {
-               struct sock *nsk;
-
-               nsk = __tcp_v4_lookup_established(skb->nh.iph->saddr,
-                                                 th->source,
-                                                 skb->nh.iph->daddr,
-                                                 ntohs(th->dest),
-                                                 tcp_v4_iif(skb));
+       nsk = __tcp_v4_lookup_established(skb->nh.iph->saddr,
+                                         th->source,
+                                         skb->nh.iph->daddr,
+                                         ntohs(th->dest),
+                                         tcp_v4_iif(skb));
 
-               if (nsk) {
-                       if (nsk->state != TCP_TIME_WAIT) {
-                               bh_lock_sock(nsk);
-                               return nsk;
-                       }
-                       tcp_tw_put((struct tcp_tw_bucket*)sk);
-                       return NULL;
+       if (nsk) {
+               if (nsk->state != TCP_TIME_WAIT) {
+                       bh_lock_sock(nsk);
+                       return nsk;
                }
+               tcp_tw_put((struct tcp_tw_bucket*)sk);
+               return NULL;
        }
 
 #ifdef CONFIG_SYN_COOKIES
-       if (!th->rst && (th->syn || th->ack))
+       if (!th->rst && !th->syn && th->ack)
                sk = cookie_v4_check(sk, skb, &(IPCB(skb)->opt));
 #endif
        return sk;
@@ -1534,8 +1535,8 @@ static int tcp_v4_checksum_init(struct sk_buff *skb)
                        return -1;
                }
                skb->ip_summed = CHECKSUM_UNNECESSARY;
-       } else if (skb->ip_summed != CHECKSUM_UNNECESSARY) {
-               if (skb->len <= 68) {
+       } else {
+               if (skb->len <= 76) {
                        if (tcp_v4_check(skb->h.th,skb->len,skb->nh.iph->saddr,
                                         skb->nh.iph->daddr,
                                         csum_partial((char *)skb->h.th, skb->len, 0)))
@@ -1576,7 +1577,7 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
                return 0; 
        }
 
-       if (tcp_checksum_complete(skb))
+       if (skb->len < (skb->h.th->doff<<2) || tcp_checksum_complete(skb))
                goto csum_err;
 
        if (sk->state == TCP_LISTEN) { 
@@ -1634,10 +1635,13 @@ int tcp_v4_rcv(struct sk_buff *skb, unsigned short len)
        /* Count it even if it's bad */
        TCP_INC_STATS_BH(TcpInSegs);
 
-       if (len < sizeof(struct tcphdr))
-               goto bad_packet;
-
-       if (tcp_v4_checksum_init(skb) < 0)
+       /* An explanation is required here, I think.
+        * Packet length and doff are validated by header prediction,
+        * provided case of th->doff==0 is eliminated.
+        * So, we defer the checks. */
+       if (th->doff < sizeof(struct tcphdr)/4 ||
+           (skb->ip_summed != CHECKSUM_UNNECESSARY &&
+            tcp_v4_checksum_init(skb) < 0))
                goto bad_packet;
 
        TCP_SKB_CB(skb)->seq = ntohl(th->seq);
@@ -1645,6 +1649,8 @@ int tcp_v4_rcv(struct sk_buff *skb, unsigned short len)
                                    len - th->doff*4);
        TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
        TCP_SKB_CB(skb)->when = 0;
+       TCP_SKB_CB(skb)->flags = skb->nh.iph->tos;
+       TCP_SKB_CB(skb)->sacked = 0;
        skb->used = 0;
 
        sk = __tcp_v4_lookup(skb->nh.iph->saddr, th->source,
@@ -1674,7 +1680,7 @@ process:
        return ret;
 
 no_tcp_socket:
-       if (tcp_checksum_complete(skb)) {
+       if (len < (th->doff<<2) || tcp_checksum_complete(skb)) {
 bad_packet:
                TCP_INC_STATS_BH(TcpInErrs);
        } else {
@@ -1691,7 +1697,7 @@ discard_and_relse:
        goto discard_it;
 
 do_time_wait:
-       if (tcp_checksum_complete(skb)) {
+       if (len < (th->doff<<2) || tcp_checksum_complete(skb)) {
                TCP_INC_STATS_BH(TcpInErrs);
                goto discard_and_relse;
        }
@@ -1734,7 +1740,8 @@ int tcp_v4_rebuild_header(struct sock *sk)
 {
        struct rtable *rt = (struct rtable *)__sk_dst_check(sk, 0);
        __u32 new_saddr;
-        int want_rewrite = sysctl_ip_dynaddr && sk->state == TCP_SYN_SENT;
+        int want_rewrite = sysctl_ip_dynaddr && sk->state == TCP_SYN_SENT &&
+               !(sk->userlocks & SOCK_BINDADDR_LOCK);
 
        if (rt == NULL) {
                int err;
@@ -1755,11 +1762,7 @@ int tcp_v4_rebuild_header(struct sock *sk)
                __sk_dst_set(sk, &rt->u.dst);
        }
 
-       /* Force route checking if want_rewrite.
-        * The idea is good, the implementation is disguisting.
-        * Well, if I made bind on this socket, you cannot randomly ovewrite
-        * its source address. --ANK
-        */
+       /* Force route checking if want_rewrite. */
        if (want_rewrite) {
                int tmp;
                struct rtable *new_rt;
@@ -1932,12 +1935,19 @@ static int tcp_v4_init_sock(struct sock *sk)
        tp->snd_cwnd_clamp = ~0;
        tp->mss_cache = 536;
 
+       tp->reordering = sysctl_tcp_reordering;
+
        sk->state = TCP_CLOSE;
 
        sk->write_space = tcp_write_space; 
 
        sk->tp_pinfo.af_tcp.af_specific = &ipv4_specific;
 
+       sk->sndbuf = sysctl_tcp_wmem[1];
+       sk->rcvbuf = sysctl_tcp_rmem[1];
+
+       atomic_inc(&tcp_sockets_allocated);
+
        return 0;
 }
 
@@ -1948,7 +1958,7 @@ static int tcp_v4_destroy_sock(struct sock *sk)
        tcp_clear_xmit_timers(sk);
 
        /* Cleanup up the write buffer. */
-       __skb_queue_purge(&sk->write_queue);
+       tcp_writequeue_purge(sk);
 
        /* Cleans up our, hopefuly empty, out_of_order_queue. */
        __skb_queue_purge(&tp->out_of_order_queue);
@@ -1960,11 +1970,13 @@ static int tcp_v4_destroy_sock(struct sock *sk)
        if(sk->prev != NULL)
                tcp_put_port(sk);
 
+       atomic_dec(&tcp_sockets_allocated);
+
        return 0;
 }
 
 /* Proc filesystem TCP sock list dumping. */
-static void get_openreq(struct sock *sk, struct open_request *req, char *tmpbuf, int i)
+static void get_openreq(struct sock *sk, struct open_request *req, char *tmpbuf, int i, int uid)
 {
        int ttd = req->expires - jiffies;
 
@@ -1980,7 +1992,7 @@ static void get_openreq(struct sock *sk, struct open_request *req, char *tmpbuf,
                1,   /* timers active (only the expire timer) */  
                ttd, 
                req->retrans,
-               sk->socket ? sk->socket->inode->i_uid : 0,
+               uid,
                0,  /* non standard timer */  
                0, /* open_requests have no inode */
                atomic_read(&sk->refcnt),
@@ -2000,33 +2012,31 @@ static void get_tcp_sock(struct sock *sp, char *tmpbuf, int i)
        src   = sp->rcv_saddr;
        destp = ntohs(sp->dport);
        srcp  = ntohs(sp->sport);
-       timer_active    = 0;
-       timer_expires   = (unsigned) -1;
-       if (timer_pending(&tp->retransmit_timer) && tp->retransmit_timer.expires < timer_expires) {
+       if (tp->pending == TCP_TIME_RETRANS) {
                timer_active    = 1;
-               timer_expires   = tp->retransmit_timer.expires;
-       } else if (timer_pending(&tp->probe_timer) && tp->probe_timer.expires < timer_expires) {
+               timer_expires   = tp->timeout;
+       } else if (tp->pending == TCP_TIME_PROBE0) {
                timer_active    = 4;
-               timer_expires   = tp->probe_timer.expires;
-       }
-       if (timer_pending(&sp->timer) && sp->timer.expires < timer_expires) {
+               timer_expires   = tp->timeout;
+       } else if (timer_pending(&sp->timer)) {
                timer_active    = 2;
                timer_expires   = sp->timer.expires;
-       }
-       if(timer_active == 0)
+       } else {
+               timer_active    = 0;
                timer_expires = jiffies;
+       }
 
        sprintf(tmpbuf, "%4d: %08X:%04X %08X:%04X"
-               " %02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p %u %u %u %u",
+               " %02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p %u %u %u %u %d",
                i, src, srcp, dest, destp, sp->state, 
                tp->write_seq-tp->snd_una, tp->rcv_nxt-tp->copied_seq,
                timer_active, timer_expires-jiffies,
                tp->retransmits,
-               sp->socket ? sp->socket->inode->i_uid : 0,
+               sock_i_uid(sp),
                tp->probes_out,
-               sp->socket ? sp->socket->inode->i_ino : 0,
+               sock_i_ino(sp),
                atomic_read(&sp->refcnt), sp,
-               tp->rto, tp->ack.ato, tp->ack.quick, tp->ack.pingpong
+               tp->rto, tp->ack.ato, tp->ack.quick, tp->ack.pingpong, sp->sndbuf
                );
 }
 
@@ -2073,6 +2083,7 @@ int tcp_get_info(char *buffer, char **start, off_t offset, int length)
 
                for (sk = tcp_listening_hash[i]; sk; sk = sk->next, num++) {
                        struct open_request *req;
+                       int uid;
                        struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
 
                        if (!TCP_INET_FAMILY(sk->family))
@@ -2089,6 +2100,7 @@ int tcp_get_info(char *buffer, char **start, off_t offset, int length)
                        }
 
 skip_listen:
+                       uid = sock_i_uid(sk);
                        read_lock_bh(&tp->syn_wait_lock);
                        lopt = tp->listen_opt;
                        if (lopt && lopt->qlen != 0) {
@@ -2100,7 +2112,7 @@ skip_listen:
                                                pos += 128;
                                                if (pos < offset)
                                                        continue;
-                                               get_openreq(sk, req, tmpbuf, num);
+                                               get_openreq(sk, req, tmpbuf, num, uid);
                                                len += sprintf(buffer+len, "%-127s\n", tmpbuf);
                                                if(len >= length) {
                                                        read_unlock_bh(&tp->syn_wait_lock);
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
new file mode 100644 (file)
index 0000000..ef7fc36
--- /dev/null
@@ -0,0 +1,970 @@
+/*
+ * INET                An implementation of the TCP/IP protocol suite for the LINUX
+ *             operating system.  INET is implemented using the  BSD Socket
+ *             interface as the means of communication with the user level.
+ *
+ *             Implementation of the Transmission Control Protocol(TCP).
+ *
+ * Version:    $Id: tcp_minisocks.c,v 1.1 2000/08/09 11:59:04 davem Exp $
+ *
+ * Authors:    Ross Biro, <bir7@leland.Stanford.Edu>
+ *             Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
+ *             Mark Evans, <evansmp@uhura.aston.ac.uk>
+ *             Corey Minyard <wf-rch!minyard@relay.EU.net>
+ *             Florian La Roche, <flla@stud.uni-sb.de>
+ *             Charles Hedrick, <hedrick@klinzhai.rutgers.edu>
+ *             Linus Torvalds, <torvalds@cs.helsinki.fi>
+ *             Alan Cox, <gw4pts@gw4pts.ampr.org>
+ *             Matthew Dillon, <dillon@apollo.west.oic.com>
+ *             Arnt Gulbrandsen, <agulbra@nvg.unit.no>
+ *             Jorge Cwik, <jorge@laser.satlink.net>
+ */
+
+#include <linux/config.h>
+#include <linux/mm.h>
+#include <linux/sysctl.h>
+#include <net/tcp.h>
+#include <net/inet_common.h>
+
+#ifdef CONFIG_SYSCTL
+#define SYNC_INIT 0 /* let the user enable it */
+#else
+#define SYNC_INIT 1
+#endif
+
+int sysctl_tcp_tw_recycle = 0;
+int sysctl_tcp_max_tw_buckets = NR_FILE*2;
+
+int sysctl_tcp_syncookies = SYNC_INIT; 
+int sysctl_tcp_abort_on_overflow = 0;
+
+static __inline__ int tcp_in_window(u32 seq, u32 end_seq, u32 s_win, u32 e_win)
+{
+       if (seq == s_win)
+               return 1;
+       if (after(end_seq, s_win) && before(seq, e_win))
+               return 1;
+       return (seq == e_win && seq == end_seq);
+}
+
+/* New-style handling of TIME_WAIT sockets. */
+
+int tcp_tw_count = 0;
+
+
+/* Must be called with locally disabled BHs. */
+void tcp_timewait_kill(struct tcp_tw_bucket *tw)
+{
+       struct tcp_ehash_bucket *ehead;
+       struct tcp_bind_hashbucket *bhead;
+       struct tcp_bind_bucket *tb;
+
+       /* Unlink from established hashes. */
+       ehead = &tcp_ehash[tw->hashent];
+       write_lock(&ehead->lock);
+       if (!tw->pprev) {
+               write_unlock(&ehead->lock);
+               return;
+       }
+       if(tw->next)
+               tw->next->pprev = tw->pprev;
+       *(tw->pprev) = tw->next;
+       tw->pprev = NULL;
+       write_unlock(&ehead->lock);
+
+       /* Disassociate with bind bucket. */
+       bhead = &tcp_bhash[tcp_bhashfn(tw->num)];
+       spin_lock(&bhead->lock);
+       if ((tb = tw->tb) != NULL) {
+               if(tw->bind_next)
+                       tw->bind_next->bind_pprev = tw->bind_pprev;
+               *(tw->bind_pprev) = tw->bind_next;
+               tw->tb = NULL;
+               if (tb->owners == NULL) {
+                       if (tb->next)
+                               tb->next->pprev = tb->pprev;
+                       *(tb->pprev) = tb->next;
+                       kmem_cache_free(tcp_bucket_cachep, tb);
+               }
+       }
+       spin_unlock(&bhead->lock);
+
+#ifdef INET_REFCNT_DEBUG
+       if (atomic_read(&tw->refcnt) != 1) {
+               printk(KERN_DEBUG "tw_bucket %p refcnt=%d\n", tw, atomic_read(&tw->refcnt));
+       }
+#endif
+       tcp_tw_put(tw);
+}
+
+/* 
+ * * Main purpose of TIME-WAIT state is to close connection gracefully,
+ *   when one of ends sits in LAST-ACK or CLOSING retransmitting FIN
+ *   (and, probably, tail of data) and one or more our ACKs are lost.
+ * * What is TIME-WAIT timeout? It is associated with maximal packet
+ *   lifetime in the internet, which results in wrong conclusion, that
+ *   it is set to catch "old duplicate segments" wandering out of their path.
+ *   It is not quite correct. This timeout is calculated so that it exceeds
+ *   maximal retransmision timeout enough to allow to lose one (or more)
+ *   segments sent by peer and our ACKs. This time may be calculated from RTO.
+ * * When TIME-WAIT socket receives RST, it means that another end
+ *   finally closed and we are allowed to kill TIME-WAIT too.
+ * * Second purpose of TIME-WAIT is catching old duplicate segments.
+ *   Well, certainly it is pure paranoia, but if we load TIME-WAIT
+ *   with this semantics, we MUST NOT kill TIME-WAIT state with RSTs.
+ * * If we invented some more clever way to catch duplicates
+ *   (f.e. based on PAWS), we could truncate TIME-WAIT to several RTOs.
+ *
+ * The algorithm below is based on FORMAL INTERPRETATION of RFCs.
+ * When you compare it to RFCs, please, read section SEGMENT ARRIVES
+ * from the very beginning.
+ *
+ * NOTE. With recycling (and later with fin-wait-2) TW bucket
+ * is _not_ stateless. It means, that strictly speaking we must
+ * spinlock it. I do not want! Well, probability of misbehaviour
+ * is ridiculously low and, seems, we could use some mb() tricks
+ * to avoid misread sequence numbers, states etc.  --ANK
+ */
+enum tcp_tw_status
+tcp_timewait_state_process(struct tcp_tw_bucket *tw, struct sk_buff *skb,
+                          struct tcphdr *th, unsigned len)
+{
+       struct tcp_opt tp;
+       int paws_reject = 0;
+
+       tp.saw_tstamp = 0;
+       if (th->doff > (sizeof(struct tcphdr)>>2) && tw->ts_recent_stamp) {
+               tcp_parse_options(skb, &tp);
+
+               if (tp.saw_tstamp) {
+                       tp.ts_recent = tw->ts_recent;
+                       tp.ts_recent_stamp = tw->ts_recent_stamp;
+                       paws_reject = tcp_paws_check(&tp, th->rst);
+               }
+       }
+
+       if (tw->substate == TCP_FIN_WAIT2) {
+               /* Just repeat all the checks of tcp_rcv_state_process() */
+
+               /* Out of window, send ACK */
+               if (paws_reject ||
+                   !tcp_in_window(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq,
+                                  tw->rcv_nxt, tw->rcv_nxt + tw->rcv_wnd))
+                       return TCP_TW_ACK;
+
+               if (th->rst)
+                       goto kill;
+
+               if (th->syn && TCP_SKB_CB(skb)->seq != tw->syn_seq)
+                       goto kill_with_rst;
+
+               /* Dup ACK? */
+               if (!after(TCP_SKB_CB(skb)->end_seq, tw->rcv_nxt) ||
+                   TCP_SKB_CB(skb)->end_seq == TCP_SKB_CB(skb)->seq) {
+                       tcp_tw_put(tw);
+                       return TCP_TW_SUCCESS;
+               }
+
+               /* New data or FIN. If new data arrive after half-duplex close,
+                * reset.
+                */
+               if (!th->fin || TCP_SKB_CB(skb)->end_seq != tw->rcv_nxt+1) {
+kill_with_rst:
+                       tcp_tw_deschedule(tw);
+                       tcp_timewait_kill(tw);
+                       tcp_tw_put(tw);
+                       return TCP_TW_RST;
+               }
+
+               /* FIN arrived, enter true time-wait state. */
+               tw->substate = TCP_TIME_WAIT;
+               tw->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
+               if (tp.saw_tstamp) {
+                       tw->ts_recent_stamp = xtime.tv_sec;
+                       tw->ts_recent = tp.rcv_tsval;
+               }
+
+               /* I am shamed, but failed to make it more elegant.
+                * Yes, it is direct reference to IP, which is impossible
+                * to generalize to IPv6. Taking into account that IPv6
+                * does not understand recycling in any case, it is not
+                * a big problem in practice. --ANK */
+               if (tw->family == AF_INET &&
+                   sysctl_tcp_tw_recycle && tw->ts_recent_stamp &&
+                   tcp_v4_tw_remember_stamp(tw))
+                       tcp_tw_schedule(tw, tw->timeout);
+               else
+                       tcp_tw_schedule(tw, TCP_TIMEWAIT_LEN);
+               return TCP_TW_ACK;
+       }
+
+       /*
+        *      Now real TIME-WAIT state.
+        *
+        *      RFC 1122:
+        *      "When a connection is [...] on TIME-WAIT state [...]
+        *      [a TCP] MAY accept a new SYN from the remote TCP to
+        *      reopen the connection directly, if it:
+        *      
+        *      (1)  assigns its initial sequence number for the new
+        *      connection to be larger than the largest sequence
+        *      number it used on the previous connection incarnation,
+        *      and
+        *
+        *      (2)  returns to TIME-WAIT state if the SYN turns out 
+        *      to be an old duplicate".
+        */
+
+       if (!paws_reject &&
+           (TCP_SKB_CB(skb)->seq == tw->rcv_nxt &&
+            (TCP_SKB_CB(skb)->seq == TCP_SKB_CB(skb)->end_seq || th->rst))) {
+               /* In window segment, it may be only reset or bare ack. */
+
+               if (th->rst) {
+                       /* This is TIME_WAIT assassination, in two flavors.
+                        * Oh well... nobody has a sufficient solution to this
+                        * protocol bug yet.
+                        */
+                       if (sysctl_tcp_rfc1337 == 0) {
+kill:
+                               tcp_tw_deschedule(tw);
+                               tcp_timewait_kill(tw);
+                               tcp_tw_put(tw);
+                               return TCP_TW_SUCCESS;
+                       }
+               }
+               tcp_tw_schedule(tw, TCP_TIMEWAIT_LEN);
+
+               if (tp.saw_tstamp) {
+                       tw->ts_recent = tp.rcv_tsval;
+                       tw->ts_recent_stamp = xtime.tv_sec;
+               }
+
+               tcp_tw_put(tw);
+               return TCP_TW_SUCCESS;
+       }
+
+       /* Out of window segment.
+
+          All the segments are ACKed immediately.
+
+          The only exception is new SYN. We accept it, if it is
+          not old duplicate and we are not in danger to be killed
+          by delayed old duplicates. The RFC check (that it has a
+          newer sequence number) works at rates <40Mbit/sec.
+          However, if paws works, it is reliable AND even more,
+          we even may relax silly seq space cutoff.
+
+          RED-PEN: we violate main RFC requirement, if this SYN will appear
+          old duplicate (i.e. we receive RST in reply to SYN-ACK),
+          we must return socket to time-wait state. It is not good,
+          but not fatal yet.
+        */
+
+       if (th->syn && !th->rst && !th->ack && !paws_reject &&
+           (after(TCP_SKB_CB(skb)->seq, tw->rcv_nxt) ||
+            (tp.saw_tstamp && (s32)(tw->ts_recent - tp.rcv_tsval) < 0))) {
+               u32 isn = tw->snd_nxt+65535+2;
+               if (isn == 0)
+                       isn++;
+               TCP_SKB_CB(skb)->when = isn;
+               return TCP_TW_SYN;
+       }
+
+       if (paws_reject)
+               NET_INC_STATS_BH(PAWSEstabRejected);
+
+       if(!th->rst) {
+               /* In this case we must reset the TIMEWAIT timer.
+                *
+                * If it is ACKless SYN it may be both old duplicate
+                * and new good SYN with random sequence number <rcv_nxt.
+                * Do not reschedule in the last case.
+                */
+               if (paws_reject || th->ack)
+                       tcp_tw_schedule(tw, TCP_TIMEWAIT_LEN);
+
+               /* Send ACK. Note, we do not put the bucket,
+                * it will be released by caller.
+                */
+               return TCP_TW_ACK;
+       }
+       tcp_tw_put(tw);
+       return TCP_TW_SUCCESS;
+}
+
+/* Enter the time wait state.  This is called with locally disabled BH.
+ * Essentially we whip up a timewait bucket, copy the
+ * relevant info into it from the SK, and mess with hash chains
+ * and list linkage.
+ */
+static void __tcp_tw_hashdance(struct sock *sk, struct tcp_tw_bucket *tw)
+{
+       struct tcp_ehash_bucket *ehead = &tcp_ehash[sk->hashent];
+       struct tcp_bind_hashbucket *bhead;
+       struct sock **head, *sktw;
+
+       write_lock(&ehead->lock);
+
+       /* Step 1: Remove SK from established hash. */
+       if (sk->pprev) {
+               if(sk->next)
+                       sk->next->pprev = sk->pprev;
+               *sk->pprev = sk->next;
+               sk->pprev = NULL;
+               sock_prot_dec_use(sk->prot);
+       }
+
+       /* Step 2: Hash TW into TIMEWAIT half of established hash table. */
+       head = &(ehead + tcp_ehash_size)->chain;
+       sktw = (struct sock *)tw;
+       if((sktw->next = *head) != NULL)
+               (*head)->pprev = &sktw->next;
+       *head = sktw;
+       sktw->pprev = head;
+       atomic_inc(&tw->refcnt);
+
+       write_unlock(&ehead->lock);
+
+       /* Step 3: Put TW into bind hash. Original socket stays there too.
+          Note, that any socket with sk->num!=0 MUST be bound in binding
+          cache, even if it is closed.
+        */
+       bhead = &tcp_bhash[tcp_bhashfn(sk->num)];
+       spin_lock(&bhead->lock);
+       tw->tb = (struct tcp_bind_bucket *)sk->prev;
+       BUG_TRAP(sk->prev!=NULL);
+       if ((tw->bind_next = tw->tb->owners) != NULL)
+               tw->tb->owners->bind_pprev = &tw->bind_next;
+       tw->tb->owners = (struct sock*)tw;
+       tw->bind_pprev = &tw->tb->owners;
+       spin_unlock(&bhead->lock);
+}
+
+/* 
+ * Move a socket to time-wait or dead fin-wait-2 state.
+ */ 
+void tcp_time_wait(struct sock *sk, int state, int timeo)
+{
+       struct tcp_tw_bucket *tw = NULL;
+       struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
+       int recycle_ok = 0;
+
+       if (sysctl_tcp_tw_recycle && tp->ts_recent_stamp)
+               recycle_ok = tp->af_specific->remember_stamp(sk);
+
+       if (tcp_tw_count < sysctl_tcp_max_tw_buckets)
+               tw = kmem_cache_alloc(tcp_timewait_cachep, SLAB_ATOMIC);
+
+       if(tw != NULL) {
+               int rto = (tp->rto<<2) - (tp->rto>>1);
+
+               /* Give us an identity. */
+               tw->daddr       = sk->daddr;
+               tw->rcv_saddr   = sk->rcv_saddr;
+               tw->bound_dev_if= sk->bound_dev_if;
+               tw->num         = sk->num;
+               tw->state       = TCP_TIME_WAIT;
+               tw->substate    = state;
+               tw->sport       = sk->sport;
+               tw->dport       = sk->dport;
+               tw->family      = sk->family;
+               tw->reuse       = sk->reuse;
+               tw->rcv_wscale  = tp->rcv_wscale;
+               atomic_set(&tw->refcnt, 0);
+
+               tw->hashent     = sk->hashent;
+               tw->rcv_nxt     = tp->rcv_nxt;
+               tw->snd_nxt     = tp->snd_nxt;
+               tw->rcv_wnd     = tcp_receive_window(tp);
+               tw->syn_seq     = tp->syn_seq;
+               tw->ts_recent   = tp->ts_recent;
+               tw->ts_recent_stamp= tp->ts_recent_stamp;
+               tw->pprev_death = NULL;
+
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+               if(tw->family == PF_INET6) {
+                       memcpy(&tw->v6_daddr,
+                              &sk->net_pinfo.af_inet6.daddr,
+                              sizeof(struct in6_addr));
+                       memcpy(&tw->v6_rcv_saddr,
+                              &sk->net_pinfo.af_inet6.rcv_saddr,
+                              sizeof(struct in6_addr));
+               }
+#endif
+               /* Linkage updates. */
+               __tcp_tw_hashdance(sk, tw);
+
+               /* Get the TIME_WAIT timeout firing. */
+               if (timeo < rto)
+                       timeo = rto;
+
+               if (recycle_ok) {
+                       tw->timeout = rto;
+               } else {
+                       tw->timeout = TCP_TIMEWAIT_LEN;
+                       if (state == TCP_TIME_WAIT)
+                               timeo = TCP_TIMEWAIT_LEN;
+               }
+
+               tcp_tw_schedule(tw, timeo);
+       } else {
+               /* Sorry, if we're out of memory, just CLOSE this
+                * socket up.  We've got bigger problems than
+                * non-graceful socket closings.
+                */
+               if (net_ratelimit())
+                       printk(KERN_INFO "TCP: time wait bucket table overflow\n");
+       }
+
+       tcp_update_metrics(sk);
+       tcp_done(sk);
+}
+
+/* Kill off TIME_WAIT sockets once their lifetime has expired. */
+static int tcp_tw_death_row_slot = 0;
+
+static void tcp_twkill(unsigned long);
+
+static struct tcp_tw_bucket *tcp_tw_death_row[TCP_TWKILL_SLOTS];
+static spinlock_t tw_death_lock = SPIN_LOCK_UNLOCKED;
+static struct timer_list tcp_tw_timer = { function: tcp_twkill };
+
+/* Slow-timer reaper: empty the current death-row slot, killing every
+ * TIME_WAIT bucket chained there, then advance the slot hand and rearm
+ * the timer while buckets remain.
+ */
+static void SMP_TIMER_NAME(tcp_twkill)(unsigned long dummy)
+{
+       struct tcp_tw_bucket *tw;
+       int killed = 0;
+
+       /* NOTE: compare this to previous version where lock
+        * was released after detaching chain. It was racy,
+        * because tw buckets are scheduled in not serialized context
+        * in 2.3 (with netfilter), and with softnet it is common, because
+        * soft irqs are not sequenced.
+        */
+       spin_lock(&tw_death_lock);
+
+       if (tcp_tw_count == 0)
+               goto out;
+
+       while((tw = tcp_tw_death_row[tcp_tw_death_row_slot]) != NULL) {
+               tcp_tw_death_row[tcp_tw_death_row_slot] = tw->next_death;
+               tw->pprev_death = NULL;
+               /* Drop the lock around the actual kill; the chain head was
+                * already advanced, so concurrent schedulers stay consistent.
+                */
+               spin_unlock(&tw_death_lock);
+
+               tcp_timewait_kill(tw);
+               tcp_tw_put(tw);
+
+               killed++;
+
+               spin_lock(&tw_death_lock);
+       }
+       tcp_tw_death_row_slot =
+               ((tcp_tw_death_row_slot + 1) & (TCP_TWKILL_SLOTS - 1));
+
+       if ((tcp_tw_count -= killed) != 0)
+               mod_timer(&tcp_tw_timer, jiffies+TCP_TWKILL_PERIOD);
+       net_statistics[smp_processor_id()*2].TimeWaited += killed;
+out:
+       spin_unlock(&tw_death_lock);
+}
+
+SMP_TIMER_DEFINE(tcp_twkill, tcp_twkill_task);
+
+/* These are always called from BH context.  See callers in
+ * tcp_input.c to verify this.
+ */
+
+/* This is for handling early-kills of TIME_WAIT sockets. */
+void tcp_tw_deschedule(struct tcp_tw_bucket *tw)
+{
+       spin_lock(&tw_death_lock);
+       if (tw->pprev_death) {
+               /* Unlink from whichever death chain it sits on and drop
+                * the reference the scheduler held on it.
+                */
+               if(tw->next_death)
+                       tw->next_death->pprev_death = tw->pprev_death;
+               *tw->pprev_death = tw->next_death;
+               tw->pprev_death = NULL;
+               tcp_tw_put(tw);
+               if (--tcp_tw_count == 0)
+                       del_timer(&tcp_tw_timer);
+       }
+       spin_unlock(&tw_death_lock);
+}
+
+/* Short-time timewait calendar */
+
+/* Calendar hand: index of the next slot to expire, or -1 when the
+ * calendar is empty and tcp_twcal_timer is idle.
+ */
+static int tcp_twcal_hand = -1;
+/* jiffies value corresponding to the slot at tcp_twcal_hand. */
+static int tcp_twcal_jiffie;
+static void tcp_twcal_tick(unsigned long);
+static struct timer_list tcp_twcal_timer = {function: tcp_twcal_tick};
+static struct tcp_tw_bucket *tcp_twcal_row[TCP_TW_RECYCLE_SLOTS];
+
+/* (Re)schedule a TIME_WAIT bucket for destruction after @timeo jiffies.
+ * Short timeouts go on the fine-grained recycle calendar, long ones on
+ * the coarse slow-timer death row.  Caller context: BH (see note above).
+ */
+void tcp_tw_schedule(struct tcp_tw_bucket *tw, int timeo)
+{
+       struct tcp_tw_bucket **tpp;
+       int slot;
+
+       /* timeout := RTO * 3.5
+        *
+        * 3.5 = 1+2+0.5 to wait for two retransmits.
+        *
+        * RATIONALE: if FIN arrived and we entered TIME-WAIT state,
+        * our ACK acking that FIN can be lost. If N subsequent retransmitted
+        * FINs (or previous segments) are lost (probability of such event
+        * is p^(N+1), where p is probability to lose single packet and
+        * time to detect the loss is about RTO*(2^N - 1) with exponential
+        * backoff). Normal timewait length is calculated so, that we
+        * waited at least for one retransmitted FIN (maximal RTO is 120sec).
+        * [ BTW Linux, following BSD, violates this requirement waiting
+        *   only for 60sec, we should wait at least for 240 secs.
+        *   Well, 240 consumes too much of resources 8)
+        * ]
+        * This interval is not reduced to catch old duplicate and
+        * responses to our wandering segments living for two MSLs.
+        * However, if we use PAWS to detect
+        * old duplicates, we can reduce the interval to bounds required
+        * by RTO, rather than MSL. So, if peer understands PAWS, we
+        * kill tw bucket after 3.5*RTO (it is important that this number
+        * is greater than TS tick!) and detect old duplicates with help
+        * of PAWS.
+        */
+       slot = (timeo + (1<<TCP_TW_RECYCLE_TICK) - 1) >> TCP_TW_RECYCLE_TICK;
+
+       spin_lock(&tw_death_lock);
+
+       /* Unlink it, if it was scheduled */
+       if (tw->pprev_death) {
+               if(tw->next_death)
+                       tw->next_death->pprev_death = tw->pprev_death;
+               *tw->pprev_death = tw->next_death;
+               tw->pprev_death = NULL;
+               tcp_tw_count--;
+       } else
+               atomic_inc(&tw->refcnt);
+
+       if (slot >= TCP_TW_RECYCLE_SLOTS) {
+               /* Schedule to slow timer */
+               if (timeo >= TCP_TIMEWAIT_LEN) {
+                       slot = TCP_TWKILL_SLOTS-1;
+               } else {
+                       slot = (timeo + TCP_TWKILL_PERIOD-1) / TCP_TWKILL_PERIOD;
+                       if (slot >= TCP_TWKILL_SLOTS)
+                               slot = TCP_TWKILL_SLOTS-1;
+               }
+               tw->ttd = jiffies + timeo;
+               slot = (tcp_tw_death_row_slot + slot) & (TCP_TWKILL_SLOTS - 1);
+               tpp = &tcp_tw_death_row[slot];
+       } else {
+               tw->ttd = jiffies + (slot<<TCP_TW_RECYCLE_TICK);
+
+               if (tcp_twcal_hand < 0) {
+                       /* Calendar was empty: start it at this entry. */
+                       tcp_twcal_hand = 0;
+                       tcp_twcal_jiffie = jiffies;
+                       tcp_twcal_timer.expires = tcp_twcal_jiffie + (slot<<TCP_TW_RECYCLE_TICK);
+                       add_timer(&tcp_twcal_timer);
+               } else {
+                       /* Pull the timer earlier if this entry expires first. */
+                       if ((long)(tcp_twcal_timer.expires - jiffies) > (slot<<TCP_TW_RECYCLE_TICK))
+                               mod_timer(&tcp_twcal_timer, jiffies + (slot<<TCP_TW_RECYCLE_TICK));
+                       slot = (tcp_twcal_hand + slot)&(TCP_TW_RECYCLE_SLOTS-1);
+               }
+               tpp = &tcp_twcal_row[slot];
+       }
+
+       if((tw->next_death = *tpp) != NULL)
+               (*tpp)->pprev_death = &tw->next_death;
+       *tpp = tw;
+       tw->pprev_death = tpp;
+
+       if (tcp_tw_count++ == 0)
+               mod_timer(&tcp_tw_timer, jiffies+TCP_TWKILL_PERIOD);
+       spin_unlock(&tw_death_lock);
+}
+
+/* Fast-timer reaper for the recycle calendar: expire every slot whose
+ * deadline has passed, remember the first still-pending slot as the new
+ * hand, and rearm the timer for it (or mark the calendar empty).
+ */
+void SMP_TIMER_NAME(tcp_twcal_tick)(unsigned long dummy)
+{
+       int n, slot;
+       unsigned long j;
+       unsigned long now = jiffies;
+       int killed = 0;
+       int adv = 0;
+
+       spin_lock(&tw_death_lock);
+       if (tcp_twcal_hand < 0)
+               goto out;
+
+       slot = tcp_twcal_hand;
+       j = tcp_twcal_jiffie;
+
+       for (n=0; n<TCP_TW_RECYCLE_SLOTS; n++) {
+               if ((long)(j - now) <= 0) {
+                       /* Slot deadline reached: kill everything in it. */
+                       struct tcp_tw_bucket *tw;
+
+                       while((tw = tcp_twcal_row[slot]) != NULL) {
+                               tcp_twcal_row[slot] = tw->next_death;
+                               tw->pprev_death = NULL;
+
+                               tcp_timewait_kill(tw);
+                               tcp_tw_put(tw);
+                               killed++;
+                       }
+               } else {
+                       if (!adv) {
+                               adv = 1;
+                               tcp_twcal_jiffie = j;
+                               tcp_twcal_hand = slot;
+                       }
+
+                       if (tcp_twcal_row[slot] != NULL) {
+                               mod_timer(&tcp_twcal_timer, j);
+                               goto out;
+                       }
+               }
+               j += (1<<TCP_TW_RECYCLE_TICK);
+               slot = (slot+1)&(TCP_TW_RECYCLE_SLOTS-1);
+       }
+       tcp_twcal_hand = -1;
+
+out:
+       if ((tcp_tw_count -= killed) == 0)
+               del_timer(&tcp_tw_timer);
+       net_statistics[smp_processor_id()*2].TimeWaitKilled += killed;
+       spin_unlock(&tw_death_lock);
+}
+
+SMP_TIMER_DEFINE(tcp_twcal_tick, tcp_twcal_tasklet);
+
+
+/* This is not only more efficient than what we used to do, it eliminates
+ * a lot of code duplication between IPv4/IPv6 SYN recv processing. -DaveM
+ *
+ * Actually, we could avoid lots of memory writes here. tp of listening
+ * socket contains all necessary default parameters.
+ *
+ * Returns the new SYN_RECV child socket, or NULL if allocation failed.
+ */
+struct sock *tcp_create_openreq_child(struct sock *sk, struct open_request *req, struct sk_buff *skb)
+{
+       struct sock *newsk = sk_alloc(PF_INET, GFP_ATOMIC, 0);
+
+       if(newsk != NULL) {
+               struct tcp_opt *newtp;
+#ifdef CONFIG_FILTER
+               struct sk_filter *filter;
+#endif
+
+               /* Start from a byte copy of the listener, then repair every
+                * field that must not be shared with it.
+                */
+               memcpy(newsk, sk, sizeof(*newsk));
+               newsk->state = TCP_SYN_RECV;
+
+               /* SANITY */
+               newsk->pprev = NULL;
+               newsk->prev = NULL;
+
+               /* Clone the TCP header template */
+               newsk->dport = req->rmt_port;
+
+               sock_lock_init(newsk);
+               bh_lock_sock(newsk);
+
+               atomic_set(&newsk->rmem_alloc, 0);
+               skb_queue_head_init(&newsk->receive_queue);
+               atomic_set(&newsk->wmem_alloc, 0);
+               skb_queue_head_init(&newsk->write_queue);
+               atomic_set(&newsk->omem_alloc, 0);
+               newsk->wmem_queued = 0;
+               newsk->forward_alloc = 0;
+
+               newsk->done = 0;
+               newsk->proc = 0;
+               newsk->backlog.head = newsk->backlog.tail = NULL;
+               skb_queue_head_init(&newsk->error_queue);
+               newsk->write_space = tcp_write_space;
+#ifdef CONFIG_FILTER
+               /* The memcpy shared the listener's filter; take a charge
+                * for the child's reference to it.
+                */
+               if ((filter = newsk->filter) != NULL)
+                       sk_filter_charge(newsk, filter);
+#endif
+
+               /* Now setup tcp_opt */
+               newtp = &(newsk->tp_pinfo.af_tcp);
+               newtp->pred_flags = 0;
+               newtp->rcv_nxt = req->rcv_isn + 1;
+               newtp->snd_nxt = req->snt_isn + 1;
+               newtp->snd_una = req->snt_isn + 1;
+               newtp->snd_sml = req->snt_isn + 1;
+
+               tcp_delack_init(newtp);
+
+               tcp_prequeue_init(newtp);
+
+               tcp_init_wl(newtp, req->snt_isn, req->rcv_isn);
+
+               newtp->retransmits = 0;
+               newtp->backoff = 0;
+               newtp->srtt = 0;
+               newtp->mdev = TCP_TIMEOUT_INIT;
+               newtp->rto = TCP_TIMEOUT_INIT;
+
+               newtp->packets_out = 0;
+               newtp->left_out = 0;
+               newtp->retrans_out = 0;
+               newtp->sacked_out = 0;
+               newtp->fackets_out = 0;
+               newtp->snd_ssthresh = 0x7fffffff;
+
+               /* So many TCP implementations out there (incorrectly) count the
+                * initial SYN frame in their delayed-ACK and congestion control
+                * algorithms that we must have the following bandaid to talk
+                * efficiently to them.  -DaveM
+                */
+               newtp->snd_cwnd = 2;
+               newtp->snd_cwnd_cnt = 0;
+
+               newtp->ca_state = TCP_CA_Open;
+               tcp_init_xmit_timers(newsk);
+               skb_queue_head_init(&newtp->out_of_order_queue);
+               newtp->send_head = NULL;
+               newtp->rcv_wup = req->rcv_isn + 1;
+               newtp->write_seq = req->snt_isn + 1;
+               newtp->pushed_seq = newtp->write_seq;
+               newtp->copied_seq = req->rcv_isn + 1;
+
+               newtp->saw_tstamp = 0;
+
+               newtp->dsack = 0;
+               newtp->eff_sacks = 0;
+
+               newtp->probes_out = 0;
+               newtp->num_sacks = 0;
+               newtp->syn_seq = req->rcv_isn;
+               newtp->fin_seq = req->rcv_isn;
+               newtp->urg_data = 0;
+               newtp->listen_opt = NULL;
+               newtp->accept_queue = newtp->accept_queue_tail = NULL;
+               /* Deinitialize syn_wait_lock to trap illegal accesses. */
+               memset(&newtp->syn_wait_lock, 0, sizeof(newtp->syn_wait_lock));
+
+               /* Back to base struct sock members. */
+               newsk->err = 0;
+               newsk->priority = 0;
+               atomic_set(&newsk->refcnt, 1);
+#ifdef INET_REFCNT_DEBUG
+               atomic_inc(&inet_sock_nr);
+#endif
+               atomic_inc(&tcp_sockets_allocated);
+
+               if (newsk->keepopen)
+                       tcp_reset_keepalive_timer(newsk, keepalive_time_when(newtp));
+               newsk->socket = NULL;
+               newsk->sleep = NULL;
+
+               /* Inherit the options negotiated in the open_request. */
+               newtp->tstamp_ok = req->tstamp_ok;
+               if((newtp->sack_ok = req->sack_ok) != 0) {
+                       if (sysctl_tcp_fack)
+                               newtp->sack_ok |= 2;
+               }
+               newtp->window_clamp = req->window_clamp;
+               newtp->rcv_ssthresh = req->rcv_wnd;
+               newtp->rcv_wnd = req->rcv_wnd;
+               newtp->wscale_ok = req->wscale_ok;
+               if (newtp->wscale_ok) {
+                       newtp->snd_wscale = req->snd_wscale;
+                       newtp->rcv_wscale = req->rcv_wscale;
+               } else {
+                       newtp->snd_wscale = newtp->rcv_wscale = 0;
+                       newtp->window_clamp = min(newtp->window_clamp,65535);
+               }
+               newtp->snd_wnd = ntohs(skb->h.th->window) << newtp->snd_wscale;
+               newtp->max_window = newtp->snd_wnd;
+
+               if (newtp->tstamp_ok) {
+                       newtp->ts_recent = req->ts_recent;
+                       newtp->ts_recent_stamp = xtime.tv_sec;
+                       newtp->tcp_header_len = sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED;
+               } else {
+                       newtp->ts_recent_stamp = 0;
+                       newtp->tcp_header_len = sizeof(struct tcphdr);
+               }
+               if (skb->len >= TCP_MIN_RCVMSS+newtp->tcp_header_len)
+                       newtp->ack.last_seg_size = skb->len-newtp->tcp_header_len;
+               newtp->mss_clamp = req->mss;
+               TCP_ECN_openreq_child(newtp, req);
+       }
+       return newsk;
+}
+
+/* 
+ *     Process an incoming packet for SYN_RECV sockets represented
+ *     as an open_request.
+ *
+ *     Returns NULL when the segment was consumed or dropped, the
+ *     listening socket when a reset must be sent for an invalid ACK,
+ *     or the newly created child socket on a valid final ACK.
+ */
+
+struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb,
+                          struct open_request *req,
+                          struct open_request **prev)
+{
+       struct tcphdr *th = skb->h.th;
+       struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
+       u32 flg = tcp_flag_word(th) & (TCP_FLAG_RST|TCP_FLAG_SYN|TCP_FLAG_ACK);
+       int paws_reject = 0;
+       struct tcp_opt ttp;
+       struct sock *child;
+
+       /* Parse options into a scratch tcp_opt for the PAWS check. */
+       ttp.saw_tstamp = 0;
+       if (th->doff > (sizeof(struct tcphdr)>>2)) {
+               tcp_parse_options(skb, &ttp);
+
+               if (ttp.saw_tstamp) {
+                       ttp.ts_recent = req->ts_recent;
+                       /* We do not store true stamp, but it is not required,
+                        * it can be estimated (approximately)
+                        * from another data.
+                        */
+                       ttp.ts_recent_stamp = xtime.tv_sec - ((TCP_TIMEOUT_INIT/HZ)<<req->retrans);
+                       paws_reject = tcp_paws_check(&ttp, th->rst);
+               }
+       }
+
+       /* Check for pure retransmitted SYN. */
+       if (TCP_SKB_CB(skb)->seq == req->rcv_isn &&
+           flg == TCP_FLAG_SYN &&
+           !paws_reject) {
+               /*
+                * RFC793 draws (Incorrectly! It was fixed in RFC1122)
+                * this case on figure 6 and figure 8, but formal
+                * protocol description says NOTHING.
+                * To be more exact, it says that we should send ACK,
+                * because this segment (at least, if it has no data)
+                * is out of window.
+                *
+                *  CONCLUSION: RFC793 (even with RFC1122) DOES NOT
+                *  describe SYN-RECV state. All the description
+                *  is wrong, we cannot believe to it and should
+                *  rely only on common sense and implementation
+                *  experience.
+                *
+                * Enforce "SYN-ACK" according to figure 8, figure 6
+                * of RFC793, fixed by RFC1122.
+                */
+               req->class->rtx_syn_ack(sk, req, NULL);
+               return NULL;
+       }
+
+       /* Further reproduces section "SEGMENT ARRIVES"
+          for state SYN-RECEIVED of RFC793.
+          It is broken, however, it does not work only
+          when SYNs are crossed, which is impossible in our
+          case.
+
+          But generally, we should (RFC lies!) to accept ACK
+          from SYNACK both here and in tcp_rcv_state_process().
+          tcp_rcv_state_process() does not, hence, we do not too.
+
+          Note that the case is absolutely generic:
+          we cannot optimize anything here without
+          violating protocol. All the checks must be made
+          before attempt to create socket.
+        */
+
+       /* RFC793: "first check sequence number". */
+
+       if (paws_reject || !tcp_in_window(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq,
+                                         req->rcv_isn+1, req->rcv_isn+1+req->rcv_wnd)) {
+               /* Out of window: send ACK and drop. */
+               if (!(flg & TCP_FLAG_RST))
+                       req->class->send_ack(skb, req);
+               if (paws_reject)
+                       NET_INC_STATS_BH(PAWSEstabRejected);
+               return NULL;
+       }
+
+       /* In sequence, PAWS is OK. */
+
+       if (ttp.saw_tstamp && !after(TCP_SKB_CB(skb)->seq, req->rcv_isn+1))
+               req->ts_recent = ttp.rcv_tsval;
+
+       if (TCP_SKB_CB(skb)->seq == req->rcv_isn) {
+               /* Truncate SYN, it is out of window starting
+                  at req->rcv_isn+1. */
+               flg &= ~TCP_FLAG_SYN;
+       }
+
+       /* RFC793: "second check the RST bit" and
+        *         "fourth, check the SYN bit"
+        */
+       if (flg & (TCP_FLAG_RST|TCP_FLAG_SYN))
+               goto embryonic_reset;
+
+       /* RFC793: "fifth check the ACK field" */
+
+       if (!(flg & TCP_FLAG_ACK))
+               return NULL;
+
+       /* Invalid ACK: reset will be sent by listening socket */
+       if (TCP_SKB_CB(skb)->ack_seq != req->snt_isn+1)
+               return sk;
+       /* Also, it would be not so bad idea to check rcv_tsecr, which
+        * is essentially ACK extension and too early or too late values
+        * should cause reset in unsynchronized states.
+        */
+
+       /* If TCP_DEFER_ACCEPT is set, drop bare ACK. */
+       if (tp->defer_accept && TCP_SKB_CB(skb)->end_seq == req->rcv_isn+1) {
+               req->acked = 1;
+               return NULL;
+       }
+
+       /* OK, ACK is valid, create big socket and
+        * feed this segment to it. It will repeat all
+        * the tests. THIS SEGMENT MUST MOVE SOCKET TO
+        * ESTABLISHED STATE. If it will be dropped after
+        * socket is created, wait for troubles.
+        */
+       child = tp->af_specific->syn_recv_sock(sk, skb, req, NULL);
+       if (child == NULL)
+               goto listen_overflow;
+
+       /* Move the request off the SYN queue and onto the accept queue. */
+       tcp_synq_unlink(tp, req, prev);
+       tcp_synq_removed(sk, req);
+
+       tcp_acceptq_queue(sk, req, child);
+       return child;
+
+listen_overflow:
+       if (!sysctl_tcp_abort_on_overflow) {
+               req->acked = 1;
+               return NULL;
+       }
+
+embryonic_reset:
+       NET_INC_STATS_BH(EmbryonicRsts);
+       if (!(flg & TCP_FLAG_RST))
+               req->class->send_reset(skb);
+
+       tcp_synq_drop(sk, req, prev);
+       return NULL;
+}
+
+/*
+ * Queue segment on the new socket if the new socket is active,
+ * otherwise we just shortcircuit this and continue with
+ * the new socket.
+ *
+ * Returns the verdict of tcp_rcv_state_process(), or 0 when the
+ * segment was placed on the child's backlog instead.
+ */
+
+int tcp_child_process(struct sock *parent, struct sock *child,
+                     struct sk_buff *skb)
+{
+       int ret = 0;
+       int state = child->state;
+
+       if (child->lock.users == 0) {
+               /* Child is unlocked: process the segment inline. */
+               ret = tcp_rcv_state_process(child, skb, skb->h.th, skb->len);
+
+               /* Wakeup parent, send SIGIO */
+               if (state == TCP_SYN_RECV && child->state != state)
+                       parent->data_ready(parent, 0);
+       } else {
+               /* Alas, it is possible again, because we do lookup
+                * in main socket hash table and lock on listening
+                * socket does not protect us more.
+                */
+               sk_add_backlog(child, skb);
+       }
+
+       bh_unlock_sock(child);
+       return ret;
+}
index 0fdb6b3f8d9a483513ffc1331e2d4348cec29c39..6cb75d7309e5f559660672586e9a93632cff21ea 100644 (file)
@@ -5,7 +5,7 @@
  *
  *             Implementation of the Transmission Control Protocol(TCP).
  *
- * Version:    $Id: tcp_output.c,v 1.124 2000/04/08 07:21:24 davem Exp $
+ * Version:    $Id: tcp_output.c,v 1.125 2000/08/09 11:59:04 davem Exp $
  *
  * Authors:    Ross Biro, <bir7@leland.Stanford.Edu>
  *             Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
@@ -32,6 +32,7 @@
  *             David S. Miller :       Output engine completely rewritten.
  *             Andrea Arcangeli:       SYNACK carry ts_recent in tsecr.
  *             Cacophonix Gaul :       draft-minshall-nagle-01
+ *             J Hadi Salim    :       ECN support
  *
  */
 
 /* People can turn this off for buggy TCP's found in printers etc. */
 int sysctl_tcp_retrans_collapse = 1;
 
-static __inline__ void update_send_head(struct sock *sk)
+static __inline__
+void update_send_head(struct sock *sk, struct tcp_opt *tp, struct sk_buff *skb)
 {
-       struct tcp_opt *tp = &sk->tp_pinfo.af_tcp;
-
-       tp->send_head = tp->send_head->next;
+       tp->send_head = skb->next;
        if (tp->send_head == (struct sk_buff *) &sk->write_queue)
                tp->send_head = NULL;
+       tp->snd_nxt = TCP_SKB_CB(skb)->end_seq;
+       if (tp->packets_out++ == 0)
+               tcp_reset_xmit_timer(sk, TCP_TIME_RETRANS, tp->rto);
+}
+
+/* SND.NXT, if window was not shrunk.
+ * If window has been shrunk, what should we make? It is not clear at all.
+ * Using SND.UNA we will fail to open window, SND.NXT is out of window. :-(
+ * Anything in between SND.UNA...SND.UNA+SND.WND also can be already
+ * invalid. OK, let's make this for now:
+ */
+static __inline__ __u32 tcp_acceptable_seq(struct sock *sk, struct tcp_opt *tp)
+{
+       /* Clamp to the right edge of the send window if SND.NXT is past it. */
+       if (!before(tp->snd_una+tp->snd_wnd, tp->snd_nxt))
+               return tp->snd_nxt;
+       else
+               return tp->snd_una+tp->snd_wnd;
 }
 
 /* Calculate mss to advertise in SYN segment.
@@ -79,15 +96,38 @@ static __u16 tcp_advertise_mss(struct sock *sk)
        return (__u16)mss;
 }
 
+/* RFC2861. Reset CWND after idle period longer RTO to "restart window".
+ * This is the first part of cwnd validation mechanism. */
+static void tcp_cwnd_restart(struct tcp_opt *tp)
+{
+       s32 delta = tcp_time_stamp - tp->lsndtime;
+       u32 restart_cwnd = tcp_init_cwnd(tp);
+       u32 cwnd = tp->snd_cwnd;
+
+       tp->snd_ssthresh = tcp_current_ssthresh(tp);
+       restart_cwnd = min(restart_cwnd, cwnd);
+
+       /* Halve cwnd once per RTO elapsed while idle, never dropping
+        * below the restart window.
+        */
+       while ((delta -= tp->rto) > 0 && cwnd > restart_cwnd)
+               cwnd >>= 1;
+       tp->snd_cwnd = max(cwnd, restart_cwnd);
+       tp->snd_cwnd_stamp = tcp_time_stamp;
+       tp->snd_cwnd_used = 0;
+}
+
 static __inline__ void tcp_event_data_sent(struct tcp_opt *tp, struct sk_buff *skb)
 {
-       /* If we had a reply for ato after last received
+       u32 now = tcp_time_stamp;
+
+       if (!tp->packets_out && (s32)(now - tp->lsndtime) > tp->rto)
+               tcp_cwnd_restart(tp);
+
+       tp->lsndtime = now;
+
+       /* If it is a reply for ato after last received
         * packet, enter pingpong mode.
         */
-       if ((u32)(tp->lsndtime - tp->ack.lrcvtime) < tp->ack.ato)
+       if ((u32)(now - tp->ack.lrcvtime) < tp->ack.ato)
                tp->ack.pingpong = 1;
-
-       tp->lsndtime = tcp_time_stamp;
 }
 
 static __inline__ void tcp_event_ack_sent(struct sock *sk)
@@ -95,11 +135,64 @@ static __inline__ void tcp_event_ack_sent(struct sock *sk)
        struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
 
        tcp_dec_quickack_mode(tp);
-       tp->ack.pending = 0;
-       tp->ack.rcv_segs = 0;
        tcp_clear_xmit_timer(sk, TCP_TIME_DACK);
+
+       /* If we ever saw N>1 small segments from peer, it has
+        * enough of send buffer to send N packets and does not nagle.
+        * Hence, we may delay acks more aggresively.
+        */
+       if (tp->ack.rcv_small > tp->ack.rcv_thresh+1)
+               tp->ack.rcv_thresh = tp->ack.rcv_small-1;
+       tp->ack.rcv_small = 0;
+}
+
+/* Choose a new window to advertise, update state in tcp_opt for the
+ * socket, and return result with RFC1323 scaling applied.  The return
+ * value can be stuffed directly into th->window for an outgoing
+ * frame.
+ */
+static __inline__ u16 tcp_select_window(struct sock *sk)
+{
+       struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
+       u32 cur_win = tcp_receive_window(tp);
+       u32 new_win = __tcp_select_window(sk);
+
+       /* Never shrink the offered window */
+       if(new_win < cur_win) {
+               /* Danger Will Robinson!
+                * Don't update rcv_wup/rcv_wnd here or else
+                * we will not be able to advertise a zero
+                * window in time.  --DaveM
+                *
+                * Relax Will Robinson.
+                */
+               new_win = cur_win;
+       }
+       tp->rcv_wnd = new_win;
+       tp->rcv_wup = tp->rcv_nxt;
+
+       /* RFC1323 scaling applied */
+       new_win >>= tp->rcv_wscale;
+
+#ifdef TCP_FORMAL_WINDOW
+       if (new_win == 0) {
+               /* If we advertise zero window, disable fast path. */
+               tp->pred_flags = 0;
+       } else if (cur_win == 0 && tp->pred_flags == 0 &&
+                  skb_queue_len(&tp->out_of_order_queue) == 0 &&
+                  !tp->urg_data) {
+               /* If we open zero window, enable fast path.
+                  Without this it will be open by the first data packet,
+                  it is too late to merge checksumming to copy.
+                */
+               tcp_fast_path_on(tp);
+       }
+#endif
+
+       return new_win;
 }
 
+
 /* This routine actually transmits TCP packets queued in by
  * tcp_do_sendmsg().  This is used by both the initial
  * transmission and possible later retransmissions.
@@ -141,12 +234,12 @@ int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb)
                                if(!(sysctl_flags & SYSCTL_FLAG_TSTAMPS))
                                        tcp_header_size += TCPOLEN_SACKPERM_ALIGNED;
                        }
-               } else if (tp->num_sacks) {
+               } else if (tp->eff_sacks) {
                        /* A SACK is 2 pad bytes, a 2 byte header, plus
                         * 2 32-bit sequence numbers for each SACK block.
                         */
                        tcp_header_size += (TCPOLEN_SACK_BASE_ALIGNED +
-                                           (tp->num_sacks * TCPOLEN_SACK_PERBLOCK));
+                                           (tp->eff_sacks * TCPOLEN_SACK_PERBLOCK));
                }
                th = (struct tcphdr *) skb_push(skb, tcp_header_size);
                skb->h.th = th;
@@ -155,7 +248,7 @@ int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb)
                /* Build TCP header and checksum it. */
                th->source              = sk->sport;
                th->dest                = sk->dport;
-               th->seq                 = htonl(TCP_SKB_CB(skb)->seq);
+               th->seq                 = htonl(tcb->seq);
                th->ack_seq             = htonl(tp->rcv_nxt);
                *(((__u16 *)th) + 6)    = htons(((tcp_header_size >> 2) << 12) | tcb->flags);
                if (tcb->flags & TCPCB_FLAG_SYN) {
@@ -176,11 +269,13 @@ int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb)
                                              (sysctl_flags & SYSCTL_FLAG_SACK),
                                              (sysctl_flags & SYSCTL_FLAG_WSCALE),
                                              tp->rcv_wscale,
-                                             TCP_SKB_CB(skb)->when,
+                                             tcb->when,
                                              tp->ts_recent);
                } else {
                        tcp_build_and_update_options((__u32 *)(th + 1),
-                                                    tp, TCP_SKB_CB(skb)->when);
+                                                    tp, tcb->when);
+
+                       TCP_ECN_send(sk, tp, skb, tcp_header_size);
                }
                tp->af_specific->send_check(sk, th, skb->len, skb);
 
@@ -196,7 +291,7 @@ int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb)
                if (err <= 0)
                        return err;
 
-               tcp_enter_cong_avoid(tp);
+               tcp_enter_cwr(tp);
 
                /* NET_XMIT_CN is special. It does not guarantee,
                 * that this packet is lost. It tells that device
@@ -212,6 +307,7 @@ int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb)
 #undef SYSCTL_FLAG_SACK
 }
 
+
 /* This is the main buffer sending routine. We queue the buffer
  * and decide whether to queue or transmit now.
  *
@@ -225,15 +321,15 @@ void tcp_send_skb(struct sock *sk, struct sk_buff *skb, int force_queue, unsigne
        /* Advance write_seq and place onto the write_queue. */
        tp->write_seq = TCP_SKB_CB(skb)->end_seq;
        __skb_queue_tail(&sk->write_queue, skb);
+       tcp_charge_skb(sk, skb);
 
        if (!force_queue && tp->send_head == NULL && tcp_snd_test(tp, skb, cur_mss, 1)) {
                /* Send it out now. */
                TCP_SKB_CB(skb)->when = tcp_time_stamp;
                if (tcp_transmit_skb(sk, skb_clone(skb, GFP_KERNEL)) == 0) {
                        tp->snd_nxt = TCP_SKB_CB(skb)->end_seq;
-                       tcp_minshall_update(tp, cur_mss, skb->len);
-                       tp->packets_out++;
-                       if(!tcp_timer_is_set(sk, TCP_TIME_RETRANS))
+                       tcp_minshall_update(tp, cur_mss, skb);
+                       if (tp->packets_out++ == 0)
                                tcp_reset_xmit_timer(sk, TCP_TIME_RETRANS, tp->rto);
                        return;
                }
@@ -250,16 +346,16 @@ void tcp_send_skb(struct sock *sk, struct sk_buff *skb, int force_queue, unsigne
  */
 static int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len)
 {
+       struct tcp_opt *tp = &sk->tp_pinfo.af_tcp;
        struct sk_buff *buff;
        int nsize = skb->len - len;
        u16 flags;
 
        /* Get a new skb... force flag on. */
-       buff = sock_wmalloc(sk,
-                           (nsize + MAX_TCP_HEADER + 15),
-                           1, GFP_ATOMIC);
+       buff = tcp_alloc_skb(sk, nsize + MAX_TCP_HEADER + 15, GFP_ATOMIC);
        if (buff == NULL)
                return -ENOMEM; /* We'll just try again later. */
+       tcp_charge_skb(sk, buff);
 
        /* Reserve space for headers. */
        skb_reserve(buff, MAX_TCP_HEADER);
@@ -286,7 +382,11 @@ static int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len)
        if(!(flags & TCPCB_FLAG_URG))
                TCP_SKB_CB(buff)->urg_ptr = 0;
        TCP_SKB_CB(buff)->flags = flags;
-       TCP_SKB_CB(buff)->sacked = 0;
+       TCP_SKB_CB(buff)->sacked = TCP_SKB_CB(skb)->sacked&(TCPCB_LOST|TCPCB_EVER_RETRANS);
+       if (TCP_SKB_CB(buff)->sacked&TCPCB_LOST) {
+               tp->lost_out++;
+               tp->left_out++;
+       }
 
        /* Copy and checksum data tail into the new buffer. */
        buff->csum = csum_partial_copy_nocheck(skb->data + len, skb_put(buff, nsize),
@@ -301,11 +401,6 @@ static int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len)
 
        /* Looks stupid, but our code really uses when of
         * skbs, which it never sent before. --ANK
-        *
-        * NOTE: several days after I added this, Dave repaired
-        * tcp_simple_retransmit() and it should not use ->when
-        * of never sent skbs more. I am not sure, so that
-        * this line remains until more careful investigation. --ANK
         */
        TCP_SKB_CB(buff)->when = TCP_SKB_CB(skb)->when;
 
@@ -401,13 +496,6 @@ int tcp_write_xmit(struct sock *sk)
                 */
                mss_now = tcp_current_mss(sk); 
 
-               /* Anything on the transmit queue that fits the window can
-                * be added providing we are:
-                *
-                * a) following SWS avoidance [and Nagle algorithm]
-                * b) not exceeding our congestion window.
-                * c) not retransmitting [Nagle]
-                */
                while((skb = tp->send_head) &&
                      tcp_snd_test(tp, skb, mss_now, tcp_skb_is_last(sk, skb))) {
                        if (skb->len > mss_now) {
@@ -419,19 +507,13 @@ int tcp_write_xmit(struct sock *sk)
                        if (tcp_transmit_skb(sk, skb_clone(skb, GFP_ATOMIC)))
                                break;
                        /* Advance the send_head.  This one is sent out. */
-                       update_send_head(sk);
-                       tp->snd_nxt = TCP_SKB_CB(skb)->end_seq;
-                       tcp_minshall_update(tp, mss_now, skb->len);
-                       tp->packets_out++;
+                       update_send_head(sk, tp, skb);
+                       tcp_minshall_update(tp, mss_now, skb);
                        sent_pkts = 1;
                }
 
-               /* If we sent anything, make sure the retransmit
-                * timer is active.
-                */
                if (sent_pkts) {
-                       if (!tcp_timer_is_set(sk, TCP_TIME_RETRANS))
-                               tcp_reset_xmit_timer(sk, TCP_TIME_RETRANS, tp->rto);
+                       tcp_cwnd_validate(sk, tp);
                        return 0;
                }
 
@@ -506,28 +588,22 @@ u32 __tcp_select_window(struct sock *sk)
 
        /* Sometimes free_space can be < 0. */
        free_space = tcp_space(sk); 
-       if (free_space > ((int) tp->window_clamp))
-               free_space = tp->window_clamp;
        if (tp->window_clamp < mss)
                mss = tp->window_clamp; 
 
-       if (free_space < min((int)tp->window_clamp, tcp_full_space(sk)) / 2) {
-               /* THIS IS _VERY_ GOOD PLACE to play window clamp.
-                * if free_space becomes suspiciously low
-                * verify ratio rmem_alloc/(rcv_nxt - copied_seq),
-                * and if we predict that when free_space will be lower mss,
-                * rmem_alloc will run out of rcvbuf*2, shrink window_clamp.
-                * It will eliminate most of prune events! Very simple,
-                * it is the next thing to do.                  --ANK
-                *
-                * Provided we found a way to raise it back...  --ANK
-                */
+       if (free_space < (int)min(tp->window_clamp, tcp_full_space(sk)) / 2) {
                tp->ack.quick = 0;
 
+               if (tcp_memory_pressure)
+                       tp->rcv_ssthresh = min(tp->rcv_ssthresh, 4*tp->advmss);
+
                if (free_space < ((int) (mss/2)))
                        return 0;
        }
 
+       if (free_space > tp->rcv_ssthresh)
+               free_space = tp->rcv_ssthresh;
+
        /* Get the largest window that is a nice multiple of mss.
         * Window clamp already applied above.
         * If our current window offering is within 1 mss of the
@@ -547,6 +623,7 @@ u32 __tcp_select_window(struct sock *sk)
 /* Attempt to collapse two adjacent SKB's during retransmission. */
 static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *skb, int mss_now)
 {
+       struct tcp_opt *tp = &sk->tp_pinfo.af_tcp;
        struct sk_buff *next_skb = skb->next;
 
        /* The first test we must make is that neither of these two
@@ -564,6 +641,10 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *skb, int m
                if(TCP_SKB_CB(next_skb)->sacked & TCPCB_SACKED_ACKED)
                        return;
 
+               /* Next skb is out of window. */
+               if (after(TCP_SKB_CB(next_skb)->end_seq, tp->snd_una+tp->snd_wnd))
+                       return;
+
                /* Punt if not enough space exists in the first SKB for
                 * the data in the second, or the total combined payload
                 * would exceed the MSS.
@@ -602,8 +683,20 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *skb, int m
                /* All done, get rid of second SKB and account for it so
                 * packet counting does not break.
                 */
-               kfree_skb(next_skb);
-               sk->tp_pinfo.af_tcp.packets_out--;
+               TCP_SKB_CB(skb)->sacked |= TCP_SKB_CB(next_skb)->sacked&TCPCB_EVER_RETRANS;
+               if (TCP_SKB_CB(next_skb)->sacked&TCPCB_SACKED_RETRANS)
+                       tp->retrans_out--;
+               if (TCP_SKB_CB(next_skb)->sacked&TCPCB_LOST) {
+                       tp->lost_out--;
+                       tp->left_out--;
+               }
+               if (!tp->sack_ok && tp->sacked_out) {
+                       /* Reno case is special. Sigh... */
+                       tp->sacked_out--;
+                       tp->left_out--;
+               }
+               tcp_free_skb(sk, next_skb);
+               tp->packets_out--;
        }
 }
 
@@ -614,53 +707,43 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *skb, int m
 void tcp_simple_retransmit(struct sock *sk)
 {
        struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
-       struct sk_buff *skb, *old_next_skb;
+       struct sk_buff *skb;
        unsigned int mss = tcp_current_mss(sk);
-
-       /* Don't muck with the congestion window here. */
-       tp->dup_acks = 0;
-       tp->high_seq = tp->snd_nxt;
-       tp->retrans_head = NULL;
-
-       /* Input control flow will see that this was retransmitted
-        * and not use it for RTT calculation in the absence of
-        * the timestamp option.
-        */
-       for (old_next_skb = skb = skb_peek(&sk->write_queue);
-            ((skb != tp->send_head) &&
-             (skb != (struct sk_buff *)&sk->write_queue));
-            skb = skb->next) {
-               int resend_skb = 0;
-
-               /* Our goal is to push out the packets which we
-                * sent already, but are being chopped up now to
-                * account for the PMTU information we have.
-                *
-                * As we resend the queue, packets are fragmented
-                * into two pieces, and when we try to send the
-                * second piece it may be collapsed together with
-                * a subsequent packet, and so on.  -DaveM
-                */
-               if (old_next_skb != skb || skb->len > mss)
-                       resend_skb = 1;
-               old_next_skb = skb->next;
-               if (resend_skb != 0) {
-                       if (tcp_retransmit_skb(sk, skb))
-                               break;
+       int lost = 0;
+
+       for_retrans_queue(skb, sk, tp) {
+               if (skb->len > mss && 
+                   !(TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_ACKED)) {
+                       if (TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_RETRANS) {
+                               TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS;
+                               tp->retrans_out--;
+                       }
+                       if (!(TCP_SKB_CB(skb)->sacked&TCPCB_LOST)) {
+                               TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
+                               tp->lost_out++;
+                               lost = 1;
+                       }
                }
        }
-}
 
-static __inline__ void update_retrans_head(struct sock *sk)
-{
-       struct tcp_opt *tp = &sk->tp_pinfo.af_tcp;
-       
-       tp->retrans_head = tp->retrans_head->next;
-       if((tp->retrans_head == tp->send_head) ||
-          (tp->retrans_head == (struct sk_buff *) &sk->write_queue)) {
-               tp->retrans_head = NULL;
-               tp->rexmt_done = 1;
+       if (!lost)
+               return;
+
+       tp->left_out = tp->sacked_out + tp->lost_out;
+
+       /* Don't muck with the congestion window here.
+        * Reason is that we do not increase amount of _data_
+        * in network, but units changed and effective
+        * cwnd/ssthresh really reduced now.
+        */
+       if (tp->ca_state != TCP_CA_Loss) {
+               tp->high_seq = tp->snd_nxt;
+               tp->snd_ssthresh = tcp_current_ssthresh(tp);
+               tp->prior_ssthresh = 0;
+               tp->undo_marker = 0;
+               tp->ca_state = TCP_CA_Loss;
        }
+       tcp_xmit_retransmit_queue(sk);
 }
 
 /* This retransmits one SKB.  Policy decisions and retransmit queue
@@ -671,18 +754,13 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
 {
        struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
        unsigned int cur_mss = tcp_current_mss(sk);
+       int err;
 
-#ifdef TCP_DEBUG
-       /* It was possible this summer, that retransmit timer
-        * raced with its deletion and hit socket with packets_out==0.
-        * I fixed it, but preserved the check in the place,
-        * where the fault occured. --ANK
+       /* Do not sent more than we queued. 1/4 is reserved for possible
+        * copying overhead: frgagmentation, tunneling, mangling etc.
         */
-       if (skb == NULL) {
-               printk("tcp_retransmit_skb: bug, skb==NULL, caller=%p\n", NET_CALLER(sk));
-               return -EFAULT;
-       }
-#endif
+       if (atomic_read(&sk->wmem_alloc) > min(sk->wmem_queued+(sk->wmem_queued>>2),sk->sndbuf))
+               return -EAGAIN;
 
        if(skb->len > cur_mss) {
                if(tcp_fragment(sk, skb, cur_mss))
@@ -715,23 +793,40 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
                skb->csum = 0;
        }
 
-       /* Ok, we're gonna send it out, update state. */
-       TCP_SKB_CB(skb)->sacked |= TCPCB_SACKED_RETRANS;
-       tp->retrans_out++;
-
        /* Make a copy, if the first transmission SKB clone we made
         * is still in somebody's hands, else make a clone.
         */
        TCP_SKB_CB(skb)->when = tcp_time_stamp;
-       if(skb_cloned(skb))
-               skb = skb_copy(skb, GFP_ATOMIC);
-       else
-               skb = skb_clone(skb, GFP_ATOMIC);
 
-       /* Update global TCP statistics and return success. */
-       TCP_INC_STATS(TcpRetransSegs);
+       err = tcp_transmit_skb(sk, (skb_cloned(skb) ?
+                                   skb_copy(skb, GFP_ATOMIC):
+                                   skb_clone(skb, GFP_ATOMIC)));
 
-       return tcp_transmit_skb(sk, skb);
+       if (err == 0) {
+               /* Update global TCP statistics. */
+               TCP_INC_STATS(TcpRetransSegs);
+
+#if FASTRETRANS_DEBUG > 0
+               if (TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_RETRANS) {
+                       if (net_ratelimit())
+                               printk(KERN_DEBUG "retrans_out leaked.\n");
+               }
+#endif
+               TCP_SKB_CB(skb)->sacked |= TCPCB_RETRANS;
+               tp->retrans_out++;
+
+               /* Save stamp of the first retransmit. */
+               if (!tp->retrans_stamp)
+                       tp->retrans_stamp = TCP_SKB_CB(skb)->when;
+
+               tp->undo_retrans++;
+
+               /* snd_nxt is stored to detect loss of retransmitted segment,
+                * see tcp_input.c tcp_sacktag_write_queue().
+                */
+               TCP_SKB_CB(skb)->ack_seq = tp->snd_nxt;
+       }
+       return err;
 }
 
 /* This gets called after a retransmit timeout, and the initially
@@ -746,71 +841,79 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
 {
        struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
        struct sk_buff *skb;
+       int packet_cnt = tp->lost_out;
+
+       /* First pass: retransmit lost packets. */
+       if (packet_cnt) {
+               for_retrans_queue(skb, sk, tp) {
+                       __u8 sacked = TCP_SKB_CB(skb)->sacked;
+
+                       if (tcp_packets_in_flight(tp) >= tp->snd_cwnd)
+                               return;
+
+                       if (sacked&TCPCB_LOST) {
+                               if (!(sacked&(TCPCB_SACKED_ACKED|TCPCB_SACKED_RETRANS))) {
+                                       if (tcp_retransmit_skb(sk, skb))
+                                               return;
+                                       if (tp->ca_state != TCP_CA_Loss)
+                                               NET_INC_STATS_BH(TCPFastRetrans);
+                                       else
+                                               NET_INC_STATS_BH(TCPSlowStartRetrans);
+
+                                       if (skb == skb_peek(&sk->write_queue))
+                                               tcp_reset_xmit_timer(sk, TCP_TIME_RETRANS, tp->rto);
+                               }
+
+                               if (--packet_cnt <= 0)
+                                       break;
+                       }
+               }
+       }
+
+       /* OK, demanded retransmission is finished. */
+
+       /* Forward retransmissions are possible only during Recovery. */
+       if (tp->ca_state != TCP_CA_Recovery)
+               return;
 
-       if (tp->retrans_head == NULL &&
-           tp->rexmt_done == 0)
-               tp->retrans_head = skb_peek(&sk->write_queue);
-       if (tp->retrans_head == tp->send_head)
-               tp->retrans_head = NULL;
+       /* No forward retransmissions in Reno are possible. */
+       if (!tp->sack_ok)
+               return;
 
-       /* Each time, advance the retrans_head if we got
-        * a packet out or we skipped one because it was
-        * SACK'd.  -DaveM
+       /* Yeah, we have to make difficult choice between forward transmission
+        * and retransmission... Both ways have their merits...
+        *
+        * For now we do not retrnamsit anything, while we have some new
+        * segments to send.
         */
-       while ((skb = tp->retrans_head) != NULL) {
-               /* If it has been ack'd by a SACK block, we don't
-                * retransmit it.
-                */
-               if(!(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) {
-                       /* Send it out, punt if error occurred. */
-                       if(tcp_retransmit_skb(sk, skb))
-                               break;
 
-                       update_retrans_head(sk);
-               
-                       /* Stop retransmitting if we've hit the congestion
-                        * window limit.
-                        */
-                       if (tp->retrans_out >= tp->snd_cwnd)
-                               break;
-               } else {
-                       update_retrans_head(sk);
-               }
-       }
-}
+       if (tcp_may_send_now(sk, tp))
+               return;
 
-/* Using FACK information, retransmit all missing frames at the receiver
- * up to the forward most SACK'd packet (tp->fackets_out) if the packet
- * has not been retransmitted already.
- */
-void tcp_fack_retransmit(struct sock *sk)
-{
-       struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
-       struct sk_buff *skb = skb_peek(&sk->write_queue);
-       int packet_cnt = 0;
+       packet_cnt = 0;
 
-       while((skb != NULL) &&
-             (skb != tp->send_head) &&
-             (skb != (struct sk_buff *)&sk->write_queue)) {
-               __u8 sacked = TCP_SKB_CB(skb)->sacked;
+       for_retrans_queue(skb, sk, tp) {
+               if(++packet_cnt > tp->fackets_out)
+                       break;
 
-               if(sacked & (TCPCB_SACKED_ACKED | TCPCB_SACKED_RETRANS))
-                       goto next_packet;
+               if (tcp_packets_in_flight(tp) >= tp->snd_cwnd)
+                       break;
+
+               if(TCP_SKB_CB(skb)->sacked & TCPCB_TAGBITS)
+                       continue;
 
                /* Ok, retransmit it. */
                if(tcp_retransmit_skb(sk, skb))
                        break;
 
-               if(tcp_packets_in_flight(tp) >= tp->snd_cwnd)
-                       break;
-next_packet:
-               packet_cnt++;
-               if(packet_cnt >= tp->fackets_out)
-                       break;
-               skb = skb->next;
+               if (skb == skb_peek(&sk->write_queue))
+                       tcp_reset_xmit_timer(sk, TCP_TIME_RETRANS, tp->rto);
+
+               NET_INC_STATS_BH(TCPForwardRetrans);
        }
 }
 
+
 /* Send a fin.  The caller locks the socket for us.  This cannot be
  * allowed to fail queueing a FIN frame under any circumstances.
  */
@@ -839,30 +942,19 @@ void tcp_send_fin(struct sock *sk)
                /* Special case to avoid Nagle bogosity.  If this
                 * segment is the last segment, and it was queued
                 * due to Nagle/SWS-avoidance, send it out now.
-                *
-                * Hmm... actually it overrides also congestion
-                * avoidance (OK for FIN) and retransmit phase
-                * (not OK? Added.).
                 */
                if(tp->send_head == skb &&
-                  !after(tp->write_seq, tp->snd_una + tp->snd_wnd) &&
-                  !tp->retransmits) {
+                  !after(tp->write_seq, tp->snd_una + tp->snd_wnd)) {
                        TCP_SKB_CB(skb)->when = tcp_time_stamp;
-                       if (!tcp_transmit_skb(sk, skb_clone(skb, GFP_ATOMIC))) {
-                               update_send_head(sk);
-                               tp->snd_nxt = TCP_SKB_CB(skb)->end_seq;
-                               tp->packets_out++;
-                               if(!tcp_timer_is_set(sk, TCP_TIME_RETRANS))
-                                       tcp_reset_xmit_timer(sk, TCP_TIME_RETRANS, tp->rto);
-                       } else
+                       if (!tcp_transmit_skb(sk, skb_clone(skb, GFP_KERNEL)))
+                               update_send_head(sk, tp, skb);
+                       else
                                tcp_check_probe_timer(sk, tp);
                }
        } else {
                /* Socket is locked, keep trying until memory is available. */
                for (;;) {
-                       skb = sock_wmalloc(sk,
-                                          MAX_TCP_HEADER + 15,
-                                          1, GFP_KERNEL);
+                       skb = alloc_skb(MAX_TCP_HEADER + 15, GFP_KERNEL);
                        if (skb)
                                break;
                        current->policy |= SCHED_YIELD;
@@ -896,8 +988,10 @@ void tcp_send_active_reset(struct sock *sk, int priority)
 
        /* NOTE: No TCP options attached and we never retransmit this. */
        skb = alloc_skb(MAX_TCP_HEADER + 15, priority);
-       if (!skb)
+       if (!skb) {
+               NET_INC_STATS(TCPAbortFailed);
                return;
+       }
 
        /* Reserve space for headers and prepare control bits. */
        skb_reserve(skb, MAX_TCP_HEADER);
@@ -907,10 +1001,11 @@ void tcp_send_active_reset(struct sock *sk, int priority)
        TCP_SKB_CB(skb)->urg_ptr = 0;
 
        /* Send it off. */
-       TCP_SKB_CB(skb)->seq = tp->snd_nxt;
+       TCP_SKB_CB(skb)->seq = tcp_acceptable_seq(sk, tp);
        TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(skb)->seq;
        TCP_SKB_CB(skb)->when = tcp_time_stamp;
-       tcp_transmit_skb(sk, skb);
+       if (tcp_transmit_skb(sk, skb))
+               NET_INC_STATS(TCPAbortFailed);
 }
 
 /* WARNING: This routine must only be called when we have already sent
@@ -920,27 +1015,29 @@ void tcp_send_active_reset(struct sock *sk, int priority)
  */
 int tcp_send_synack(struct sock *sk)
 {
-       struct tcp_opt* tp = &(sk->tp_pinfo.af_tcp);
-       struct sk_buff* skb;    
+       struct sk_buff* skb;
 
-       skb = sock_wmalloc(sk, MAX_TCP_HEADER + 15,
-                          1, GFP_ATOMIC);
-       if (skb == NULL) 
-               return -ENOMEM;
-
-       /* Reserve space for headers and prepare control bits. */
-       skb_reserve(skb, MAX_TCP_HEADER);
-       skb->csum = 0;
-       TCP_SKB_CB(skb)->flags = (TCPCB_FLAG_ACK | TCPCB_FLAG_SYN);
-       TCP_SKB_CB(skb)->sacked = 0;
-       TCP_SKB_CB(skb)->urg_ptr = 0;
+       skb = skb_peek(&sk->write_queue);
+       if (skb == NULL || !(TCP_SKB_CB(skb)->flags&TCPCB_FLAG_SYN)) {
+               printk(KERN_DEBUG "tcp_send_synack: wrong queue state\n");
+               return -EFAULT;
+       }
+       if (!(TCP_SKB_CB(skb)->flags&TCPCB_FLAG_ACK)) {
+               if (skb_cloned(skb)) {
+                       struct sk_buff *nskb = skb_copy(skb, GFP_ATOMIC);
+                       if (nskb == NULL)
+                               return -ENOMEM;
+                       __skb_unlink(skb, &sk->write_queue);
+                       __skb_queue_head(&sk->write_queue, nskb);
+                       tcp_free_skb(sk, skb);
+                       tcp_charge_skb(sk, nskb);
+                       skb = nskb;
+               }
 
-       /* SYN eats a sequence byte. */
-       TCP_SKB_CB(skb)->seq = tp->snd_una;
-       TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(skb)->seq + 1;
-       __skb_queue_tail(&sk->write_queue, skb);
+               TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_ACK;
+               TCP_ECN_send_synack(&sk->tp_pinfo.af_tcp, skb);
+       }
        TCP_SKB_CB(skb)->when = tcp_time_stamp;
-       tp->packets_out++;
        return tcp_transmit_skb(sk, skb_clone(skb, GFP_ATOMIC));
 }
 
@@ -974,6 +1071,7 @@ struct sk_buff * tcp_make_synack(struct sock *sk, struct dst_entry *dst,
        memset(th, 0, sizeof(struct tcphdr));
        th->syn = 1;
        th->ack = 1;
+       TCP_ECN_make_synack(req, th);
        th->source = sk->sport;
        th->dest = req->rmt_port;
        TCP_SKB_CB(skb)->seq = req->snt_isn;
@@ -983,7 +1081,7 @@ struct sk_buff * tcp_make_synack(struct sock *sk, struct dst_entry *dst,
        if (req->rcv_wnd == 0) { /* ignored for retransmitted syns */
                __u8 rcv_wscale; 
                /* Set this up on the first call only */
-               req->window_clamp = tp->window_clamp ? : skb->dst->window;
+               req->window_clamp = tp->window_clamp ? : dst->window;
                /* tcp_full_space because it is guaranteed to be the first packet */
                tcp_select_initial_window(tcp_full_space(sk), 
                        dst->advmss - (req->tstamp_ok ? TCPOLEN_TSTAMP_ALIGNED : 0),
@@ -1028,18 +1126,20 @@ int tcp_connect(struct sock *sk, struct sk_buff *buff)
                tp->mss_clamp = tp->user_mss;
        tp->max_window = 0;
        tcp_sync_mss(sk, dst->pmtu);
-       tcp_initialize_rcv_mss(sk);
 
        if (!tp->window_clamp)
                tp->window_clamp = dst->window;
        tp->advmss = dst->advmss;
+       tcp_initialize_rcv_mss(sk);
 
        tcp_select_initial_window(tcp_full_space(sk),
-               tp->advmss - (tp->tcp_header_len - sizeof(struct tcphdr)),
-               &tp->rcv_wnd,
-               &tp->window_clamp,
-               sysctl_tcp_window_scaling,
-               &tp->rcv_wscale);
+                                 tp->advmss - (tp->ts_recent_stamp ? tp->tcp_header_len - sizeof(struct tcphdr) : 0),
+                                 &tp->rcv_wnd,
+                                 &tp->window_clamp,
+                                 sysctl_tcp_window_scaling,
+                                 &tp->rcv_wscale);
+
+       tp->rcv_ssthresh = tp->rcv_wnd;
 
        /* Socket identity change complete, no longer
         * in TCP_CLOSE, so enter ourselves into the
@@ -1052,8 +1152,7 @@ int tcp_connect(struct sock *sk, struct sk_buff *buff)
        sk->err = 0;
        sk->done = 0;
        tp->snd_wnd = 0;
-       tp->snd_wl1 = 0;
-       tp->snd_wl2 = tp->write_seq;
+       tcp_init_wl(tp, tp->write_seq, 0);
        tp->snd_una = tp->write_seq;
        tp->snd_sml = tp->write_seq;
        tp->rcv_nxt = 0;
@@ -1063,21 +1162,23 @@ int tcp_connect(struct sock *sk, struct sk_buff *buff)
        tp->rto = TCP_TIMEOUT_INIT;
        tcp_init_xmit_timers(sk);
        tp->retransmits = 0;
-       tp->fackets_out = 0;
-       tp->retrans_out = 0;
+       tcp_clear_retrans(tp);
 
        TCP_SKB_CB(buff)->flags = TCPCB_FLAG_SYN;
+       TCP_ECN_send_syn(tp, buff);
        TCP_SKB_CB(buff)->sacked = 0;
        TCP_SKB_CB(buff)->urg_ptr = 0;
        buff->csum = 0;
        TCP_SKB_CB(buff)->seq = tp->write_seq++;
        TCP_SKB_CB(buff)->end_seq = tp->write_seq;
        tp->snd_nxt = tp->write_seq;
+       tp->pushed_seq = tp->write_seq;
 
        /* Send it off. */
        TCP_SKB_CB(buff)->when = tcp_time_stamp;
-       tp->syn_stamp = TCP_SKB_CB(buff)->when;
+       tp->retrans_stamp = TCP_SKB_CB(buff)->when;
        __skb_queue_tail(&sk->write_queue, buff);
+       tcp_charge_skb(sk, buff);
        tp->packets_out++;
        tcp_transmit_skb(sk, skb_clone(buff, GFP_KERNEL));
        TCP_INC_STATS(TcpActiveOpens);
@@ -1099,20 +1200,25 @@ err_out:
 void tcp_send_delayed_ack(struct sock *sk)
 {
        struct tcp_opt *tp = &sk->tp_pinfo.af_tcp;
-       long ato = tp->ack.ato;
+       int ato = tp->ack.ato;
        unsigned long timeout;
 
        if (ato > TCP_DELACK_MIN) {
-               int max_ato;
+               int max_ato = (tp->ack.pingpong || tp->ack.rcv_small) ?
+                       TCP_DELACK_MAX : (HZ/2);
+
+               /* Slow path, intersegment interval is "high". */
 
                /* If some rtt estimate is known, use it to bound delayed ack.
                 * Do not use tp->rto here, use results of rtt measurements
                 * directly.
                 */
-               if (tp->srtt)
-                       max_ato = (tp->srtt >> 3) + tp->mdev;
-               else
-                       max_ato = TCP_DELACK_MAX;
+               if (tp->srtt) {
+                       int rtt = max(tp->srtt>>3, TCP_DELACK_MIN);
+
+                       if (rtt < max_ato)
+                               max_ato = rtt;
+               }
 
                ato = min(ato, max_ato);
        }
@@ -1121,20 +1227,20 @@ void tcp_send_delayed_ack(struct sock *sk)
        timeout = jiffies + ato;
 
        /* Use new timeout only if there wasn't a older one earlier. */
-       if (timer_pending(&tp->delack_timer)) {
-               unsigned long old_timeout = tp->delack_timer.expires;
-
+       if (tp->ack.pending&2) {
                /* If delack timer was blocked or is about to expire,
                 * send ACK now.
                 */
-               if (tp->ack.blocked || time_before_eq(old_timeout, jiffies+(ato>>2))) {
+               if (tp->ack.blocked || time_before_eq(tp->ack.timeout, jiffies+(ato>>2))) {
                        tcp_send_ack(sk);
                        return;
                }
 
-               if (!time_before(timeout, old_timeout))
-                       timeout = old_timeout;
+               if (!time_before(timeout, tp->ack.timeout))
+                       timeout = tp->ack.timeout;
        }
+       tp->ack.pending = 3;
+       tp->ack.timeout = timeout;
        if (!mod_timer(&tp->delack_timer, timeout))
                sock_hold(sk);
 
@@ -1170,8 +1276,8 @@ void tcp_send_ack(struct sock *sk)
                 */
                buff = alloc_skb(MAX_TCP_HEADER + 15, GFP_ATOMIC);
                if (buff == NULL) {
-                       tp->ack.pending = 1;
-                       tp->ack.ato = TCP_ATO_MAX;
+                       tcp_schedule_ack(tp);
+                       tp->ack.ato = TCP_ATO_MIN;
                        tcp_reset_xmit_timer(sk, TCP_TIME_DACK, TCP_DELACK_MAX);
                        return;
                }
@@ -1184,7 +1290,7 @@ void tcp_send_ack(struct sock *sk)
                TCP_SKB_CB(buff)->urg_ptr = 0;
 
                /* Send it off, this clears delayed acks for us. */
-               TCP_SKB_CB(buff)->seq = TCP_SKB_CB(buff)->end_seq = tp->snd_nxt;
+               TCP_SKB_CB(buff)->seq = TCP_SKB_CB(buff)->end_seq = tcp_acceptable_seq(sk, tp);
                TCP_SKB_CB(buff)->when = tcp_time_stamp;
                tcp_transmit_skb(sk, buff);
        }
@@ -1193,66 +1299,68 @@ void tcp_send_ack(struct sock *sk)
 /* This routine sends a packet with an out of date sequence
  * number. It assumes the other end will try to ack it.
  */
+static int tcp_xmit_probe_skb(struct sock *sk)
+{
+       struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
+       struct sk_buff *skb;
+
+       /* We don't queue it, tcp_transmit_skb() sets ownership. */
+       skb = alloc_skb(MAX_TCP_HEADER + 15, GFP_ATOMIC);
+       if (skb == NULL) 
+               return -1;
+
+       /* Reserve space for headers and set control bits. */
+       skb_reserve(skb, MAX_TCP_HEADER);
+       skb->csum = 0;
+       TCP_SKB_CB(skb)->flags = TCPCB_FLAG_ACK;
+       TCP_SKB_CB(skb)->sacked = 0;
+       TCP_SKB_CB(skb)->urg_ptr = 0;
+
+       /* Use a previous sequence.  This should cause the other
+        * end to send an ack.  Don't queue or clone SKB, just
+        * send it.
+        */
+       TCP_SKB_CB(skb)->seq = tp->snd_una - 1;
+       TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(skb)->seq;
+       TCP_SKB_CB(skb)->when = tcp_time_stamp;
+       return tcp_transmit_skb(sk, skb);
+}
+
 int tcp_write_wakeup(struct sock *sk)
 {
        if (sk->state != TCP_CLOSE) {
                struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
                struct sk_buff *skb;
 
-               /* Now this function is never called, while
-                * we have something not ACKed in queue.
-                */
-               BUG_TRAP(tp->snd_una == tp->snd_nxt);
-
-               if (tp->snd_wnd > (tp->snd_nxt-tp->snd_una)
-                   && ((skb = tp->send_head) != NULL)) {
+               if ((skb = tp->send_head) != NULL &&
+                   before(TCP_SKB_CB(skb)->seq, tp->snd_una+tp->snd_wnd)) {
                        int err;
-                       unsigned long win_size;
+                       int mss = tcp_current_mss(sk);
+                       int seg_size = tp->snd_una+tp->snd_wnd-TCP_SKB_CB(skb)->seq;
+
+                       if (before(tp->pushed_seq, TCP_SKB_CB(skb)->end_seq))
+                               tp->pushed_seq = TCP_SKB_CB(skb)->end_seq;
 
                        /* We are probing the opening of a window
                         * but the window size is != 0
                         * must have been a result SWS avoidance ( sender )
                         */
-                       win_size = tp->snd_wnd - (tp->snd_nxt - tp->snd_una);
-                       if (win_size < TCP_SKB_CB(skb)->end_seq - TCP_SKB_CB(skb)->seq) {
-                               if (tcp_fragment(sk, skb, win_size))
+                       if (seg_size < TCP_SKB_CB(skb)->end_seq - TCP_SKB_CB(skb)->seq ||
+                           skb->len > mss) {
+                               seg_size = min(seg_size, mss);
+                               TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_PSH;
+                               if (tcp_fragment(sk, skb, seg_size))
                                        return -1;
                        }
+                       TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_PSH;
                        TCP_SKB_CB(skb)->when = tcp_time_stamp;
                        err = tcp_transmit_skb(sk, skb_clone(skb, GFP_ATOMIC));
                        if (!err) {
-                               update_send_head(sk);
-                               tp->snd_nxt = TCP_SKB_CB(skb)->end_seq;
-                               tp->packets_out++;
-                               if (!tcp_timer_is_set(sk, TCP_TIME_RETRANS))
-                                       tcp_reset_xmit_timer(sk, TCP_TIME_RETRANS, tp->rto);
+                               update_send_head(sk, tp, skb);
                        }
                        return err;
                } else {
-                       /* We don't queue it, tcp_transmit_skb() sets ownership. */
-                       skb = alloc_skb(MAX_TCP_HEADER + 15, GFP_ATOMIC);
-                       if (skb == NULL) 
-                               return -1;
-
-                       /* Reserve space for headers and set control bits. */
-                       skb_reserve(skb, MAX_TCP_HEADER);
-                       skb->csum = 0;
-                       TCP_SKB_CB(skb)->flags = TCPCB_FLAG_ACK;
-                       TCP_SKB_CB(skb)->sacked = 0;
-                       TCP_SKB_CB(skb)->urg_ptr = 0;
-
-                       /* Use a previous sequence.  This should cause the other
-                        * end to send an ack.  Don't queue or clone SKB, just
-                        * send it.
-                        *
-                        * RED-PEN: logically it should be snd_una-1.
-                        * snd_nxt-1 will not be acked. snd_una==snd_nxt
-                        * in this place however. Right?
-                        */
-                       TCP_SKB_CB(skb)->seq = tp->snd_una - 1;
-                       TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(skb)->seq;
-                       TCP_SKB_CB(skb)->when = tcp_time_stamp;
-                       return tcp_transmit_skb(sk, skb);
+                       return tcp_xmit_probe_skb(sk);
                }
        }
        return -1;
index 4ed38175b818e3e53a7bb55dd97472af54c6e760..52c39a6b5aee1acb414a7db4f76009b8d15a24e8 100644 (file)
@@ -5,7 +5,7 @@
  *
  *             Implementation of the Transmission Control Protocol(TCP).
  *
- * Version:    $Id: tcp_timer.c,v 1.77 2000/06/30 10:18:38 davem Exp $
+ * Version:    $Id: tcp_timer.c,v 1.78 2000/08/09 11:59:04 davem Exp $
  *
  * Authors:    Ross Biro, <bir7@leland.Stanford.Edu>
  *             Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
@@ -29,13 +29,11 @@ int sysctl_tcp_keepalive_probes = TCP_KEEPALIVE_PROBES;
 int sysctl_tcp_keepalive_intvl = TCP_KEEPALIVE_INTVL;
 int sysctl_tcp_retries1 = TCP_RETR1;
 int sysctl_tcp_retries2 = TCP_RETR2;
-int sysctl_tcp_orphan_retries = TCP_ORPHAN_RETRIES;
+int sysctl_tcp_orphan_retries = 0;
 
-static void tcp_retransmit_timer(unsigned long);
+static void tcp_write_timer(unsigned long);
 static void tcp_delack_timer(unsigned long);
-static void tcp_probe_timer(unsigned long);
 static void tcp_keepalive_timer (unsigned long data);
-static void tcp_twkill(unsigned long);
 
 const char timer_bug_msg[] = KERN_DEBUG "tcpbug: unknown timer value\n";
 
@@ -50,73 +48,35 @@ void tcp_init_xmit_timers(struct sock *sk)
        struct tcp_opt *tp = &sk->tp_pinfo.af_tcp;
 
        init_timer(&tp->retransmit_timer);
-       tp->retransmit_timer.function=&tcp_retransmit_timer;
+       tp->retransmit_timer.function=&tcp_write_timer;
        tp->retransmit_timer.data = (unsigned long) sk;
+       tp->pending = 0;
 
        init_timer(&tp->delack_timer);
        tp->delack_timer.function=&tcp_delack_timer;
        tp->delack_timer.data = (unsigned long) sk;
-
-       init_timer(&tp->probe_timer);
-       tp->probe_timer.function=&tcp_probe_timer;
-       tp->probe_timer.data = (unsigned long) sk;
+       tp->ack.pending = 0;
 
        init_timer(&sk->timer);
        sk->timer.function=&tcp_keepalive_timer;
        sk->timer.data = (unsigned long) sk;
 }
 
-/*
- *     Reset the retransmission timer
- */
-void tcp_reset_xmit_timer(struct sock *sk, int what, unsigned long when)
-{
-       struct tcp_opt *tp = &sk->tp_pinfo.af_tcp;
-
-       switch (what) {
-       case TCP_TIME_RETRANS:
-               /* When seting the transmit timer the probe timer 
-                * should not be set.
-                * The delayed ack timer can be set if we are changing the
-                * retransmit timer when removing acked frames.
-                */
-               if (timer_pending(&tp->probe_timer) && del_timer(&tp->probe_timer))
-                       __sock_put(sk);
-               if (when > TCP_RTO_MAX) {
-                       printk(KERN_DEBUG "reset_xmit_timer sk=%p when=0x%lx, caller=%p\n", sk, when, NET_CALLER(sk));
-                       when = TCP_RTO_MAX;
-               }
-               if (!mod_timer(&tp->retransmit_timer, jiffies+when))
-                       sock_hold(sk);
-               break;
-
-       case TCP_TIME_DACK:
-               if (!mod_timer(&tp->delack_timer, jiffies+when))
-                       sock_hold(sk);
-               break;
-
-       case TCP_TIME_PROBE0:
-               if (!mod_timer(&tp->probe_timer, jiffies+when))
-                       sock_hold(sk);
-               break;
-
-       default:
-               printk(KERN_DEBUG "bug: unknown timer value\n");
-       };
-}
-
 void tcp_clear_xmit_timers(struct sock *sk)
-{      
+{
        struct tcp_opt *tp = &sk->tp_pinfo.af_tcp;
 
-       if(timer_pending(&tp->retransmit_timer) && del_timer(&tp->retransmit_timer))
-               __sock_put(sk);
-       if(timer_pending(&tp->delack_timer) && del_timer(&tp->delack_timer))
+       tp->pending = 0;
+       if (timer_pending(&tp->retransmit_timer) &&
+           del_timer(&tp->retransmit_timer))
                __sock_put(sk);
+
+       tp->ack.pending = 0;
        tp->ack.blocked = 0;
-       if(timer_pending(&tp->probe_timer) && del_timer(&tp->probe_timer))
+       if (timer_pending(&tp->delack_timer) &&
+           del_timer(&tp->delack_timer))
                __sock_put(sk);
+
        if(timer_pending(&sk->timer) && del_timer(&sk->timer))
                __sock_put(sk);
 }
@@ -127,6 +87,7 @@ static void tcp_write_err(struct sock *sk)
        sk->error_report(sk);
 
        tcp_done(sk);
+       NET_INC_STATS_BH(TCPAbortOnTimeout);
 }
 
 /* Do not allow orphaned sockets to eat all our resources.
@@ -138,26 +99,60 @@ static void tcp_write_err(struct sock *sk)
  * We kill the socket, if:
  * 1. If number of orphaned sockets exceeds an administratively configured
  *    limit.
- * 2. Under pessimistic assumption that all the orphans eat memory not
- *    less than this one, total consumed memory exceeds all
- *    the available memory.
+ * 2. If we have strong memory pressure.
  */
 static int tcp_out_of_resources(struct sock *sk, int do_reset)
 {
+       struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
        int orphans = atomic_read(&tcp_orphan_count);
 
+       /* If peer does not open window for long time, or did not transmit 
+        * anything for long time, penalize it. */
+       if ((s32)(tcp_time_stamp - tp->lsndtime) > 2*TCP_RTO_MAX || !do_reset)
+               orphans <<= 1;
+
+       /* If some dubious ICMP arrived, penalize even more. */
+       if (sk->err_soft)
+               orphans <<= 1;
+
        if (orphans >= sysctl_tcp_max_orphans ||
-           ((orphans*atomic_read(&sk->wmem_alloc))>>PAGE_SHIFT) >= num_physpages) {
+           (sk->wmem_queued > SOCK_MIN_SNDBUF &&
+            atomic_read(&tcp_memory_allocated) > sysctl_tcp_mem[2])) {
                if (net_ratelimit())
                        printk(KERN_INFO "Out of socket memory\n");
+
+               /* Catch exceptional cases, when connection requires reset.
+                *      1. Last segment was sent recently. */
+               if ((s32)(tcp_time_stamp - tp->lsndtime) <= TCP_TIMEWAIT_LEN ||
+                   /*  2. Window is closed. */
+                   (!tp->snd_wnd && !tp->packets_out))
+                       do_reset = 1;
                if (do_reset)
                        tcp_send_active_reset(sk, GFP_ATOMIC);
                tcp_done(sk);
+               NET_INC_STATS_BH(TCPAbortOnMemory);
                return 1;
        }
        return 0;
 }
 
+/* Calculate maximal number or retries on an orphaned socket. */
+static int tcp_orphan_retries(struct sock *sk, int alive)
+{
+       int retries = sysctl_tcp_orphan_retries; /* May be zero. */
+
+       /* We know from an ICMP that something is wrong. */
+       if (sk->err_soft && !alive)
+               retries = 0;
+
+       /* However, if socket sent something recently, select some safe
+        * number of retries. 8 corresponds to >100 seconds with minimal
+        * RTO of 200msec. */
+       if (retries == 0 && alive)
+               retries = 8;
+       return retries;
+}
+
 /* A write timeout has occurred. Process the after effects. */
 static int tcp_write_timeout(struct sock *sk)
 {
@@ -195,10 +190,12 @@ static int tcp_write_timeout(struct sock *sk)
 
                retry_until = sysctl_tcp_retries2;
                if (sk->dead) {
-                       if (tcp_out_of_resources(sk, tp->retransmits < retry_until))
-                               return 1;
+                       int alive = (tp->rto < TCP_RTO_MAX);
+                       retry_until = tcp_orphan_retries(sk, alive);
 
-                       retry_until = sysctl_tcp_orphan_retries;
+                       if (tcp_out_of_resources(sk, alive || tp->retransmits < retry_until))
+                               return 1;
                }
        }
 
@@ -220,14 +217,38 @@ static void tcp_delack_timer(unsigned long data)
                /* Try again later. */
                tp->ack.blocked = 1;
                NET_INC_STATS_BH(DelayedACKLocked);
-               tcp_reset_xmit_timer(sk, TCP_TIME_DACK, TCP_DELACK_MIN);
+               if (!mod_timer(&tp->delack_timer, jiffies + TCP_DELACK_MIN))
+                       sock_hold(sk);
                goto out_unlock;
        }
 
-       if (tp->ack.pending) {
+       tcp_mem_reclaim(sk);
+
+       if (sk->state == TCP_CLOSE || !(tp->ack.pending&2))
+               goto out;
+
+       if ((long)(tp->ack.timeout - jiffies) > 0) {
+               if (!mod_timer(&tp->delack_timer, tp->ack.timeout))
+                       sock_hold(sk);
+               goto out;
+       }
+       tp->ack.pending &= ~2;
+
+       if (skb_queue_len(&tp->ucopy.prequeue)) {
+               struct sk_buff *skb;
+
+               net_statistics[smp_processor_id()*2].TCPSchedulerFailed += skb_queue_len(&tp->ucopy.prequeue);
+
+               while ((skb = __skb_dequeue(&tp->ucopy.prequeue)) != NULL)
+                       sk->backlog_rcv(sk, skb);
+
+               tp->ucopy.memory = 0;
+       }
+
+       if (tcp_ack_scheduled(tp)) {
                if (!tp->ack.pingpong) {
                        /* Delayed ACK missed: inflate ATO. */
-                       tp->ack.ato = min(tp->ack.ato<<1, TCP_ATO_MAX);
+                       tp->ack.ato = min(tp->ack.ato<<1, tp->rto);
                } else {
                        /* Delayed ACK missed: leave pingpong mode and
                         * deflate ATO.
@@ -240,30 +261,22 @@ static void tcp_delack_timer(unsigned long data)
        }
        TCP_CHECK_TIMER(sk);
 
+out:
+       if (tcp_memory_pressure)
+               tcp_mem_reclaim(sk);
 out_unlock:
        bh_unlock_sock(sk);
        sock_put(sk);
 }
 
-static void tcp_probe_timer(unsigned long data)
+static void tcp_probe_timer(struct sock *sk)
 {
-       struct sock *sk = (struct sock*)data;
        struct tcp_opt *tp = &sk->tp_pinfo.af_tcp;
        int max_probes;
 
-       bh_lock_sock(sk);
-       if (sk->lock.users) {
-               /* Try again later. */
-               tcp_reset_xmit_timer(sk, TCP_TIME_PROBE0, HZ/5);
-               goto out_unlock;
-       }
-
-       if (sk->state == TCP_CLOSE)
-               goto out_unlock;
-
        if (tp->packets_out || !tp->send_head) {
                tp->probes_out = 0;
-               goto out_unlock;
+               return;
        }
 
        /* *WARNING* RFC 1122 forbids this
@@ -284,10 +297,12 @@ static void tcp_probe_timer(unsigned long data)
        max_probes = sysctl_tcp_retries2;
 
        if (sk->dead) {
-               if (tcp_out_of_resources(sk, tp->probes_out <= max_probes))
-                       goto out_unlock;
+               int alive = ((tp->rto<<tp->backoff) < TCP_RTO_MAX);
+               max_probes = tcp_orphan_retries(sk, alive);
 
-               max_probes = sysctl_tcp_orphan_retries;
+               if (tcp_out_of_resources(sk, alive || tp->probes_out <= max_probes))
+                       return;
        }
 
        if (tp->probes_out > max_probes) {
@@ -295,284 +310,47 @@ static void tcp_probe_timer(unsigned long data)
        } else {
                /* Only send another probe if we didn't close things up. */
                tcp_send_probe0(sk);
-               TCP_CHECK_TIMER(sk);
        }
-out_unlock:
-       bh_unlock_sock(sk);
-       sock_put(sk);
 }
 
-
-/* Kill off TIME_WAIT sockets once their lifetime has expired. */
-static int tcp_tw_death_row_slot = 0;
-int tcp_tw_count = 0;
-
-static struct tcp_tw_bucket *tcp_tw_death_row[TCP_TWKILL_SLOTS];
-static spinlock_t tw_death_lock = SPIN_LOCK_UNLOCKED;
-static struct timer_list tcp_tw_timer = { function: tcp_twkill };
-
-static void SMP_TIMER_NAME(tcp_twkill)(unsigned long dummy)
-{
-       struct tcp_tw_bucket *tw;
-       int killed = 0;
-
-       /* NOTE: compare this to previous version where lock
-        * was released after detaching chain. It was racy,
-        * because tw buckets are scheduled in not serialized context
-        * in 2.3 (with netfilter), and with softnet it is common, because
-        * soft irqs are not sequenced.
-        */
-       spin_lock(&tw_death_lock);
-
-       if (tcp_tw_count == 0)
-               goto out;
-
-       while((tw = tcp_tw_death_row[tcp_tw_death_row_slot]) != NULL) {
-               tcp_tw_death_row[tcp_tw_death_row_slot] = tw->next_death;
-               tw->pprev_death = NULL;
-               spin_unlock(&tw_death_lock);
-
-               tcp_timewait_kill(tw);
-               tcp_tw_put(tw);
-
-               killed++;
-
-               spin_lock(&tw_death_lock);
-       }
-       tcp_tw_death_row_slot =
-               ((tcp_tw_death_row_slot + 1) & (TCP_TWKILL_SLOTS - 1));
-
-       if ((tcp_tw_count -= killed) != 0)
-               mod_timer(&tcp_tw_timer, jiffies+TCP_TWKILL_PERIOD);
-       net_statistics[smp_processor_id()*2].TimeWaited += killed;
-out:
-       spin_unlock(&tw_death_lock);
-}
-
-SMP_TIMER_DEFINE(tcp_twkill, tcp_twkill_task);
-
-/* These are always called from BH context.  See callers in
- * tcp_input.c to verify this.
- */
-
-/* This is for handling early-kills of TIME_WAIT sockets. */
-void tcp_tw_deschedule(struct tcp_tw_bucket *tw)
-{
-       spin_lock(&tw_death_lock);
-       if (tw->pprev_death) {
-               if(tw->next_death)
-                       tw->next_death->pprev_death = tw->pprev_death;
-               *tw->pprev_death = tw->next_death;
-               tw->pprev_death = NULL;
-               tcp_tw_put(tw);
-               if (--tcp_tw_count == 0)
-                       del_timer(&tcp_tw_timer);
-       }
-       spin_unlock(&tw_death_lock);
-}
-
-/* Short-time timewait calendar */
-
-static int tcp_twcal_hand = -1;
-static int tcp_twcal_jiffie;
-static void tcp_twcal_tick(unsigned long);
-static struct timer_list tcp_twcal_timer = {function: tcp_twcal_tick};
-static struct tcp_tw_bucket *tcp_twcal_row[TCP_TW_RECYCLE_SLOTS];
-
-void tcp_tw_schedule(struct tcp_tw_bucket *tw, int timeo)
-{
-       struct tcp_tw_bucket **tpp;
-       int slot;
-
-       /* timeout := RTO * 3.5
-        *
-        * 3.5 = 1+2+0.5 to wait for two retransmits.
-        *
-        * RATIONALE: if FIN arrived and we entered TIME-WAIT state,
-        * our ACK acking that FIN can be lost. If N subsequent retransmitted
-        * FINs (or previous seqments) are lost (probability of such event
-        * is p^(N+1), where p is probability to lose single packet and
-        * time to detect the loss is about RTO*(2^N - 1) with exponential
-        * backoff). Normal timewait length is calculated so, that we
-        * waited at least for one retransmitted FIN (maximal RTO is 120sec).
-        * [ BTW Linux. following BSD, violates this requirement waiting
-        *   only for 60sec, we should wait at least for 240 secs.
-        *   Well, 240 consumes too much of resources 8)
-        * ]
-        * This interval is not reduced to catch old duplicate and
-        * responces to our wandering segments living for two MSLs.
-        * However, if we use PAWS to detect
-        * old duplicates, we can reduce the interval to bounds required
-        * by RTO, rather than MSL. So, if peer understands PAWS, we
-        * kill tw bucket after 3.5*RTO (it is important that this number
-        * is greater than TS tick!) and detect old duplicates with help
-        * of PAWS.
-        */
-       slot = (timeo + (1<<TCP_TW_RECYCLE_TICK) - 1) >> TCP_TW_RECYCLE_TICK;
-
-       spin_lock(&tw_death_lock);
-
-       /* Unlink it, if it was scheduled */
-       if (tw->pprev_death) {
-               if(tw->next_death)
-                       tw->next_death->pprev_death = tw->pprev_death;
-               *tw->pprev_death = tw->next_death;
-               tw->pprev_death = NULL;
-               tcp_tw_count--;
-       } else
-               atomic_inc(&tw->refcnt);
-
-       if (slot >= TCP_TW_RECYCLE_SLOTS) {
-               /* Schedule to slow timer */
-               if (timeo >= TCP_TIMEWAIT_LEN) {
-                       slot = TCP_TWKILL_SLOTS-1;
-               } else {
-                       slot = (timeo + TCP_TWKILL_PERIOD-1) / TCP_TWKILL_PERIOD;
-                       if (slot >= TCP_TWKILL_SLOTS)
-                               slot = TCP_TWKILL_SLOTS-1;
-               }
-               tw->ttd = jiffies + timeo;
-               slot = (tcp_tw_death_row_slot + slot) & (TCP_TWKILL_SLOTS - 1);
-               tpp = &tcp_tw_death_row[slot];
-       } else {
-               tw->ttd = jiffies + (slot<<TCP_TW_RECYCLE_TICK);
-
-               if (tcp_twcal_hand < 0) {
-                       tcp_twcal_hand = 0;
-                       tcp_twcal_jiffie = jiffies;
-                       tcp_twcal_timer.expires = tcp_twcal_jiffie + (slot<<TCP_TW_RECYCLE_TICK);
-                       add_timer(&tcp_twcal_timer);
-               } else {
-                       if ((long)(tcp_twcal_timer.expires - jiffies) > (slot<<TCP_TW_RECYCLE_TICK))
-                               mod_timer(&tcp_twcal_timer, jiffies + (slot<<TCP_TW_RECYCLE_TICK));
-                       slot = (tcp_twcal_hand + slot)&(TCP_TW_RECYCLE_SLOTS-1);
-               }
-               tpp = &tcp_twcal_row[slot];
-       }
-
-       if((tw->next_death = *tpp) != NULL)
-               (*tpp)->pprev_death = &tw->next_death;
-       *tpp = tw;
-       tw->pprev_death = tpp;
-
-       if (tcp_tw_count++ == 0)
-               mod_timer(&tcp_tw_timer, jiffies+TCP_TWKILL_PERIOD);
-       spin_unlock(&tw_death_lock);
-}
-
-void SMP_TIMER_NAME(tcp_twcal_tick)(unsigned long dummy)
-{
-       int n, slot;
-       unsigned long j;
-       unsigned long now = jiffies;
-       int killed = 0;
-       int adv = 0;
-
-       spin_lock(&tw_death_lock);
-       if (tcp_twcal_hand < 0)
-               goto out;
-
-       slot = tcp_twcal_hand;
-       j = tcp_twcal_jiffie;
-
-       for (n=0; n<TCP_TW_RECYCLE_SLOTS; n++) {
-               if ((long)(j - now) <= 0) {
-                       struct tcp_tw_bucket *tw;
-
-                       while((tw = tcp_twcal_row[slot]) != NULL) {
-                               tcp_twcal_row[slot] = tw->next_death;
-                               tw->pprev_death = NULL;
-
-                               tcp_timewait_kill(tw);
-                               tcp_tw_put(tw);
-                               killed++;
-                       }
-               } else {
-                       if (!adv) {
-                               adv = 1;
-                               tcp_twcal_jiffie = j;
-                               tcp_twcal_hand = slot;
-                       }
-
-                       if (tcp_twcal_row[slot] != NULL) {
-                               mod_timer(&tcp_twcal_timer, j);
-                               goto out;
-                       }
-               }
-               j += (1<<TCP_TW_RECYCLE_TICK);
-               slot = (slot+1)&(TCP_TW_RECYCLE_SLOTS-1);
-       }
-       tcp_twcal_hand = -1;
-
-out:
-       if ((tcp_tw_count -= killed) == 0)
-               del_timer(&tcp_tw_timer);
-       net_statistics[smp_processor_id()*2].TimeWaitKilled += killed;
-       spin_unlock(&tw_death_lock);
-}
-
-SMP_TIMER_DEFINE(tcp_twcal_tick, tcp_twcal_tasklet);
-
 /*
  *     The TCP retransmit timer.
  */
 
-static void tcp_retransmit_timer(unsigned long data)
+static void tcp_retransmit_timer(struct sock *sk)
 {
-       struct sock *sk = (struct sock*)data;
        struct tcp_opt *tp = &sk->tp_pinfo.af_tcp;
 
-       bh_lock_sock(sk);
-       if (sk->lock.users) {
-               /* Try again later */  
-               tcp_reset_xmit_timer(sk, TCP_TIME_RETRANS, HZ/20);
-               goto out_unlock;
-       }
-
-       if (sk->state == TCP_CLOSE || tp->packets_out == 0)
-               goto out_unlock;
+       if (tp->packets_out == 0)
+               goto out;
 
        BUG_TRAP(!skb_queue_empty(&sk->write_queue));
 
        if (tcp_write_timeout(sk))
-               goto out_unlock;
+               goto out;
 
-       /* RFC 2018, clear all 'sacked' flags in retransmission queue,
-        * the sender may have dropped out of order frames and we must
-        * send them out should this timer fire on us.
-        */
-       if(tp->sack_ok) {
-               struct sk_buff *skb = skb_peek(&sk->write_queue);
-
-               while((skb != NULL) &&
-                     (skb != tp->send_head) &&
-                     (skb != (struct sk_buff *)&sk->write_queue)) {
-                       TCP_SKB_CB(skb)->sacked &=
-                               ~(TCPCB_SACKED_ACKED | TCPCB_SACKED_RETRANS);
-                       skb = skb->next;
+       if (tp->retransmits == 0) {
+               if (tp->ca_state == TCP_CA_Disorder || tp->ca_state == TCP_CA_Recovery) {
+                       if (tp->sack_ok) {
+                               if (tp->ca_state == TCP_CA_Recovery)
+                                       NET_INC_STATS_BH(TCPSackRecoveryFail);
+                               else
+                                       NET_INC_STATS_BH(TCPSackFailures);
+                       } else {
+                               if (tp->ca_state == TCP_CA_Recovery)
+                                       NET_INC_STATS_BH(TCPRenoRecoveryFail);
+                               else
+                                       NET_INC_STATS_BH(TCPRenoFailures);
+                       }
+               } else if (tp->ca_state == TCP_CA_Loss) {
+                       NET_INC_STATS_BH(TCPLossFailures);
+               } else {
+                       NET_INC_STATS_BH(TCPTimeouts);
                }
        }
 
-       /* Retransmission. */
-       tp->retrans_head = NULL;
-       tp->rexmt_done = 0;
-       tp->fackets_out = 0;
-       tp->retrans_out = 0;
-       if (tp->retransmits == 0) {
-               /* Remember window where we lost:
-                * "one half of the current window but at least 2 segments"
-                *
-                * Here "current window" means the effective one, which
-                * means it must be an accurate representation of our current
-                * sending rate _and_ the snd_wnd.
-                */
-               tp->snd_ssthresh = tcp_recalc_ssthresh(tp);
-               tp->snd_cwnd_cnt = 0;
-               tp->snd_cwnd = 1;
-       }
+       tcp_enter_loss(sk, 0);
 
-       tp->dup_acks = 0;
-       tp->high_seq = tp->snd_nxt;
        if (tcp_retransmit_skb(sk, skb_peek(&sk->write_queue)) > 0) {
                /* Retransmission failed because of local congestion,
                 * do not backoff.
@@ -581,8 +359,7 @@ static void tcp_retransmit_timer(unsigned long data)
                        tp->retransmits=1;
                tcp_reset_xmit_timer(sk, TCP_TIME_RETRANS,
                                     min(tp->rto, TCP_RESOURCE_PROBE_INTERVAL));
-               TCP_CHECK_TIMER(sk);
-               goto out_unlock;
+               goto out;
        }
 
        /* Increase the timeout each time we retransmit.  Note that
@@ -606,8 +383,48 @@ static void tcp_retransmit_timer(unsigned long data)
        tcp_reset_xmit_timer(sk, TCP_TIME_RETRANS, tp->rto);
        if (tp->retransmits > sysctl_tcp_retries1)
                __sk_dst_reset(sk);
+
+out:
+}
+
+static void tcp_write_timer(unsigned long data)
+{
+       struct sock *sk = (struct sock*)data;
+       struct tcp_opt *tp = &sk->tp_pinfo.af_tcp;
+       int event;
+
+       bh_lock_sock(sk);
+       if (sk->lock.users) {
+               /* Try again later */
+               if (!mod_timer(&tp->retransmit_timer, jiffies + (HZ/20)))
+                       sock_hold(sk);
+               goto out_unlock;
+       }
+
+       if (sk->state == TCP_CLOSE || !tp->pending)
+               goto out;
+
+       if ((long)(tp->timeout - jiffies) > 0) {
+               if (!mod_timer(&tp->retransmit_timer, tp->timeout))
+                       sock_hold(sk);
+               goto out;
+       }
+
+       event = tp->pending;
+       tp->pending = 0;
+
+       switch (event) {
+       case TCP_TIME_RETRANS:
+               tcp_retransmit_timer(sk);
+               break;
+       case TCP_TIME_PROBE0:
+               tcp_probe_timer(sk);
+               break;
+       }
        TCP_CHECK_TIMER(sk);
 
+out:
+       tcp_mem_reclaim(sk);
 out_unlock:
        bh_unlock_sock(sk);
        sock_put(sk);
@@ -794,6 +611,7 @@ static void tcp_keepalive_timer (unsigned long data)
        }
 
        TCP_CHECK_TIMER(sk);
+       tcp_mem_reclaim(sk);
 
 resched:
        tcp_reset_keepalive_timer (sk, elapsed);
index dec2a6126dbc13c7bba01321a926208ab003a5f6..59afc3ceefa429e8ff9da148a1113e30001f3128 100644 (file)
@@ -5,7 +5,7 @@
  *
  *             The User Datagram Protocol (UDP).
  *
- * Version:    $Id: udp.c,v 1.84 2000/07/08 00:20:43 davem Exp $
+ * Version:    $Id: udp.c,v 1.85 2000/08/09 11:59:04 davem Exp $
  *
  * Authors:    Ross Biro, <bir7@leland.Stanford.Edu>
  *             Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
@@ -997,8 +997,8 @@ static void get_udp_sock(struct sock *sp, char *tmpbuf, int i)
                i, src, srcp, dest, destp, sp->state, 
                atomic_read(&sp->wmem_alloc), atomic_read(&sp->rmem_alloc),
                timer_active, timer_expires-jiffies, 0,
-               sp->socket->inode->i_uid, 0,
-               sp->socket ? sp->socket->inode->i_ino : 0,
+               sock_i_uid(sp), 0,
+               sock_i_ino(sp),
                atomic_read(&sp->refcnt), sp);
 }
 
index e83870421f2b09b0c4ed65f631bab7e9bda82951..0b7e0025236ea8142ce47aaeb46049908dc4d9bc 100644 (file)
@@ -7,7 +7,7 @@
  *
  *     Adapted from linux/net/ipv4/raw.c
  *
- *     $Id: raw.c,v 1.39 2000/07/08 00:20:43 davem Exp $
+ *     $Id: raw.c,v 1.40 2000/08/09 11:59:04 davem Exp $
  *
  *     Fixes:
  *     Hideaki YOSHIFUJI       :       sin6_scope_id support
@@ -763,8 +763,8 @@ static void get_raw6_sock(struct sock *sp, char *tmpbuf, int i)
                sp->state, 
                atomic_read(&sp->wmem_alloc), atomic_read(&sp->rmem_alloc),
                sock_timer_active, timer_expires-jiffies, 0,
-               sp->socket->inode->i_uid, 0,
-               sp->socket ? sp->socket->inode->i_ino : 0,
+               sock_i_uid(sp), 0,
+               sock_i_ino(sp),
                atomic_read(&sp->refcnt), sp);
 }
 
index dc5ddffd8ba3a5fc2c08bfd2bc6513100e0d3642..e1832fd8c2e04710dcb285b66b280abf292ea7f8 100644 (file)
@@ -5,7 +5,7 @@
  *     Authors:
  *     Pedro Roque             <roque@di.fc.ul.pt>     
  *
- *     $Id: route.c,v 1.46 2000/07/07 22:40:35 davem Exp $
+ *     $Id: route.c,v 1.48 2000/08/10 01:17:13 davem Exp $
  *
  *     This program is free software; you can redistribute it and/or
  *      modify it under the terms of the GNU General Public License
@@ -93,7 +93,7 @@ struct dst_ops ip6_dst_ops = {
 
 struct rt6_info ip6_null_entry = {
        {{NULL, ATOMIC_INIT(1), 1, &loopback_dev,
-         -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+         -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
          -ENETUNREACH, NULL, NULL,
          ip6_pkt_discard, ip6_pkt_discard,
 #ifdef CONFIG_NET_CLS_ROUTE
index 81c3477baabe62edd3d8035e2264cf9cd09ae19f..952e2476516aa5ac0f605312f113953363c9f411 100644 (file)
@@ -5,7 +5,7 @@
  *     Authors:
  *     Pedro Roque             <roque@di.fc.ul.pt>     
  *
- *     $Id: tcp_ipv6.c,v 1.124 2000/05/03 06:37:07 davem Exp $
+ *     $Id: tcp_ipv6.c,v 1.125 2000/08/09 11:59:04 davem Exp $
  *
  *     Based on: 
  *     linux/net/ipv4/tcp.c
@@ -46,6 +46,7 @@
 #include <net/transp_v6.h>
 #include <net/addrconf.h>
 #include <net/ip6_route.h>
+#include <net/inet_ecn.h>
 
 #include <asm/uaccess.h>
 
@@ -152,7 +153,9 @@ static int tcp_v6_get_port(struct sock *sk, unsigned short snum)
                                                    !ipv6_addr_cmp(&sk->net_pinfo.af_inet6.rcv_saddr,
                                                                   sk2->state != TCP_TIME_WAIT ?
                                                                   &sk2->net_pinfo.af_inet6.rcv_saddr :
-                                                                  &((struct tcp_tw_bucket*)sk)->v6_rcv_saddr))
+                                                                  &((struct tcp_tw_bucket*)sk)->v6_rcv_saddr) ||
+                                                   (addr_type==IPV6_ADDR_MAPPED && sk2->family==AF_INET &&
+                                                    sk->rcv_saddr==sk2->rcv_saddr))
                                                        break;
                                        }
                                }
@@ -430,10 +433,9 @@ static int tcp_v6_check_established(struct sock *sk)
                   sk2->bound_dev_if == sk->bound_dev_if) {
                        struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
 
-                       if (tw->substate == TCP_TIME_WAIT &&
-                           sysctl_tcp_tw_recycle && tw->ts_recent_stamp) {
+                       if (tw->ts_recent_stamp) {
                                /* See comment in tcp_ipv4.c */
-                               if ((tp->write_seq = tw->snd_nxt + 2) == 0)
+                               if ((tp->write_seq = tw->snd_nxt+65535+2) == 0)
                                        tp->write_seq = 1;
                                tp->ts_recent = tw->ts_recent;
                                tp->ts_recent_stamp = tw->ts_recent_stamp;
@@ -526,6 +528,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
        fl.fl6_flowlabel = 0;
        if (np->sndflow) {
                fl.fl6_flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK;
+               IP6_ECN_flow_init(fl.fl6_flowlabel);
                if (fl.fl6_flowlabel&IPV6_FLOWLABEL_MASK) {
                        struct ip6_flowlabel *flowlabel;
                        flowlabel = fl6_sock_lookup(sk, fl.fl6_flowlabel);
@@ -644,6 +647,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
        /* set the source address */
        ipv6_addr_copy(&np->rcv_saddr, saddr);
        ipv6_addr_copy(&np->saddr, saddr);
+       sk->rcv_saddr= LOOPBACK4_IPV6;
 
        tp->ext_header_len = 0;
        if (np->opt)
@@ -651,7 +655,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
        tp->mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
 
        err = -ENOBUFS;
-       buff = sock_wmalloc(sk, MAX_TCP_HEADER + 15, 0, GFP_KERNEL);
+       buff = alloc_skb(MAX_TCP_HEADER + 15, GFP_KERNEL);
 
        if (buff == NULL)
                goto failure;
@@ -1072,33 +1076,30 @@ static struct sock *tcp_v6_hnd_req(struct sock *sk,struct sk_buff *skb)
        struct open_request *req, **prev;
        struct tcphdr *th = skb->h.th;
        struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
+       struct sock *nsk;
 
        /* Find possible connection requests. */
        req = tcp_v6_search_req(tp, skb->nh.ipv6h, th, tcp_v6_iif(skb), &prev);
        if (req)
                return tcp_check_req(sk, skb, req, prev);
 
-       if (tp->accept_queue) {
-               struct sock *nsk;
+       nsk = __tcp_v6_lookup_established(&skb->nh.ipv6h->saddr,
+                                         th->source,
+                                         &skb->nh.ipv6h->daddr,
+                                         ntohs(th->dest),
+                                         tcp_v6_iif(skb));
 
-               nsk = __tcp_v6_lookup_established(&skb->nh.ipv6h->saddr,
-                                                 th->source,
-                                                 &skb->nh.ipv6h->daddr,
-                                                 ntohs(th->dest),
-                                                 tcp_v6_iif(skb));
-
-               if (nsk) {
-                       if (nsk->state != TCP_TIME_WAIT) {
-                               bh_lock_sock(nsk);
-                               return nsk;
-                       }
-                       tcp_tw_put((struct tcp_tw_bucket*)sk);
-                       return NULL;
+       if (nsk) {
+               if (nsk->state != TCP_TIME_WAIT) {
+                       bh_lock_sock(nsk);
+                       return nsk;
                }
+               tcp_tw_put((struct tcp_tw_bucket*)sk);
+               return NULL;
        }
 
 #if 0 /*def CONFIG_SYN_COOKIES*/
-       if (!th->rst && (th->syn || th->ack))
+       if (!th->rst && !th->syn && th->ack)
                sk = cookie_v6_check(sk, skb, &(IPCB(skb)->opt));
 #endif
        return sk;
@@ -1160,13 +1161,14 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
        tp.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
        tp.user_mss = sk->tp_pinfo.af_tcp.user_mss;
 
-       tcp_parse_options(NULL, skb->h.th, &tp, 0);
+       tcp_parse_options(skb, &tp);
 
        tcp_openreq_init(req, &tp, skb);
 
        req->class = &or_ipv6;
        ipv6_addr_copy(&req->af.v6_req.rmt_addr, &skb->nh.ipv6h->saddr);
        ipv6_addr_copy(&req->af.v6_req.loc_addr, &skb->nh.ipv6h->daddr);
+       TCP_ECN_create_request(req, skb->h.th);
        req->af.v6_req.pktopts = NULL;
        if (ipv6_opt_accepted(sk, skb) ||
            sk->net_pinfo.af_inet6.rxopt.bits.rxinfo ||
@@ -1344,10 +1346,8 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
                newtp->ext_header_len = np->opt->opt_nflen + np->opt->opt_flen;
 
        tcp_sync_mss(newsk, dst->pmtu);
-       tcp_initialize_rcv_mss(newsk);
        newtp->advmss = dst->advmss;
-
-       tcp_init_buffer_space(newsk);
+       tcp_initialize_rcv_mss(newsk);
 
        newsk->daddr    = LOOPBACK4_IPV6;
        newsk->saddr    = LOOPBACK4_IPV6;
@@ -1377,8 +1377,8 @@ static int tcp_v6_checksum_init(struct sk_buff *skb)
                        return -1;
                }
                skb->ip_summed = CHECKSUM_UNNECESSARY;
-       } else if (skb->ip_summed != CHECKSUM_UNNECESSARY) {
-               if (skb->len <= 68) {
+       } else {
+               if (skb->len <= 76) {
                        if (tcp_v6_check(skb->h.th,skb->len,&skb->nh.ipv6h->saddr,
                                         &skb->nh.ipv6h->daddr,csum_partial((char *)skb->h.th, skb->len, 0)))
                                return -1;
@@ -1404,7 +1404,7 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
 #ifdef CONFIG_FILTER
        struct sk_filter *filter;
 #endif
-       int users = 0;
+       struct sk_buff *opt_skb = NULL;
 
        /* Imagine: socket is IPv6. IPv4 packet arrives,
           goes to IPv4 receive handler and backlogged.
@@ -1443,22 +1443,20 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
           by tcp. Feel free to propose better solution.
                                               --ANK (980728)
         */
-       if (sk->net_pinfo.af_inet6.rxopt.all) {
-               users = atomic_read(&skb->users);
-               atomic_inc(&skb->users);
-       }
+       if (sk->net_pinfo.af_inet6.rxopt.all)
+               opt_skb = skb_clone(skb, GFP_ATOMIC);
 
        if (sk->state == TCP_ESTABLISHED) { /* Fast path */
                TCP_CHECK_TIMER(sk);
                if (tcp_rcv_established(sk, skb, skb->h.th, skb->len))
                        goto reset;
                TCP_CHECK_TIMER(sk);
-               if (users)
+               if (opt_skb)
                        goto ipv6_pktoptions;
                return 0;
        }
 
-       if (tcp_checksum_complete(skb))
+       if (skb->len < (skb->h.th->doff<<2) || tcp_checksum_complete(skb))
                goto csum_err;
 
        if (sk->state == TCP_LISTEN) { 
@@ -1474,8 +1472,8 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
                if(nsk != sk) {
                        if (tcp_child_process(sk, nsk, skb))
                                goto reset;
-                       if (users)
-                               kfree_skb(skb);
+                       if (opt_skb)
+                               __kfree_skb(opt_skb);
                        return 0;
                }
        }
@@ -1484,15 +1482,15 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
        if (tcp_rcv_state_process(sk, skb, skb->h.th, skb->len))
                goto reset;
        TCP_CHECK_TIMER(sk);
-       if (users)
+       if (opt_skb)
                goto ipv6_pktoptions;
        return 0;
 
 reset:
        tcp_v6_send_reset(skb);
 discard:
-       if (users)
-               kfree_skb(skb);
+       if (opt_skb)
+               __kfree_skb(opt_skb);
        kfree_skb(skb);
        return 0;
 csum_err:
@@ -1508,29 +1506,23 @@ ipv6_pktoptions:
           3. socket is not in passive state.
           4. Finally, it really contains options, which user wants to receive.
         */
-       if (atomic_read(&skb->users) > users &&
-           TCP_SKB_CB(skb)->end_seq == sk->tp_pinfo.af_tcp.rcv_nxt &&
+       if (TCP_SKB_CB(opt_skb)->end_seq == sk->tp_pinfo.af_tcp.rcv_nxt &&
            !((1<<sk->state)&(TCPF_CLOSE|TCPF_LISTEN))) {
                if (sk->net_pinfo.af_inet6.rxopt.bits.rxinfo)
-                       sk->net_pinfo.af_inet6.mcast_oif = tcp_v6_iif(skb);
+                       sk->net_pinfo.af_inet6.mcast_oif = tcp_v6_iif(opt_skb);
                if (sk->net_pinfo.af_inet6.rxopt.bits.rxhlim)
-                       sk->net_pinfo.af_inet6.mcast_hops = skb->nh.ipv6h->hop_limit;
-               if (ipv6_opt_accepted(sk, skb)) {
-                       struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
-                       kfree_skb(skb);
-                       skb = NULL;
-                       if (skb2) {
-                               skb_set_owner_r(skb2, sk);
-                               skb = xchg(&sk->net_pinfo.af_inet6.pktoptions, skb2);
-                       }
+                       sk->net_pinfo.af_inet6.mcast_hops = opt_skb->nh.ipv6h->hop_limit;
+               if (ipv6_opt_accepted(sk, opt_skb)) {
+                       skb_set_owner_r(opt_skb, sk);
+                       opt_skb = xchg(&sk->net_pinfo.af_inet6.pktoptions, opt_skb);
                } else {
-                       kfree_skb(skb);
-                       skb = xchg(&sk->net_pinfo.af_inet6.pktoptions, NULL);
+                       __kfree_skb(opt_skb);
+                       opt_skb = xchg(&sk->net_pinfo.af_inet6.pktoptions, NULL);
                }
        }
 
-       if (skb)
-               kfree_skb(skb);
+       if (opt_skb)
+               kfree_skb(opt_skb);
        return 0;
 }
 
@@ -1559,10 +1551,9 @@ int tcp_v6_rcv(struct sk_buff *skb, unsigned long len)
 
        TCP_INC_STATS_BH(TcpInSegs);
 
-       if (len < sizeof(struct tcphdr))
-               goto bad_packet;
-
-       if (tcp_v6_checksum_init(skb) < 0)
+       if (th->doff < sizeof(struct tcphdr)/4 ||
+           (skb->ip_summed != CHECKSUM_UNNECESSARY &&
+            tcp_v6_checksum_init(skb) < 0))
                goto bad_packet;
 
        TCP_SKB_CB(skb)->seq = ntohl(th->seq);
@@ -1570,6 +1561,8 @@ int tcp_v6_rcv(struct sk_buff *skb, unsigned long len)
                                    len - th->doff*4);
        TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
        TCP_SKB_CB(skb)->when = 0;
+       TCP_SKB_CB(skb)->flags = ip6_get_dsfield(skb->nh.ipv6h);
+       TCP_SKB_CB(skb)->sacked = 0;
        skb->used = 0;
 
        sk = __tcp_v6_lookup(saddr, th->source, daddr, ntohs(th->dest), tcp_v6_iif(skb));
@@ -1596,7 +1589,7 @@ process:
        return ret;
 
 no_tcp_socket:
-       if (tcp_checksum_complete(skb)) {
+       if (len < (th->doff<<2) || tcp_checksum_complete(skb)) {
 bad_packet:
                TCP_INC_STATS_BH(TcpInErrs);
        } else {
@@ -1617,7 +1610,7 @@ discard_and_relse:
        goto discard_it;
 
 do_time_wait:
-       if (tcp_checksum_complete(skb)) {
+       if (len < (th->doff<<2) || tcp_checksum_complete(skb)) {
                TCP_INC_STATS_BH(TcpInErrs);
                sock_put(sk);
                goto discard_it;
@@ -1698,6 +1691,7 @@ static int tcp_v6_xmit(struct sk_buff *skb)
        fl.fl6_dst = &np->daddr;
        fl.fl6_src = &np->saddr;
        fl.fl6_flowlabel = np->flow_label;
+       IP6_ECN_flow_xmit(sk, fl.fl6_flowlabel);
        fl.oif = sk->bound_dev_if;
        fl.uli_u.ports.sport = sk->sport;
        fl.uli_u.ports.dport = sk->dport;
@@ -1816,12 +1810,19 @@ static int tcp_v6_init_sock(struct sock *sk)
        tp->snd_cwnd_clamp = ~0;
        tp->mss_cache = 536;
 
+       tp->reordering = sysctl_tcp_reordering;
+
        sk->state = TCP_CLOSE;
 
        sk->tp_pinfo.af_tcp.af_specific = &ipv6_specific;
 
        sk->write_space = tcp_write_space;
 
+       sk->sndbuf = sysctl_tcp_wmem[1];
+       sk->rcvbuf = sysctl_tcp_rmem[1];
+
+       atomic_inc(&tcp_sockets_allocated);
+
        return 0;
 }
 
@@ -1832,7 +1833,7 @@ static int tcp_v6_destroy_sock(struct sock *sk)
        tcp_clear_xmit_timers(sk);
 
        /* Cleanup up the write buffer. */
-       __skb_queue_purge(&sk->write_queue);
+       tcp_writequeue_purge(sk);
 
        /* Cleans up our, hopefuly empty, out_of_order_queue. */
        __skb_queue_purge(&tp->out_of_order_queue);
@@ -1844,11 +1845,13 @@ static int tcp_v6_destroy_sock(struct sock *sk)
        if(sk->prev != NULL)
                tcp_put_port(sk);
 
+       atomic_dec(&tcp_sockets_allocated);
+
        return inet6_destroy_sock(sk);
 }
 
 /* Proc filesystem TCPv6 sock list dumping. */
-static void get_openreq6(struct sock *sk, struct open_request *req, char *tmpbuf, int i)
+static void get_openreq6(struct sock *sk, struct open_request *req, char *tmpbuf, int i, int uid)
 {
        struct in6_addr *dest, *src;
        int ttd = req->expires - jiffies;
@@ -1873,7 +1876,7 @@ static void get_openreq6(struct sock *sk, struct open_request *req, char *tmpbuf
                1,   /* timers active (only the expire timer) */  
                ttd, 
                req->retrans,
-               sk->socket ? sk->socket->inode->i_uid : 0,
+               uid,
                0,  /* non standard timer */  
                0, /* open_requests have no inode */
                0, req);
@@ -1891,25 +1894,23 @@ static void get_tcp6_sock(struct sock *sp, char *tmpbuf, int i)
        src   = &sp->net_pinfo.af_inet6.rcv_saddr;
        destp = ntohs(sp->dport);
        srcp  = ntohs(sp->sport);
-       timer_active    = 0;
-       timer_expires   = (unsigned) -1;
-       if (timer_pending(&tp->retransmit_timer) && tp->retransmit_timer.expires < timer_expires) {
+       if (tp->pending == TCP_TIME_RETRANS) {
                timer_active    = 1;
-               timer_expires   = tp->retransmit_timer.expires;
-       } else if (timer_pending(&tp->probe_timer) && tp->probe_timer.expires < timer_expires) {
+               timer_expires   = tp->timeout;
+       } else if (tp->pending == TCP_TIME_PROBE0) {
                timer_active    = 4;
-               timer_expires   = tp->probe_timer.expires;
-       }
-       if (timer_pending(&sp->timer) && sp->timer.expires < timer_expires) {
+               timer_expires   = tp->timeout;
+       } else if (timer_pending(&sp->timer)) {
                timer_active    = 2;
                timer_expires   = sp->timer.expires;
-       }
-       if(timer_active == 0)
+       } else {
+               timer_active    = 0;
                timer_expires = jiffies;
+       }
 
        sprintf(tmpbuf,
                "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
-               "%02X %08X:%08X %02X:%08lX %08X %5d %8d %ld %d %p %u %u %u %u",
+               "%02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p %u %u %u %u %d",
                i,
                src->s6_addr32[0], src->s6_addr32[1],
                src->s6_addr32[2], src->s6_addr32[3], srcp,
@@ -1919,11 +1920,11 @@ static void get_tcp6_sock(struct sock *sp, char *tmpbuf, int i)
                tp->write_seq-tp->snd_una, tp->rcv_nxt-tp->copied_seq,
                timer_active, timer_expires-jiffies,
                tp->retransmits,
-               sp->socket ? sp->socket->inode->i_uid : 0,
+               sock_i_uid(sp),
                tp->probes_out,
-               sp->socket ? sp->socket->inode->i_ino : 0,
+               sock_i_ino(sp),
                atomic_read(&sp->refcnt), sp,
-               tp->rto, tp->ack.ato, tp->ack.quick, tp->ack.pingpong
+               tp->rto, tp->ack.ato, tp->ack.quick, tp->ack.pingpong, sp->sndbuf
                );
 }
 
@@ -1984,6 +1985,7 @@ int tcp6_get_info(char *buffer, char **start, off_t offset, int length)
 
                for (sk = tcp_listening_hash[i]; sk; sk = sk->next, num++) {
                        struct open_request *req;
+                       int uid;
                        struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
 
                        if (sk->family != PF_INET6)
@@ -1998,6 +2000,7 @@ int tcp6_get_info(char *buffer, char **start, off_t offset, int length)
                                }
                        }
 
+                       uid = sock_i_uid(sk);
                        read_lock_bh(&tp->syn_wait_lock);
                        lopt = tp->listen_opt;
                        if (lopt && lopt->qlen != 0) {
@@ -2008,7 +2011,7 @@ int tcp6_get_info(char *buffer, char **start, off_t offset, int length)
                                                pos += LINE_LEN+1;
                                                if (pos < offset)
                                                        continue;
-                                               get_openreq6(sk, req, tmpbuf, num);
+                                               get_openreq6(sk, req, tmpbuf, num, uid);
                                                len += sprintf(buffer+len, LINE_FMT, tmpbuf);
                                                if(len >= length) { 
                                                        read_unlock_bh(&tp->syn_wait_lock);
index f9f0c0dc9f6285c295dcceeba36fac2005c5987f..b4a00b171f0afca834b14837f7d30366aa940a6e 100644 (file)
@@ -7,7 +7,7 @@
  *
  *     Based on linux/ipv4/udp.c
  *
- *     $Id: udp.c,v 1.55 2000/07/08 00:20:43 davem Exp $
+ *     $Id: udp.c,v 1.56 2000/08/09 11:59:04 davem Exp $
  *
  *     Fixes:
  *     Hideaki YOSHIFUJI       :       sin6_scope_id support
@@ -109,7 +109,10 @@ gotit:
                            (!sk2->rcv_saddr ||
                             addr_type == IPV6_ADDR_ANY ||
                             !ipv6_addr_cmp(&sk->net_pinfo.af_inet6.rcv_saddr,
-                                           &sk2->net_pinfo.af_inet6.rcv_saddr)) &&
+                                           &sk2->net_pinfo.af_inet6.rcv_saddr) ||
+                            (addr_type == IPV6_ADDR_MAPPED &&
+                             sk2->family == AF_INET &&
+                             sk->rcv_saddr == sk2->rcv_saddr)) &&
                            (!sk2->reuse || !sk->reuse))
                                goto fail;
                }
@@ -270,7 +273,6 @@ ipv4_connected:
                        ipv6_addr_set(&np->saddr, 0, 0, 
                                      __constant_htonl(0x0000ffff),
                                      sk->saddr);
-
                }
 
                if(ipv6_addr_any(&np->rcv_saddr)) {
@@ -343,7 +345,7 @@ ipv4_connected:
 
                if(ipv6_addr_any(&np->rcv_saddr)) {
                        ipv6_addr_copy(&np->rcv_saddr, &saddr);
-                       sk->rcv_saddr = 0xffffffff;
+                       sk->rcv_saddr = LOOPBACK4_IPV6;
                }
                sk->state = TCP_ESTABLISHED;
        }
@@ -923,8 +925,8 @@ static void get_udp6_sock(struct sock *sp, char *tmpbuf, int i)
                sp->state, 
                atomic_read(&sp->wmem_alloc), atomic_read(&sp->rmem_alloc),
                sock_timer_active, timer_expires-jiffies, 0,
-               sp->socket->inode->i_uid, 0,
-               sp->socket ? sp->socket->inode->i_ino : 0,
+               sock_i_uid(sp), 0,
+               sock_i_ino(sp),
                atomic_read(&sp->refcnt), sp);
 }
 
index da99c07839166525d58d4930f27ef92f292b5704..fe78e71f1c85620ca45e4484f9a7aaa84a604786 100644 (file)
@@ -315,7 +315,6 @@ EXPORT_SYMBOL(tcp_recvmsg);
 EXPORT_SYMBOL(tcp_send_synack);
 EXPORT_SYMBOL(tcp_check_req);
 EXPORT_SYMBOL(tcp_child_process);
-EXPORT_SYMBOL(tcp_reset_xmit_timer);
 EXPORT_SYMBOL(tcp_parse_options);
 EXPORT_SYMBOL(tcp_rcv_established);
 EXPORT_SYMBOL(tcp_init_xmit_timers);
@@ -355,6 +354,12 @@ EXPORT_SYMBOL(tcp_port_rover);
 EXPORT_SYMBOL(udp_port_rover);
 EXPORT_SYMBOL(tcp_sync_mss);
 EXPORT_SYMBOL(net_statistics); 
+EXPORT_SYMBOL(__tcp_mem_reclaim);
+EXPORT_SYMBOL(tcp_sockets_allocated);
+EXPORT_SYMBOL(sysctl_tcp_reordering);
+EXPORT_SYMBOL(sysctl_tcp_rmem);
+EXPORT_SYMBOL(sysctl_tcp_wmem);
+EXPORT_SYMBOL(tcp_cwnd_application_limited);
 
 EXPORT_SYMBOL(xrlim_allow);
 
@@ -569,51 +574,6 @@ EXPORT_SYMBOL(nf_setsockopt);
 EXPORT_SYMBOL(nf_getsockopt);
 #endif
 
-#ifdef CONFIG_IP_NF_CONNTRACK
-#include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
-#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
-#include <linux/netfilter_ipv4/ip_conntrack_core.h>
-EXPORT_SYMBOL(ip_conntrack_protocol_register);
-EXPORT_SYMBOL(invert_tuplepr);
-EXPORT_SYMBOL(ip_conntrack_alter_reply);
-EXPORT_SYMBOL(ip_conntrack_destroyed);
-EXPORT_SYMBOL(ip_conntrack_get);
-EXPORT_SYMBOL(ip_conntrack_module);
-EXPORT_SYMBOL(ip_conntrack_helper_register);
-EXPORT_SYMBOL(ip_conntrack_helper_unregister);
-EXPORT_SYMBOL(ip_ct_selective_cleanup);
-EXPORT_SYMBOL(ip_ct_refresh);
-EXPORT_SYMBOL(ip_conntrack_expect_related);
-EXPORT_SYMBOL(ip_conntrack_tuple_taken);
-EXPORT_SYMBOL(ip_ct_gather_frags);
-#ifdef CONFIG_IP_NF_FTP
-#include <linux/netfilter_ipv4/ip_conntrack_ftp.h>
-EXPORT_SYMBOL(ip_ftp_lock);
-#endif
-#endif /*CONFIG_IP_NF_CONNTRACK*/
-
-#ifdef CONFIG_IP_NF_NAT
-#include <linux/netfilter_ipv4/ip_nat.h>
-#include <linux/netfilter_ipv4/ip_nat_helper.h>
-#include <linux/netfilter_ipv4/ip_nat_rule.h>
-EXPORT_SYMBOL(ip_nat_setup_info);
-EXPORT_SYMBOL(ip_nat_helper_register);
-EXPORT_SYMBOL(ip_nat_helper_unregister);
-EXPORT_SYMBOL(ip_nat_expect_register);
-EXPORT_SYMBOL(ip_nat_expect_unregister);
-EXPORT_SYMBOL(ip_nat_cheat_check);
-#endif
-
-#ifdef CONFIG_IP_NF_IPTABLES
-#include <linux/netfilter_ipv4/ip_tables.h>
-EXPORT_SYMBOL(ipt_register_table);
-EXPORT_SYMBOL(ipt_unregister_table);
-EXPORT_SYMBOL(ipt_register_target);
-EXPORT_SYMBOL(ipt_unregister_target);
-EXPORT_SYMBOL(ipt_register_match);
-EXPORT_SYMBOL(ipt_unregister_match);
-#endif
-
 EXPORT_SYMBOL(register_gifconf);
 
 EXPORT_SYMBOL(net_call_rx_atomic);
index 7a484606bbae6308477ebd251c0ddc2aee787633..617519846d9d5a162cb53fdfb3c082cffb67759a 100644 (file)
@@ -5,7 +5,7 @@
  *
  *             PACKET - implements raw packet sockets.
  *
- * Version:    $Id: af_packet.c,v 1.39 2000/08/09 08:04:45 davem Exp $
+ * Version:    $Id: af_packet.c,v 1.41 2000/08/10 01:21:14 davem Exp $
  *
  * Authors:    Ross Biro, <bir7@leland.Stanford.Edu>
  *             Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
 extern int dlci_ioctl(unsigned int, void*);
 #endif
 
-/*
-   Old SOCK_PACKET. Do exist programs, which use it?
-   (not counting tcpdump) - lots of them yes - AC. 
-   
- */
 #define CONFIG_SOCK_PACKET     1
 
 /*
@@ -89,22 +84,10 @@ extern int dlci_ioctl(unsigned int, void*);
    It is more expensive, but I believe,
    it is really correct solution: reentereble, safe and fault tolerant.
 
-   Differences:
-   - Changing IFF_ALLMULTI from user level is disabled.
-     It could only confused multicast routing daemons, not more.
-   - IFF_PROMISC is faked by keeping reference count and
-     global flag, so that real IFF_PROMISC == (gflag|(count != 0))
-     I'd remove it too, but it would require recompilation tcpdump
-     and another applications, using promiscuous mode.
-   - SIOC{ADD/DEL}MULTI are moved to deprecated state,
-     they work, but complain. I do know who uses them.
-     
-*************FIXME***************
-  Alexey : This doesnt cook Im afraid. We need the low level SIOCADD/DELMULTI
-  and also IFF_ALLMULTI for DECNET, Appletalk and other stuff as well as
-  BSD compatibility issues.
-  
+   IFF_PROMISC/IFF_ALLMULTI/SIOC{ADD/DEL}MULTI are faked by keeping
+   reference count and global flag, so that real status is
+   (gflag|(count != 0)), so that we can use obsolete faulty interface
+   not harming clever users.
  */
 #define CONFIG_PACKET_MULTICAST        1
 
@@ -206,6 +189,7 @@ struct packet_opt
        unsigned int            frame_size;
        unsigned int            iovmax;
        unsigned int            head;
+       int                     copy_thresh;
 #endif
 };
 
@@ -537,7 +521,9 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,  struct pack
        struct tpacket_hdr *h;
        u8 * skb_head = skb->data;
        unsigned snaplen;
-       unsigned long losing;
+       unsigned long status = TP_STATUS_LOSING|TP_STATUS_USER;
+       unsigned short macoff, netoff;
+       struct sk_buff *copy_skb = NULL;
 
        if (skb->pkt_type == PACKET_LOOPBACK)
                goto drop;
@@ -572,38 +558,55 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,  struct pack
                        snaplen = res;
        }
 #endif
-       spin_lock(&sk->receive_queue.lock);
-       h = po->iovec[po->head];
-
-       if (h->tp_status)
-               goto ring_is_full;
-       po->head = po->head != po->iovmax ? po->head+1 : 0;
-       po->stats.tp_packets++;
-       losing = TP_STATUS_LOSING;
-       if (!po->stats.tp_drops)
-               losing = 0;
-       spin_unlock(&sk->receive_queue.lock);
 
        if (sk->type == SOCK_DGRAM) {
-               h->tp_mac = h->tp_net = TPACKET_ALIGN(TPACKET_HDRLEN) + 16;
+               macoff = netoff = TPACKET_ALIGN(TPACKET_HDRLEN) + 16;
        } else {
                unsigned maclen = skb->nh.raw - skb->data;
-               h->tp_net = TPACKET_ALIGN(TPACKET_HDRLEN + (maclen < 16 ? 16 : maclen));
-               h->tp_mac = h->tp_net - maclen;
+               netoff = TPACKET_ALIGN(TPACKET_HDRLEN + (maclen < 16 ? 16 : maclen));
+               macoff = netoff - maclen;
        }
 
-       if (h->tp_mac + snaplen > po->frame_size) {
-               snaplen = po->frame_size - h->tp_mac;
+       if (macoff + snaplen > po->frame_size) {
+               if (po->copy_thresh &&
+                   atomic_read(&sk->rmem_alloc) + skb->truesize < (unsigned)sk->rcvbuf) {
+                       if (skb_shared(skb)) {
+                               copy_skb = skb_clone(skb, GFP_ATOMIC);
+                       } else {
+                               copy_skb = skb_get(skb);
+                               skb_head = skb->data;
+                       }
+                       if (copy_skb)
+                               skb_set_owner_r(copy_skb, sk);
+               }
+               snaplen = po->frame_size - macoff;
                if ((int)snaplen < 0)
                        snaplen = 0;
        }
 
-       memcpy((u8*)h + h->tp_mac, skb->data, snaplen);
+       spin_lock(&sk->receive_queue.lock);
+       h = po->iovec[po->head];
+
+       if (h->tp_status)
+               goto ring_is_full;
+       po->head = po->head != po->iovmax ? po->head+1 : 0;
+       po->stats.tp_packets++;
+       if (copy_skb) {
+               status |= TP_STATUS_COPY;
+               __skb_queue_tail(&sk->receive_queue, copy_skb);
+       }
+       if (!po->stats.tp_drops)
+               status &= ~TP_STATUS_LOSING;
+       spin_unlock(&sk->receive_queue.lock);
+
+       memcpy((u8*)h + macoff, skb->data, snaplen);
 
-       h->tp_sec = skb->stamp.tv_sec;
-       h->tp_usec = skb->stamp.tv_usec;
        h->tp_len = skb->len;
        h->tp_snaplen = snaplen;
+       h->tp_mac = macoff;
+       h->tp_net = netoff;
+       h->tp_sec = skb->stamp.tv_sec;
+       h->tp_usec = skb->stamp.tv_usec;
 
        sll = (struct sockaddr_ll*)((u8*)h + TPACKET_ALIGN(sizeof(*h)));
        sll->sll_halen = 0;
@@ -615,7 +618,7 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,  struct pack
        sll->sll_pkttype = skb->pkt_type;
        sll->sll_ifindex = dev->ifindex;
 
-       h->tp_status = losing|TP_STATUS_USER;
+       h->tp_status = status;
        mb();
 
        sk->data_ready(sk, 0);
@@ -634,6 +637,8 @@ ring_is_full:
        spin_unlock(&sk->receive_queue.lock);
 
        sk->data_ready(sk, 0);
+       if (copy_skb)
+               kfree_skb(copy_skb);
        goto drop_n_restore;
 }
 
@@ -1286,6 +1291,18 @@ packet_setsockopt(struct socket *sock, int level, int optname, char *optval, int
                        return -EFAULT;
                return packet_set_ring(sk, &req, 0);
        }
+       case PACKET_COPY_THRESH:
+       {
+               int val;
+
+               if (optlen!=sizeof(val))
+                       return -EINVAL;
+               if (copy_from_user(&val,optval,sizeof(val)))
+                       return -EFAULT;
+
+               sk->protinfo.af_packet->copy_thresh = val;
+               return 0;
+       }
 #endif
        default:
                return -ENOPROTOOPT;
@@ -1814,8 +1831,8 @@ static int packet_read_proc(char *buffer, char **start, off_t offset,
                             s->protinfo.af_packet->ifindex,
                             s->protinfo.af_packet->running,
                             atomic_read(&s->rmem_alloc),
-                            s->socket->inode->i_uid,
-                            s->socket->inode->i_ino
+                            sock_i_uid(s),
+                            sock_i_ino(s)
                             );
 
                buffer[len++]='\n';
index 1d327267e174aadf415551ec07662f1fe34fcbf9..ac264c396a3fa2a881345359d9baac21ee1c4001 100644 (file)
@@ -8,7 +8,7 @@
  *             as published by the Free Software Foundation; either version
  *             2 of the License, or (at your option) any later version.
  *
- * Version:    $Id: af_unix.c,v 1.102 2000/07/26 01:04:21 davem Exp $
+ * Version:    $Id: af_unix.c,v 1.104 2000/08/10 01:21:14 davem Exp $
  *
  * Fixes:
  *             Linus Torvalds  :       Assorted bug cures.
@@ -306,6 +306,27 @@ static void unix_write_space(struct sock *sk)
        read_unlock(&sk->callback_lock);
 }
 
+/* When dgram socket disconnects (or changes its peer), we clear its receive
+ * queue of packets arrived from previous peer. First, it allows to do
+ * flow control based only on wmem_alloc; second, sk connected to peer
+ * may receive messages only from that peer. */
+static void unix_dgram_disconnected(struct sock *sk, struct sock *other)
+{
+       if (skb_queue_len(&sk->receive_queue)) {
+               skb_queue_purge(&sk->receive_queue);
+               wake_up_interruptible_all(&sk->protinfo.af_unix.peer_wait);
+
+               /* If one link of bidirectional dgram pipe is disconnected,
+                * we signal error. Messages are lost. Do not make this,
+                * when peer was not connected to us.
+                */
+               if (!other->dead && unix_peer(other) == sk) {
+                       other->err = ECONNRESET;
+                       other->error_report(other);
+               }
+       }
+}
+
 static void unix_sock_destructor(struct sock *sk)
 {
        skb_queue_purge(&sk->receive_queue);
@@ -788,6 +809,8 @@ static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
                unix_peer(sk)=other;
                unix_state_wunlock(sk);
 
+               if (other != old_peer)
+                       unix_dgram_disconnected(sk, old_peer);
                sock_put(old_peer);
        } else {
                unix_peer(sk)=other;
@@ -1203,6 +1226,7 @@ restart:
                        unix_peer(sk)=NULL;
                        unix_state_wunlock(sk);
 
+                       unix_dgram_disconnected(sk, other);
                        sock_put(other);
                        err = -ECONNREFUSED;
                } else {
@@ -1219,7 +1243,8 @@ restart:
        if (other->shutdown&RCV_SHUTDOWN)
                goto out_unlock;
 
-       if (skb_queue_len(&other->receive_queue) > other->max_ack_backlog) {
+       if (unix_peer(other) != sk &&
+           skb_queue_len(&other->receive_queue) > other->max_ack_backlog) {
                if (!timeo) {
                        err = -EAGAIN;
                        goto out_unlock;
@@ -1640,7 +1665,6 @@ static int unix_shutdown(struct socket *sock, int mode)
        return 0;
 }
 
-               
 static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
 {
        struct sock *sk = sock->sk;
@@ -1736,7 +1760,7 @@ static int unix_read_proc(char *buffer, char **start, off_t offset,
                        s->socket ?
                        (s->state == TCP_ESTABLISHED ? SS_CONNECTED : SS_UNCONNECTED) :
                        (s->state == TCP_ESTABLISHED ? SS_CONNECTING : SS_DISCONNECTING),
-                       s->socket ? s->socket->inode->i_ino : 0);
+                       sock_i_ino(s));
 
                if (s->protinfo.af_unix.addr)
                {