Import 1.3.62

author Linus Torvalds <torvalds@linuxfoundation.org>

Fri, 23 Nov 2007 20:10:33 +0000 (15:10 -0500)

committer Linus Torvalds <torvalds@linuxfoundation.org>

Fri, 23 Nov 2007 20:10:33 +0000 (15:10 -0500)
author Linus Torvalds <torvalds@linuxfoundation.org>
Fri, 23 Nov 2007 20:10:33 +0000 (15:10 -0500)
committer Linus Torvalds <torvalds@linuxfoundation.org>
Fri, 23 Nov 2007 20:10:33 +0000 (15:10 -0500)
diff --git a/Makefile b/Makefile

index 4ecd8861aacd4f89b41795c472e9db87a28a2fe5..148b15bfc2a5cef25501339a69152a2c1bd10d53 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -1,6 +1,6 @@
  VERSION = 1
  PATCHLEVEL = 3
-SUBLEVEL = 61
+SUBLEVEL = 62
  
  ARCH = i386
  
diff --git a/arch/alpha/defconfig b/arch/alpha/defconfig

index 2b0a0a1e02d2cd03f608f3a4d8a1ee3862f28926..1d0e44bd2587fcd33751e9d79638af883d698447 100644 (file)
--- a/arch/alpha/defconfig
+++ b/arch/alpha/defconfig
@@ -45,8 +45,10 @@ CONFIG_ST506=y
  CONFIG_BLK_DEV_IDE=y
  # CONFIG_BLK_DEV_IDECD is not set
  # CONFIG_BLK_DEV_IDETAPE is not set
+# CONFIG_BLK_DEV_RZ1000 is not set
  # CONFIG_BLK_DEV_CMD640 is not set
  # CONFIG_BLK_DEV_TRITON is not set
+# CONFIG_IDE_CHIPSETS is not set
  # CONFIG_BLK_DEV_XD is not set
  
  #
@@ -164,6 +166,7 @@ CONFIG_MSDOS_FS=y
  # CONFIG_UMSDOS_FS is not set
  CONFIG_PROC_FS=y
  CONFIG_NFS_FS=y
+# CONFIG_ROOT_NFS is not set
  # CONFIG_SMB_FS is not set
  CONFIG_ISO9660_FS=y
  # CONFIG_HPFS_FS is not set
diff --git a/arch/alpha/kernel/entry.S b/arch/alpha/kernel/entry.S

index a08cf26151349908692831d5dc0537419e42ef29..90a9e1ba465dcec4efe4a5b7dfc7be7db2e0b2a6 100644 (file)
--- a/arch/alpha/kernel/entry.S
+++ b/arch/alpha/kernel/entry.S
@@ -128,26 +128,12 @@ entInt:
  /* set up the arguments to the C interrupt handler */
         lda     $27,do_entInt
         jsr     $26,($27),do_entInt
-/* ok, check if we need to do software interrupts */
-1:     lda     $0,intr_count
+/* ok, return */
+       lda     $0,intr_count
         ldq     $1,0($0)
         subq    $1,1,$1
-       bne     $1,2f           /* interrupt within interrupt: return now */
-       lda     $2,bh_active
-       ldq     $3,0($2)
-       lda     $2,bh_mask
-       ldq     $2,0($2)
-       and     $2,$3,$2
-       bne     $2,3f
         stq     $1,0($0)
         br      $31,ret_from_sys_call
-.align 3
-2:     stq     $1,0($0)
-       br      $31,restore_all
-.align 3
-3:     lda     $27,do_bottom_half
-       jsr     $26,($27),do_bottom_half
-       br      $31,1b
  .end entInt
  
  .align 3
@@ -204,7 +190,7 @@ kernel_clone:
         lda $27,sys_clone
         jsr $26,($27),sys_clone
         stq $0,0($30)   
-       br ret_from_sys_call
+       br $31,ret_from_sys_call
  .end   kernel_clone
  
  /*
@@ -502,10 +488,23 @@ entSys:
         blt     $0,syscall_error        /* the call failed */
         stq     $0,0($30)
         stq     $31,72($30)             /* a3=0 => no error */
+
  .align 3
  ret_from_sys_call:
-       ldq     $0,SP_OFF($30)
         cmovne  $26,0,$19               /* $19 = 0 => non-restartable */
+       /* check bottom half interrupts */
+       lda     $0,intr_count
+       ldq     $1,0($0)
+       bne     $1,ret_from_handle_bh
+       lda     $2,bh_active
+       ldq     $3,0($2)
+       lda     $2,bh_mask
+       ldq     $4,0($2)
+       addq    $1,1,$1
+       and     $3,$4,$2
+       bne     $2,handle_bottom_half   
+ret_from_handle_bh:
+       ldq     $0,SP_OFF($30)
         and     $0,8,$0
         beq     $0,restore_all
  ret_from_reschedule:
@@ -525,6 +524,25 @@ restore_all:
         RESTORE_ALL
         rti
  
+       .align 3
+handle_bottom_half:
+       /*
+        * We're called with $0 containing the address of
+        * 'intr_count' and $1 containing 'intr_count+1'
+        */
+       stq     $1,0($0)        /* intr_count = 1 */
+       subq    $30,16,$30
+       stq     $19,0($30)      /* save syscall nr */
+       stq     $20,8($30)      /* and error indication (a3) */
+       lda     $27,do_bottom_half
+       jsr     $26,($27),do_bottom_half
+       lda     $0,intr_count
+       ldq     $19,0($30)
+       ldq     $20,8($30)
+       addq    $30,16,$30
+       stq     $31,0($0)       /* intr_count = 0 */
+       br      $31,ret_from_handle_bh
+
         .align 3
  syscall_error:
         /*
diff --git a/drivers/block/ide-cd.c b/drivers/block/ide-cd.c

index 12d22d324b831b9093ba32ef0e24a8c879e9aa4c..232dbd6632043f5ae084fd9e5edc6f6bcaf3f093 100644 (file)
--- a/drivers/block/ide-cd.c
+++ b/drivers/block/ide-cd.c
@@ -79,13 +79,18 @@
   *                       Try to eliminate byteorder assumptions.
   *                       Use atapi_cdrom_subchnl struct definition.
   *                       Add STANDARD_ATAPI compilation option.
+ * 3.07  Jan 29, 1996 -- More twiddling for broken drives: Sony 55D,
+ *                        Vertos 300.
+ *                       Add NO_DOOR_LOCKING configuration option.
+ *                       Handle drive_cmd requests w/NULL args (for hdparm -t).
+ *                       Work around sporadic Sony55e audio play problem.
   *
   * NOTE: Direct audio reads will only work on some types of drive.
   * So far, i've received reports of success for Sony and Toshiba drives.
   *
   * ATAPI cd-rom driver.  To be used with ide.c.
   *
- * Copyright (C) 1994, 1995  scott snyder  <snyder@fnald0.fnal.gov>
+ * Copyright (C) 1994, 1995, 1996  scott snyder  <snyder@fnald0.fnal.gov>
   * May be copied or modified under the terms of the GNU General Public License
   * (../../COPYING).
   */
@@ -130,6 +135,14 @@
  #endif
  
  
+/* Turning this on will disable the door-locking functionality.
+   This is apparently needed for supermount. */
+
+#ifndef NO_DOOR_LOCKING
+#define NO_DOOR_LOCKING 0
+#endif
+
+
  /************************************************************************/
  
  #define SECTOR_SIZE 512
@@ -1420,6 +1433,43 @@ int cdrom_queue_packet_command (ide_drive_t *drive, struct packet_command *pc)
  }
  
  
+\f
+/****************************************************************************
+ * drive_cmd handling.
+ *
+ * Most of the functions accessed via drive_cmd are not valid for ATAPI
+ * devices.  Only attempt to execute those which actually should be valid.
+ */
+
+static
+void cdrom_do_drive_cmd (ide_drive_t *drive)
+{
+  struct request *rq = HWGROUP(drive)->rq;
+  byte *args = rq->buffer;
+
+  if (args)
+    {
+#if 0  /* This bit isn't done yet... */
+      if (args[0] == WIN_SETFEATURES &&
+         (args[2] == 0x66 || args[2] == 0xcc || args[2] == 0x02 ||
+          args[2] == 0xdd || args[2] == 0x5d))
+       {
+         OUT_BYTE (args[2], io_base + IDE_FEATURE_OFFSET);
+         <send cmd>
+       }
+      else
+#endif
+       {
+         printk ("%s: Unsupported drive command %02x %02x %02x\n",
+                 drive->name, args[0], args[1], args[2]);
+         rq->errors = 1;
+       }
+    }
+
+  cdrom_end_request (1, drive);
+}
+
+
  \f
  /****************************************************************************
   * cdrom driver request routine.
@@ -1439,6 +1489,9 @@ void ide_do_rw_cdrom (ide_drive_t *drive, unsigned long block)
        return;
      }
  
+  else if (rq -> cmd == IDE_DRIVE_CMD)
+    cdrom_do_drive_cmd (drive);
+
    else if (rq -> cmd != READ)
      {
        printk ("ide-cd: bad cmd %d\n", rq -> cmd);
@@ -1890,11 +1943,9 @@ cdrom_play_lba_range_msf (ide_drive_t *drive, int lba_start, int lba_end,
  #endif  /* not STANDARD_ATAPI */
  
  
-/* Play audio starting at LBA LBA_START and finishing with the
-   LBA before LBA_END. */
  static int
-cdrom_play_lba_range (ide_drive_t *drive, int lba_start, int lba_end,
-                     struct atapi_request_sense *reqbuf)
+cdrom_play_lba_range_1 (ide_drive_t *drive, int lba_start, int lba_end,
+                       struct atapi_request_sense *reqbuf)
  {
    /* This is rather annoying.
       My NEC-260 won't recognize group 5 commands such as PLAYAUDIO12;
@@ -1942,6 +1993,38 @@ cdrom_play_lba_range (ide_drive_t *drive, int lba_start, int lba_end,
  }
  
  
+/* Play audio starting at LBA LBA_START and finishing with the
+   LBA before LBA_END. */
+static int
+cdrom_play_lba_range (ide_drive_t *drive, int lba_start, int lba_end,
+                     struct atapi_request_sense *reqbuf)
+{
+  int i, stat;
+  struct atapi_request_sense my_reqbuf;
+
+  if (reqbuf == NULL)
+    reqbuf = &my_reqbuf;
+
+  /* Some drives will, for certain audio cds,
+     give an error if you ask them to play the entire cd using the
+     values which are returned in the TOC.  The play will succeed, however,
+     if the ending address is adjusted downwards by a few frames. */
+  for (i=0; i<75; i++)
+    {
+      stat = cdrom_play_lba_range_1 (drive, lba_start, lba_end, reqbuf);
+
+      if (stat == 0 ||
+          !(reqbuf->sense_key == ILLEGAL_REQUEST && reqbuf->asc == 0x24))
+       return stat;
+
+      --lba_end;
+      if (lba_end <= lba_start) break;
+    }
+
+  return stat;
+}
+
+
  static
  int cdrom_get_toc_entry (ide_drive_t *drive, int track,
                           struct atapi_toc_entry **ent,
@@ -2575,7 +2658,12 @@ void ide_cdrom_setup (ide_drive_t *drive)
    /* Turn this off by default, since many people don't like it. */
    CDROM_STATE_FLAGS (drive)->eject_on_close= 0;
  
+#if NO_DOOR_LOCKING
+  CDROM_CONFIG_FLAGS (drive)->no_doorlock = 1;
+#else
    CDROM_CONFIG_FLAGS (drive)->no_doorlock = 0;
+#endif
+
    CDROM_CONFIG_FLAGS (drive)->drq_interrupt =
      ((drive->id->config & 0x0060) == 0x20);
  
@@ -2608,17 +2696,30 @@ void ide_cdrom_setup (ide_drive_t *drive)
        CDROM_CONFIG_FLAGS (drive)->no_playaudio12 = 1;
      }
  
-  else if (strcmp (drive->id->model, "V003S0DS") == 0 ||  /* Vertos */
-          strcmp (drive->id->model, "0V300SSD") == 0)
+  /* Vertos 300.
+     There seem to be at least two different, incompatible versions
+     of this drive floating around.  Luckily, they appear to return their
+     id strings with different byte orderings. */
+  else if (strcmp (drive->id->model, "V003S0DS") == 0)
      {
        CDROM_CONFIG_FLAGS (drive)->vertos_lossage = 1;
        CDROM_CONFIG_FLAGS (drive)->playmsf_uses_bcd = 1;
        CDROM_CONFIG_FLAGS (drive)->no_lba_toc = 1;
      }
+  else if (strcmp (drive->id->model, "0V300SSD") == 0 ||
+          strcmp (drive->id->model, "V003M0DP") == 0)
+    CDROM_CONFIG_FLAGS (drive)->no_lba_toc = 1;
  
+  /* Vertos 400. */
    else if (strcmp (drive->id->model, "V004E0DT") == 0 ||
            strcmp (drive->id->model, "0V400ETD") == 0)
      CDROM_CONFIG_FLAGS (drive)->no_lba_toc = 1;
+
+  else if ( strcmp (drive->id->model, "CD-ROM CDU55D") == 0) /*sony cdu55d */
+    CDROM_CONFIG_FLAGS (drive)->no_playaudio12 = 1;
+
+ else if (strcmp (drive->id->model, "CD-ROM CDU55E") == 0)
+       CDROM_CONFIG_FLAGS (drive)->no_playaudio12 = 1;
  #endif  /* not STANDARD_ATAPI */
  
    drive->cdrom_info.toc               = NULL;
diff --git a/drivers/char/ChangeLog b/drivers/char/ChangeLog

index 9d3ff86c4ca2878614bbcf1f292ead1c420fa9ee..9a2e2f9090e8b9ffa66268c4eeb5fb80262e9ef7 100644 (file)
--- a/drivers/char/ChangeLog
+++ b/drivers/char/ChangeLog
@@ -1,3 +1,8 @@
+Fri Feb  9 14:15:47 1996    <tytso@rsts-11.mit.edu>
+
+       * serial.c (block_til_ready): Fixed another race condition which
+               happens if a hangup happens during the open.
+
  Wed Jan 10 10:08:00 1996    <tytso@rsts-11.mit.edu>
  
         * serial.c (block_til_ready): Remove race condition which happened
diff --git a/drivers/char/apm_bios.c b/drivers/char/apm_bios.c

index 92bd21d459b2a223d4c7ef78ab2925d1d5e660cb..2d6890d859a9ba8e03a0df73bb7f8ecf98a63e23 100644 (file)
--- a/drivers/char/apm_bios.c
+++ b/drivers/char/apm_bios.c
@@ -705,7 +705,10 @@ static void do_apm_timer(unsigned long unused)
                 if (err)
                         apm_error("busy", err);
         }
-       check_events();
+
+       if (!(((standbys_pending > 0) || (suspends_pending > 0))
+             && (apm_bios_info.version == 0x100)))
+               check_events();
  
         init_timer(&apm_timer);
         apm_timer.expires = APM_CHECK_TIMEOUT + jiffies;
diff --git a/drivers/char/console.c b/drivers/char/console.c

index 7ffaa675350bcabdf3404f09af6e79b250274ba4..cb35566059c164d177783138f0af6b4e6fd40fe9 100644 (file)
--- a/drivers/char/console.c
+++ b/drivers/char/console.c
@@ -72,6 +72,12 @@
  #define CTRL_ACTION 0x0d00ff81
  #define CTRL_ALWAYS 0x0800f501 /* Cannot be overridden by disp_ctrl */
  
+/*
+ * Here are the default bell parameters: 750 Hz, 1/8th of a second
+ */
+#define DEFAULT_BELL_PITCH     750
+#define DEFAULT_BELL_DURATION  (HZ/8)
+
  /*
   *  NOTE!!! We sometimes disable and enable interrupts for a short while
   * (to put a word in video IO), but this will work even for keyboard
@@ -1136,17 +1142,16 @@ static void setterm_command(int currcons)
                         break;
                 case 10: /* set bell frequency in Hz */
                         if (npar >= 1)
-                               bell_pitch = (par[1] < 20 || par[1] > 32767) ?
-                                       0 : 1193180 / par[1];
+                               bell_pitch = par[1];
                         else
-                               bell_pitch = 0x637;
+                               bell_pitch = DEFAULT_BELL_PITCH;
                         break;
                 case 11: /* set bell duration in msec */
                         if (npar >= 1)
                                 bell_duration = (par[1] < 2000) ?
                                         par[1]*HZ/1000 : 0;
                         else
-                               bell_duration = HZ/8;
+                               bell_duration = DEFAULT_BELL_DURATION;
                         break;
                 case 12: /* bring specified console to the front */
                         if (par[1] >= 1 && vc_cons_allocated(par[1]-1))
@@ -1318,8 +1323,8 @@ static void reset_terminal(int currcons, int do_clear)
         tab_stop[3]     =
         tab_stop[4]     = 0x01010101;
  
-       bell_pitch = 0x637;
-       bell_duration = HZ/8;
+       bell_pitch = DEFAULT_BELL_PITCH;
+       bell_duration = DEFAULT_BELL_DURATION;
  
         gotoxy(currcons,0,0);
         save_cur(currcons);
@@ -1475,7 +1480,7 @@ static int con_write(struct tty_struct * tty, int from_user,
                  */
                 switch (c) {
                         case 7:
-                               if (bell_pitch && bell_duration)
+                               if (bell_duration)
                                         kd_mksound(bell_pitch, bell_duration);
                                 continue;
                         case 8:
diff --git a/drivers/char/serial.c b/drivers/char/serial.c

index 1022e284c2d6b875f8db8956dfee1ae9e61d6884..5833745cda7008a929f9401819e5505c5f783a72 100644 (file)
--- a/drivers/char/serial.c
+++ b/drivers/char/serial.c
@@ -2218,7 +2218,8 @@ static int block_til_ready(struct tty_struct *tty, struct file * filp,
          */
         if (tty_hung_up_p(filp) ||
             (info->flags & ASYNC_CLOSING)) {
-               interruptible_sleep_on(&info->close_wait);
+               if (info->flags & ASYNC_CLOSING)
+                       interruptible_sleep_on(&info->close_wait);
  #ifdef SERIAL_DO_RESTART
                 if (info->flags & ASYNC_HUP_NOTIFY)
                         return -EAGAIN;
diff --git a/drivers/char/vt.c b/drivers/char/vt.c

index c2c5f17f1aa32e3361a02ee22c91ae458ab885a9..72af7eac3c8c3617c0a21ef3b6cc9a5f328c55e1 100644 (file)
--- a/drivers/char/vt.c
+++ b/drivers/char/vt.c
@@ -155,10 +155,16 @@ kd_nosound(unsigned long ignored)
  }
  
  void
-kd_mksound(unsigned int count, unsigned int ticks)
+_kd_mksound(unsigned int hz, unsigned int ticks)
  {
-       static struct timer_list sound_timer = { NULL, NULL, 0, 0, kd_nosound };
+       static struct timer_list sound_timer = { NULL, NULL, 0, 0,
+                                                kd_nosound };
  
+       unsigned int count = 0;
+
+       if (hz > 20 && hz < 32767)
+               count = 1193180 / hz;
+       
         cli();
         del_timer(&sound_timer);
         if (count) {
@@ -180,6 +186,8 @@ kd_mksound(unsigned int count, unsigned int ticks)
         return;
  }
  
+void (*kd_mksound)(unsigned int hz, unsigned int ticks) = _kd_mksound;
+       
  /*
   * We handle the console-specific ioctl's here.  We allow the
   * capability to modify any console, not just the fg_console. 
@@ -211,22 +219,30 @@ int vt_ioctl(struct tty_struct *tty, struct file * file,
         case KIOCSOUND:
                 if (!perm)
                         return -EPERM;
-               kd_mksound((unsigned int)arg, 0);
+               /*
+                * KIOCSOUND passes a timer divisor, kd_mksound() wants Hz.
+                * A divisor of 0 means "sound off"; never divide by it.
+                */
+               if (arg)
+                       arg = 1193180 / arg;
+               kd_mksound(arg, 0);
                 return 0;
  
         case KDMKTONE:
                 if (!perm)
                         return -EPERM;
         {
-               unsigned int ticks = HZ * ((arg >> 16) & 0xffff) / 1000;
-
+               unsigned int ticks, count;
+
                 /*
                  * Generate the tone for the appropriate number of ticks.
                  * If the time is zero, turn off sound ourselves.
                  */
-               kd_mksound(arg & 0xffff, ticks);
-               if (ticks == 0)
-                       kd_nosound(0);
+               ticks = HZ * ((arg >> 16) & 0xffff) / 1000;
+               count = ticks ? (arg & 0xffff) : 0;
+               if (count)      /* zero divisor: stay silent, don't divide */
+                       count = 1193180 / count;
+               kd_mksound(count, ticks);
                 return 0;
         }
  
diff --git a/drivers/char/vt_kern.h b/drivers/char/vt_kern.h

index 135369a5f9fbb275578dad6a1263579bd7a3000f..1692f991c815c00d0beddb1cd0f020bca90c3f81 100644 (file)
--- a/drivers/char/vt_kern.h
+++ b/drivers/char/vt_kern.h
@@ -30,7 +30,8 @@ extern struct vt_struct {
         struct wait_queue *paste_wait;
  } *vt_cons[MAX_NR_CONSOLES];
  
-void kd_mksound(unsigned int count, unsigned int ticks);
+/* Sound hook, defined and initialised in vt.c; declared (not defined) here. */
+extern void (*kd_mksound)(unsigned int hz, unsigned int ticks);
  int vc_allocate(unsigned int console);
  int vc_cons_allocated(unsigned int console);
  int vc_resize(unsigned long lines, unsigned long cols);
diff --git a/drivers/net/3c59x.c b/drivers/net/3c59x.c

index fde60129b21ba694628c74c5bbf680506eb943a6..86c870b6816603051cd223aafacf697e36e308e9 100644 (file)
--- a/drivers/net/3c59x.c
+++ b/drivers/net/3c59x.c
@@ -413,6 +413,16 @@ static int vortex_found_device(struct device *dev, int ioaddr, int irq,
         vp  = (struct vortex_private *)dev->priv;
         vp->product_name = product_names[product_index];
         vp->options = options;
+       if (options >= 0) {
+               vp->media_override = options & 7;
+               vp->full_duplex = (options & 8) ? 1 : 0;
+               vp->bus_master = (options & 16) ? 1 : 0;
+       } else {
+               vp->media_override = 7;
+               vp->full_duplex = 0;
+               vp->bus_master = 0;
+       }
+
         vortex_probe1(dev);
  #endif /* MODULE */
         return 0;
diff --git a/drivers/net/ibmtr.c b/drivers/net/ibmtr.c

index f67bad89f7ba43f0b50875b284e07f36ed612acb..dd455127691a812c5bea39e9e6de9871d4bf66e9 100644 (file)
--- a/drivers/net/ibmtr.c
+++ b/drivers/net/ibmtr.c
@@ -1242,7 +1242,7 @@ DPRINTK("tada: sending packet...\n");
  
         if (dev->tbusy) {
                 int ticks_waited=jiffies - dev->trans_start;
-               if(ticks_waited<5)
+               if(ticks_waited<TX_TIMEOUT)
                         return 1;
                 DPRINTK("Arrg. Transmitter busy for more than 50 msec. Donald resets adapter, but resetting\n \
  the IBM tokenring adapter takes a long time. It might not even help when the\n \
diff --git a/drivers/net/ibmtr.h b/drivers/net/ibmtr.h

index 59fe7a9fd57c7663375eaccb4f0264366bec03e5..0fb831f0547fa052829e5ae5aa076dfe1f381585 100644 (file)
--- a/drivers/net/ibmtr.h
+++ b/drivers/net/ibmtr.h
@@ -8,6 +8,16 @@
  #define NOTOK 0
  #define TOKDEBUG 1
  
+/* Mike Eckhoff -- 96/02/08 */
+/* This defines the minimum timeout. If a transmission takes */
+/* longer than TX_TIMEOUT to send, we will wait and retry. */
+/* On large networks, this value may need to be increased. */
+/* We will start at .2s because that is what most drivers seem to be doing */
+/* now and the original value of .05s was not nearly enough for large nets. */
+
+#define TX_TIMEOUT (HZ/5)
+
+
  #ifndef IBMTR_SHARED_RAM_BASE
  #define IBMTR_SHARED_RAM_BASE 0xD0
  #define IBMTR_SHARED_RAM_SIZE 0x10
diff --git a/drivers/scsi/st.c b/drivers/scsi/st.c

index 2c50d3412bf315c3d5ecdf5336e03f72bd589ad7..21e6749529f802cdfe00ca41518d4d31be87fcf8 100644 (file)
--- a/drivers/scsi/st.c
+++ b/drivers/scsi/st.c
@@ -11,7 +11,7 @@
    Copyright 1992, 1993, 1994, 1995 Kai Makisara
                  email Kai.Makisara@metla.fi
  
-  Last modified: Thu Dec 14 21:51:16 1995 by root@kai.makisara.fi
+  Last modified: Mon Jan 29 21:18:12 1996 by root@kai.makisara.fi
    Some small formal changes - aeb, 950809
  */
  
@@ -438,6 +438,11 @@ flush_buffer(struct inode * inode, struct file * filp, int seek_next)
      if (!result && backspace > 0)
        result = st_int_ioctl(inode, filp, MTBSR, backspace);
    }
+  else if ((STp->eof == ST_FM) && !STp->eof_hit) {
+    (STp->mt_status)->mt_fileno++;
+    STp->drv_block = 0;
+  }
+
    return result;
  
  }
@@ -1849,6 +1854,10 @@ st_ioctl(struct inode * inode,struct file * file,
       if (i)
         return i;
  
+     i = flush_buffer(inode, file, FALSE);
+     if (i < 0)
+       return i;
+
       (STp->mt_status)->mt_dsreg =
         ((STp->block_size << MT_ST_BLKSIZE_SHIFT) & MT_ST_BLKSIZE_MASK) |
         ((STp->density << MT_ST_DENSITY_SHIFT) & MT_ST_DENSITY_MASK);
diff --git a/fs/fat/dir.c b/fs/fat/dir.c

index e40ea14e64ad8df070dcf5b0752d905e4ed83fa7..22f6e23318c75257cee7e92753b2854a97a937f7 100644 (file)
--- a/fs/fat/dir.c
+++ b/fs/fat/dir.c
@@ -17,6 +17,7 @@
  #include <linux/stat.h>
  #include <linux/string.h>
  #include <linux/ioctl.h>
+#include <linux/dirent.h>
  
  #include <asm/segment.h>
  
@@ -44,30 +45,12 @@ struct file_operations fat_dir_operations = {
         file_fsync              /* fsync */
  };
  
-
-int fat_dir_ioctl(struct inode * inode, struct file * filp,
-                 unsigned int cmd, unsigned long arg)
-{
-       switch (cmd) {
-#if 0
-       /*
-        * We want to provide an interface for Samba to be able
-        * to get the short filename for a given long filename.
-        * We should be able to accomplish by modifying fat_readdir
-        * slightly.
-        */
-       case VFAT_LONGNAME_TO_SHORT:
-#endif
-       default:
-               return -EINVAL;
-       }
-}
-
-int fat_readdir(
+int fat_readdirx(
         struct inode *inode,
         struct file *filp,
         void *dirent,
-       filldir_t filldir)
+       filldir_t filldir,
+       int both)
  {
         struct super_block *sb = inode->i_sb;
         int ino,i,i2,last;
@@ -76,7 +59,7 @@ int fat_readdir(
         struct msdos_dir_entry *de;
         unsigned long oldpos = filp->f_pos;
         int is_long;
-       char longname[256];
+       char longname[275];
         unsigned char long_len = 0; /* Make compiler warning go away */
         unsigned char alias_checksum = 0; /* Make compiler warning go away */
  
@@ -209,7 +192,7 @@ int fat_readdir(
                         }
                         PRINTK(("Long filename: %s, get_new_entry: %d\n", longname, get_new_entry));
                 } else if (!IS_FREE(de->name) && !(de->attr & ATTR_VOLUME)) {
-                       char bufname[13];
+                       char bufname[14];
                         char *ptname = bufname;
                         int dotoffset = 0;
  
@@ -258,11 +241,20 @@ int fat_readdir(
                                         ino = fat_parent_ino(inode,0);
  
                                 if (!is_long) {
+                                       dcache_add(inode, bufname, i+dotoffset, ino);
+                                       if (both) {
+                                               bufname[i+dotoffset] = '\0';
+                                       }
                                         if (filldir(dirent, bufname, i+dotoffset, oldpos, ino) < 0) {
                                                 filp->f_pos = oldpos;
                                                 break;
                                         }
                                 } else {
+                                       dcache_add(inode, longname, long_len, ino);
+                                       if (both) {
+                                               memcpy(&longname[long_len+1], bufname, i+dotoffset);
+                                               long_len += i+dotoffset;
+                                       }
                                         if (filldir(dirent, longname, long_len, oldpos, ino) < 0) {
                                                 filp->f_pos = oldpos;
                                                 break;
@@ -280,3 +272,80 @@ int fat_readdir(
         if (bh) brelse(bh);
         return 0;
  }
+
+int fat_readdir(
+       struct inode *inode,
+       struct file *filp,
+       void *dirent,
+       filldir_t filldir)
+{
+    return fat_readdirx(inode, filp, dirent, filldir, 0);
+}
+static int vfat_ioctl_fill(
+       void * buf,
+       const char * name,
+       int name_len,
+       off_t offset,
+       ino_t ino)
+{
+       struct dirent *d1 = (struct dirent *)buf;
+       struct dirent *d2 = d1 + 1;
+       int len, slen;
+       int dotdir;
+
+       if (get_user(&d1->d_reclen) != 0) {
+               return -1;
+       }
+
+       if ((name_len == 1 && name[0] == '.') ||
+           (name_len == 2 && name[0] == '.' && name[1] == '.')) {
+               dotdir = 1;
+               len = name_len;
+       } else {
+               dotdir = 0;
+               len = strlen(name);
+       }
+       if (len != name_len) {
+               memcpy_tofs(d2->d_name, name, len);
+               put_user(0, d2->d_name + len);
+               put_user(len, &d2->d_reclen);
+               put_user(ino, &d2->d_ino);
+               put_user(offset, &d2->d_off);
+               slen = name_len - len;
+               memcpy_tofs(d1->d_name, name+len+1, slen);
+               put_user(0, d1->d_name+slen);
+               put_user(slen, &d1->d_reclen);
+       } else {
+               put_user(0, d2->d_name);
+               put_user(0, &d2->d_reclen);
+               memcpy_tofs(d1->d_name, name, len);
+               put_user(0, d1->d_name+len);
+               put_user(len, &d1->d_reclen);
+       }
+       PRINTK(("FAT d1=%p d2=%p len=%d, name_len=%d\n",
+               d1, d2, len, name_len));
+
+       return 0;
+}
+
+int fat_dir_ioctl(struct inode * inode, struct file * filp,
+                 unsigned int cmd, unsigned long arg)
+{
+       /*
+        * We want to provide an interface for Samba to be able
+        * to get the short filename for a given long filename.
+        * Samba should use this ioctl instead of readdir() to
+        * get the information it needs.
+        */
+       switch (cmd) {
+       case VFAT_IOCTL_READDIR_BOTH: {
+               struct dirent *d1 = (struct dirent *)arg;
+               put_user(0, &d1->d_reclen);
+               return fat_readdirx(inode,filp,(void *)arg,vfat_ioctl_fill,1);
+       }
+       default:
+               return -EINVAL;
+       }
+
+       return 0;
+}
diff --git a/fs/fat/inode.c b/fs/fat/inode.c

index 34dddcd12eee7cf3c48fc8fdaa8282f36d5d991c..55d2fa675c318b5e265f04ca3910784683fad471 100644 (file)
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -434,7 +434,7 @@ void fat_read_inode(struct inode *inode, struct inode_operations *fs_dir_inode_o
                 inode->i_size = CF_LE_L(raw_entry->size);
         }
         if(raw_entry->attr & ATTR_SYS)
-               if (MSDOS_I(inode)->sys_immutable)
+               if (MSDOS_SB(inode->i_sb)->sys_immutable)
                         inode->i_flags |= S_IMMUTABLE;
         MSDOS_I(inode)->i_binary = is_binary(MSDOS_SB(inode->i_sb)->conversion,
             raw_entry->ext);
diff --git a/fs/proc/array.c b/fs/proc/array.c

index 1e2c9c64b072dfefc9e6831feaba9a7d3980f2e0..eed832f0c431b27f6762e25320f201fda382b815 100644 (file)
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -123,9 +123,6 @@ struct inode_operations proc_kcore_inode_operations = {
  };
  
  
-extern unsigned long prof_len;
-extern unsigned long * prof_buffer;
-extern unsigned long prof_shift;
  /*
   * This function accesses profiling information. The returned data is
   * binary: the sampling step and the actual contents of the profile
@@ -137,21 +134,21 @@ static int read_profile(struct inode *inode, struct file *file, char *buf, int c
         unsigned long p = file->f_pos;
         int read;
         char * pnt;
-       unsigned long sample_step = 1 << prof_shift;
+       unsigned int sample_step = 1 << prof_shift;
  
         if (count < 0)
                 return -EINVAL;
-       if (p >= (prof_len+1)*sizeof(unsigned long))
+       if (p >= (prof_len+1)*sizeof(unsigned int))
                 return 0;
-       if (count > (prof_len+1)*sizeof(unsigned long) - p)
-               count = (prof_len+1)*sizeof(unsigned long) - p;
+       if (count > (prof_len+1)*sizeof(unsigned int) - p)
+               count = (prof_len+1)*sizeof(unsigned int) - p;
         read = 0;
  
-       while (p < sizeof(unsigned long) && count > 0) {
+       while (p < sizeof(unsigned int) && count > 0) {
                 put_user(*((char *)(&sample_step)+p),buf);
                 buf++; p++; count--; read++;
         }
-       pnt = (char *)prof_buffer + p - sizeof(unsigned long);
+       pnt = (char *)prof_buffer + p - sizeof(unsigned int);
         memcpy_tofs(buf,(void *)pnt,count);
         read += count;
         file->f_pos += read;
diff --git a/include/linux/msdos_fs.h b/include/linux/msdos_fs.h

index dfd14116da862486eaa44d32acd12ce4219c067e..19dabc1d7e4eb49e41ce6dafd537a2a15d942496 100644 (file)
--- a/include/linux/msdos_fs.h
+++ b/include/linux/msdos_fs.h
@@ -37,6 +37,12 @@
  #define ATTR_EXT     (ATTR_RO | ATTR_HIDDEN | ATTR_SYS | ATTR_VOLUME)
         /* bits that are used by the Windows 95/Windows NT extended FAT */
  
+#define ATTR_DIR_READ_BOTH 512 /* read both short and long names from the
+                               * vfat filesystem.  This is used by Samba
+                               * to export the vfat filesystem with correct
+                               * shortnames. */
+#define ATTR_DIR_READ_SHORT 1024
+
  #define CASE_LOWER_BASE 8      /* base is lower case */
  #define CASE_LOWER_EXT  16     /* extension is lower case */
  
@@ -63,6 +69,17 @@
  
  #define MSDOS_FAT12 4078 /* maximum number of clusters in a 12 bit FAT */
  
+/*
+ * Inode flags
+ */
+#define FAT_BINARY_FL          0x00000001 /* File contains binary data */
+
+/*
+ * ioctl commands
+ */
+#define        VFAT_IOCTL_READDIR_BOTH         _IOR('r', 1, long)
+#define        VFAT_IOCTL_READDIR_SHORT        _IOW('r', 2, long)
+
  /*
   * Conversion from and to little-endian byte order. (no-op on i386/i486)
   *
@@ -128,14 +145,6 @@ struct slot_info {
         int ino;                       /* ino for the file */
  };
  
-struct fat_cache {
-       kdev_t device; /* device number. 0 means unused. */
-       int ino; /* inode number. */
-       int file_cluster; /* cluster number in the file. */
-       int disk_cluster; /* cluster number on disk. */
-       struct fat_cache *next; /* next cache entry */
-};
-
  /* Determine whether this FS has kB-aligned data. */
  #define MSDOS_CAN_BMAP(mib) (!(((mib)->cluster_size & 1) || \
      ((mib)->data_start & 1)))
@@ -149,6 +158,14 @@ struct fat_cache {
  
  #ifdef __KERNEL__
  
+struct fat_cache {
+       kdev_t device; /* device number. 0 means unused. */
+       int ino; /* inode number. */
+       int file_cluster; /* cluster number in the file. */
+       int disk_cluster; /* cluster number on disk. */
+       struct fat_cache *next; /* next cache entry */
+};
+
  /* misc.c */
  extern int is_binary(char conversion,char *extension);
  extern void lock_fat(struct super_block *sb);
diff --git a/include/linux/msdos_fs_i.h b/include/linux/msdos_fs_i.h

index ad7dc77778e2a92f5631c68b0fccb694098b9879..b11e224835cb0305049d2c61b30b74f4e7d816de 100644 (file)
--- a/include/linux/msdos_fs_i.h
+++ b/include/linux/msdos_fs_i.h
@@ -34,7 +34,6 @@ struct msdos_inode_info {
         struct inode *i_old;    /* pointer to the old inode this inode
                                    depends on */
         int i_binary;   /* file contains non-text data */
-       int sys_immutable;      /* file is an immutable system file */
  };
  
  #endif
diff --git a/include/linux/random.h b/include/linux/random.h

index dceae6815c1219c92112915a3c4baf7ddd29fffe..8fce34d59afccbeac684468395ff6c5bb556a0ab 100644 (file)
--- a/include/linux/random.h
+++ b/include/linux/random.h
@@ -25,18 +25,20 @@ struct rand_pool_info {
  
  #ifdef __KERNEL__
  
-void rand_initialize(void);
-void rand_initialize_irq(int irq);
-void rand_initialize_blkdev(int irq, int mode);
+extern void rand_initialize(void);
+extern void rand_initialize_irq(int irq);
+extern void rand_initialize_blkdev(int irq, int mode);
  
-void add_keyboard_randomness(unsigned char scancode);
-void add_mouse_randomness(__u32 mouse_data);
-void add_interrupt_randomness(int irq);
-void add_blkdev_randomness(int major);
+extern void add_keyboard_randomness(unsigned char scancode);
+extern void add_mouse_randomness(__u32 mouse_data);
+extern void add_interrupt_randomness(int irq);
+extern void add_blkdev_randomness(int major);
  
-void get_random_bytes(void *buf, int nbytes);
+extern void get_random_bytes(void *buf, int nbytes);
  
-struct file_operations random_fops, urandom_fops;
+#ifndef MODULE
+extern struct file_operations random_fops, urandom_fops;
+#endif
  
  #endif /* __KERNEL___ */
  
diff --git a/include/linux/sched.h b/include/linux/sched.h

index 3b4161d4b43ae45c6a427ca3cf47df555de966cd..e881efdf08af7eb1b7658f8fc949368c367583f4 100644 (file)
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -314,7 +314,7 @@ extern struct timeval xtime;
  extern int need_resched;
  extern void do_timer(struct pt_regs *);
  
-extern unsigned long * prof_buffer;
+extern unsigned int * prof_buffer;
  extern unsigned long prof_len;
  extern unsigned long prof_shift;
  
diff --git a/include/linux/tcp.h b/include/linux/tcp.h

index 5805203ea30ba4a098b67dc690eee88451d1e202..ae6a063e32dc72dbc1033f8d65add497aecbe416 100644 (file)
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -17,6 +17,9 @@
  #ifndef _LINUX_TCP_H
  #define _LINUX_TCP_H
  
+#include <linux/types.h>
+#include <asm/byteorder.h>
+
  struct tcphdr {
         __u16   source;
         __u16   dest;
diff --git a/include/linux/time.h b/include/linux/time.h

index d775698599c4e547424f23a628cece9fc473a5af..be81225b8ff21c171e214fe95ad048679ddbc3ea 100644 (file)
--- a/include/linux/time.h
+++ b/include/linux/time.h
@@ -2,8 +2,8 @@
  #define _LINUX_TIME_H
  
  struct timespec {
-        long    tv_sec;         /* seconds */
-        long    tv_nsec;        /* nanoseconds */
+       long    ts_sec;         /* seconds */
+       long    ts_nsec;        /* nanoseconds */
  };
  
  struct timeval {
diff --git a/include/net/icmp.h b/include/net/icmp.h

index 131ea237b5a0173589e9bf91249418e9306e905e..e4ae8213057a7d03e588ddd26ddd8cf1aa4308a8 100644 (file)
--- a/include/net/icmp.h
+++ b/include/net/icmp.h
@@ -19,6 +19,10 @@
  #define        _ICMP_H
  
  #include <linux/icmp.h>
+#include <linux/skbuff.h>
+
+#include <net/sock.h>
+#include <net/protocol.h>
  
  extern struct icmp_err icmp_err_convert[];
  extern struct icmp_mib icmp_statistics;
diff --git a/include/net/protocol.h b/include/net/protocol.h

index 5e54fc4bcea39db8841f657eed369ba67ecb86a3..ae328b6982eb19b653863e727c4132cf6b7e8b0c 100644 (file)
--- a/include/net/protocol.h
+++ b/include/net/protocol.h
@@ -23,7 +23,6 @@
  #ifndef _PROTOCOL_H
  #define _PROTOCOL_H
  
-
  #define MAX_INET_PROTOS        32              /* Must be a power of 2         */
  
  
diff --git a/include/net/tcp.h b/include/net/tcp.h

index ca66f27180612eb44039a38d569a314f5b9f6ff5..4a820364ca15845b6bbfc12fed642c57081ae327 100644 (file)
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -19,6 +19,7 @@
  #define _TCP_H
  
  #include <linux/tcp.h>
+#include <net/checksum.h>
  
  #define MAX_SYN_SIZE   44 + MAX_HEADER + 15
  #define MAX_FIN_SIZE   40 + MAX_HEADER + 15
@@ -104,24 +105,16 @@ extern __inline int between(__u32 seq1, __u32 seq2, __u32 seq3)
         return (after(seq1+1, seq2) && before(seq1, seq3+1));
  }
  
-
-/*
- * List all states of a TCP socket that can be viewed as a "connected"
- * state.  This now includes TCP_SYN_RECV, although I am not yet fully
- * convinced that this is the solution for the 'getpeername(2)'
- * problem. Thanks to Stephen A. Wood <saw@cebaf.gov>  -FvK
- */
-
-extern __inline const int tcp_connected(const int state)
+static __inline__ int min(unsigned int a, unsigned int b)
  {
-  return(state == TCP_ESTABLISHED || state == TCP_CLOSE_WAIT ||
-        state == TCP_FIN_WAIT1   || state == TCP_FIN_WAIT2 ||
-        state == TCP_SYN_RECV);
+       if (a < b) 
+               return(a);
+       return(b);
  }
  
-
  extern struct proto tcp_prot;
-
+extern struct tcp_mib tcp_statistics;
+extern struct wait_queue *master_select_wakeup;
  
  extern void    tcp_err(int type, int code, unsigned char *header, __u32 daddr,
                         __u32, struct inet_protocol *protocol);
@@ -131,13 +124,195 @@ extern int       tcp_rcv(struct sk_buff *skb, struct device *dev,
                         unsigned short len, __u32 saddr, int redo,
                         struct inet_protocol *protocol);
  
-extern int     tcp_ioctl(struct sock *sk, int cmd, unsigned long arg);
+extern int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg);
  
+extern void tcp_read_wakeup(struct sock *);
+extern void tcp_write_xmit(struct sock *);
+extern void tcp_time_wait(struct sock *);
+extern void tcp_retransmit(struct sock *, int);
+extern void tcp_do_retransmit(struct sock *, int);
  extern void tcp_send_check(struct tcphdr *th, unsigned long saddr, 
                 unsigned long daddr, int len, struct sock *sk);
-extern void tcp_send_probe0(struct sock *sk);
+
+/* tcp_output.c */
+
+extern void tcp_send_probe0(struct sock *);
+extern void tcp_send_partial(struct sock *);
+extern void tcp_write_wakeup(struct sock *);
+extern void tcp_send_fin(struct sock *sk);
+extern void tcp_send_synack(struct sock *, struct sock *, struct sk_buff *);
+extern void tcp_send_skb(struct sock *, struct sk_buff *);
+extern void tcp_send_ack(u32, u32, struct sock *sk, struct tcphdr *th, u32);
+extern void tcp_send_reset(unsigned long saddr, unsigned long daddr, struct tcphdr *th,
+         struct proto *prot, struct options *opt, struct device *dev, int tos, int ttl);
+
  extern void tcp_enqueue_partial(struct sk_buff *, struct sock *);
  extern struct sk_buff * tcp_dequeue_partial(struct sock *);
+
+/* tcp_input.c */
  extern void tcp_cache_zap(void);
  
+/* tcp_timer.c */
+#define     tcp_reset_msl_timer(x,y,z) reset_timer(x,y,z)
+extern void tcp_reset_xmit_timer(struct sock *, int, unsigned long);
+extern void tcp_retransmit_timer(unsigned long);
+
+/*
+ *     Default sequence number picking algorithm.
+ *     As close as possible to RFC 793, which
+ *     suggests using a 250kHz clock.
+ *     Further reading shows this assumes 2Mb/s networks.
+ *     For 10Mb/s ethernet, a 1MHz clock is appropriate.
+ *     That's funny, Linux has one built in!  Use it!
+ */
+
+static inline u32 tcp_init_seq(void)
+{
+       struct timeval tv;
+       do_gettimeofday(&tv);
+       return tv.tv_usec+tv.tv_sec*1000000;
+}
+
+/*
+ *      This function returns the amount that we can raise the
+ *      usable window based on the following constraints
+ *  
+ *     1. The window can never be shrunk once it is offered (RFC 793)
+ *     2. We limit memory per socket
+ */
+
+static __inline__ unsigned short tcp_raise_window(struct sock *sk)
+{
+       long free_space = sock_rspace(sk);
+       long window;
+
+       if (free_space > 1024)
+               free_space &= ~0x3FF; /* make free space a multiple of 1024 */
+
+       if(sk->window_clamp)
+               free_space = min(sk->window_clamp, free_space);
+ 
+       /* 
+         * compute the actual window i.e. 
+         * old_window - received_bytes_on_that_win 
+        */
+
+       window = sk->window - (sk->acked_seq - sk->lastwin_seq);
+
+       if (sk->mss == 0)
+               sk->mss = sk->mtu;
+ 
+       if ( window < 0 ) {     
+               window = 0;
+               printk(KERN_DEBUG "TRW: win < 0 w=%d 1=%u 2=%u\n", 
+                      sk->window, sk->acked_seq, sk->lastwin_seq);
+       }
+       
+       if ( (free_space - window) >= min(sk->mss, MAX_WINDOW/2) )
+               return ((free_space - window) / sk->mss) * sk->mss;
+
+       return 0;
+}
+
+static __inline__ unsigned short tcp_select_window(struct sock *sk)
+{
+       long free_space = sock_rspace(sk);
+       long window;
+
+       if (free_space > 1024)
+               free_space &= ~0x3FF;   /* make free space a multiple of 1024 */
+
+       if (sk->window_clamp)
+               free_space = min(sk->window_clamp, free_space);
+       
+       /*
+        * compute the actual window i.e.
+        * old_window - received_bytes_on_that_win
+        */
+
+       if (sk->mss == 0)
+               sk->mss = sk->mtu;
+
+       window = sk->window - (sk->acked_seq - sk->lastwin_seq);
+
+       if ( window < 0 ) {
+               window = 0;
+               printk(KERN_DEBUG "TSW: win < 0 w=%d 1=%u 2=%u\n",
+                       sk->window, sk->acked_seq, sk->lastwin_seq);
+       }
+
+       /*
+        * RFC 1122:
+        * "the suggested [SWS] avoidance algorithm for the receiver is to keep
+        *  RECV.NEXT + RCV.WIN fixed until:
+        *  RCV.BUFF - RCV.USER - RCV.WINDOW >= min(1/2 RCV.BUFF, MSS)"
+        *
+        * i.e. don't raise the right edge of the window until you can raise
+        * it at least MSS bytes
+        */
+
+       if ( (free_space - window) >= min(sk->mss, MAX_WINDOW/2) )
+               window += ((free_space - window) / sk->mss) * sk->mss;
+
+       sk->window = window;
+       sk->lastwin_seq = sk->acked_seq;
+
+       return sk->window;
+}
+
+/*
+ * List all states of a TCP socket that can be viewed as a "connected"
+ * state.  This now includes TCP_SYN_RECV, although I am not yet fully
+ * convinced that this is the solution for the 'getpeername(2)'
+ * problem. Thanks to Stephen A. Wood <saw@cebaf.gov>  -FvK
+ */
+
+extern __inline const int tcp_connected(const int state)
+{
+  return(state == TCP_ESTABLISHED || state == TCP_CLOSE_WAIT ||
+        state == TCP_FIN_WAIT1   || state == TCP_FIN_WAIT2 ||
+        state == TCP_SYN_RECV);
+}
+
+/*
+ * Calculate(/check) TCP checksum
+ */
+static __inline__ u16 tcp_check(struct tcphdr *th, int len,
+       unsigned long saddr, unsigned long daddr, unsigned long base)
+{
+       return csum_tcpudp_magic(saddr,daddr,len,IPPROTO_TCP,base);
+}
+
+#undef STATE_TRACE
+
+#ifdef STATE_TRACE
+static char *statename[]={
+       "Unused","Established","Syn Sent","Syn Recv",
+       "Fin Wait 1","Fin Wait 2","Time Wait", "Close",
+       "Close Wait","Last ACK","Listen","Closing"
+};
+#endif
+
+static __inline__ void tcp_set_state(struct sock *sk, int state)
+{
+       if(sk->state==TCP_ESTABLISHED)
+               tcp_statistics.TcpCurrEstab--;
+#ifdef STATE_TRACE
+       if(sk->debug)
+               printk("TCP sk=%p, State %s -> %s\n",sk, statename[sk->state],statename[state]);
+#endif 
+       /* This is a hack but it doesn't occur often and it's going to
+          be a real        to fix nicely */
+          
+       if(state==TCP_ESTABLISHED && sk->state==TCP_SYN_RECV)
+       {
+               wake_up_interruptible(&master_select_wakeup);
+       }
+       sk->state=state;
+       if(state==TCP_ESTABLISHED)
+               tcp_statistics.TcpCurrEstab++;
+       if(sk->state==TCP_CLOSE)
+               tcp_cache_zap();
+}
+
  #endif /* _TCP_H */
diff --git a/init/main.c b/init/main.c

index 2088171635ea7840ae828d98f1f25473d8207599..f615ba367a4d8e5c1c11898b68f06b3514f1ee94 100644 (file)
--- a/init/main.c
+++ b/init/main.c
@@ -456,7 +456,7 @@ static void parse_options(char *line)
                 if (!strncmp(line, "nfsaddrs=", 9)) {
                         line += 9;
                         strncpy(nfs_root_addrs, line, sizeof(nfs_root_addrs));
-                       nfs_root_addrs[sizeof(nfs_root_addrs)] = '\0';
+                       nfs_root_addrs[sizeof(nfs_root_addrs)-1] = '\0';
                         continue;
                 }
  #endif
@@ -620,11 +620,11 @@ asmlinkage void start_kernel(void)
  #endif
  #endif
         if (prof_shift) {
-               prof_buffer = (unsigned long *) memory_start;
+               prof_buffer = (unsigned int *) memory_start;
                 /* only text is profiled */
                 prof_len = (unsigned long) &_etext - (unsigned long) &_stext;
                 prof_len >>= prof_shift;
-               memory_start += prof_len * sizeof(unsigned long);
+               memory_start += prof_len * sizeof(unsigned int);
         }
         memory_start = console_init(memory_start,memory_end);
  #ifdef CONFIG_PCI
diff --git a/kernel/sched.c b/kernel/sched.c

index 4fe4b07a7fd80dcdcdfa5a13fdb4c67f330f7392..aaa8867c337be9f2172a2414f7d94874913ca617 100644 (file)
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -72,7 +72,7 @@ int need_resched = 0;
  unsigned long event = 0;
  
  extern int _setitimer(int, struct itimerval *, struct itimerval *);
-unsigned long * prof_buffer = NULL;
+unsigned int * prof_buffer = NULL;
  unsigned long prof_len = 0;
  unsigned long prof_shift = 0;
  
@@ -1115,8 +1115,8 @@ asmlinkage int sys_sched_rr_get_interval(pid_t pid, struct timespec *interval)
         if (error)
                 return error;
         
-       t.tv_sec = 0;
-       t.tv_nsec = 0;   /* <-- Linus, please fill correct value in here */
+       t.ts_sec = 0;
+       t.ts_nsec = 0;   /* <-- Linus, please fill correct value in here */
         return -ENOSYS;  /* and then delete this line. Thanks!           */
         memcpy_tofs(interval, &t, sizeof(struct timespec));
  
diff --git a/net/core/sock.c b/net/core/sock.c

index f133d9798e4e2723e21e09bd6ccf34bfde888f23..a864b34a631d94d19b3b3817eed632a8c5a38e8a 100644 (file)
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -169,19 +169,19 @@ int sock_setsockopt(struct sock *sk, int level, int optname,
                         sk->broadcast=valbool;
                         return 0;
                 case SO_SNDBUF:
-                       if(val>32767)
-                               val=32767;
-                       if(val<256)
-                               val=256;
-                       sk->sndbuf=val;
+                       if(val > SK_WMEM_MAX*2)
+                               val = SK_WMEM_MAX*2;
+                       if(val < 256)
+                               val = 256;
+                       sk->sndbuf = val;
                         return 0;
  
                 case SO_RCVBUF:
-                       if(val>32767)
-                               val=32767;
-                       if(val<256)
-                               val=256;
-                       sk->rcvbuf=val;
+                       if(val > SK_RMEM_MAX*2)
+                               val = SK_RMEM_MAX*2;
+                       if(val < 256)
+                               val = 256;
+                       sk->rcvbuf = val;
                         return(0);
  
                 case SO_KEEPALIVE:
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile

index 9266741496906552c3974b8f33a94333b81e392d..6bd0230611cbdaa29373e4402cfb28c49c42ab40 100644 (file)
--- a/net/ipv4/Makefile
+++ b/net/ipv4/Makefile
@@ -8,10 +8,11 @@
  # Note 2! The CFLAGS definition is now in the main makefile...
  
  O_TARGET := ipv4.o
-IPV4_OBJS      := utils.o route.o proc.o timer.o protocol.o packet.o \
-                  arp.o ip_input.o ip_fragment.o ip_forward.o ip_options.o \
-                  ip_output.o ip_sockglue.o raw.o icmp.o tcp.o udp.o \
-                  devinet.o af_inet.o igmp.o ip_fw.o
+IPV4_OBJS := utils.o route.o proc.o timer.o protocol.o packet.o \
+            ip_input.o ip_fragment.o ip_forward.o ip_options.o \
+            ip_output.o ip_sockglue.o \
+            tcp.o tcp_input.o tcp_output.o tcp_timer.o \
+            raw.o udp.o arp.o icmp.o devinet.o af_inet.o igmp.o ip_fw.o
  
  MOD_LIST_NAME := IPV4_MODULES
  M_OBJS :=
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c

index d40e8c6940ef4f3626e9d32a5d61ac3b43e923ac..98aaa4abd220939fc3423a5e7d8b7ceaa0c85c8a 100644 (file)
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -290,13 +290,6 @@ void destroy_sock(struct sock *sk)
  
         sk->inuse = 1;                  /* just to be safe. */
  
-       /* 
-        *      In case it's sleeping somewhere. 
-        */
-        
-       if (!sk->dead) 
-               sk->write_space(sk);
-
         remove_sock(sk);
    
         /*
@@ -326,6 +319,13 @@ void destroy_sock(struct sock *sk)
                 kfree_skb(skb, FREE_WRITE);
         }
         
+       /* 
+        *      In case it's sleeping somewhere. 
+        */
+        
+       if (!sk->dead) 
+               sk->write_space(sk);
+
         /*
          *      Don't discard received data until the user side kills its
          *      half of the socket.
@@ -383,6 +383,7 @@ void destroy_sock(struct sock *sk)
         while((skb=skb_dequeue(&sk->back_log))!=NULL) 
         {
                 /* this should [almost] never happen. */
+               skb->sk = NULL;
                 kfree_skb(skb, FREE_READ);
         }
  
@@ -562,7 +563,7 @@ static void def_callback2(struct sock *sk,int len)
  
  static void def_callback3(struct sock *sk)
  {
-       if(!sk->dead)
+       if(!sk->dead && sk->wmem_alloc*2 <= sk->sndbuf)
         {
                 wake_up_interruptible(sk->sleep);
                 sock_wake_async(sk->socket, 2);
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c

index 58d58e07960419ab5446a31c8a44bb817a8635aa..1f97b19b506de9170a390a14b470749a031925f8 100644 (file)
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -33,7 +33,7 @@
   *                                     wakes people on errors. select 
   *                                     behaves and the icmp error race
   *                                     has gone by moving it into sock.c
- *             Alan Cox        :       tcp_reset() fixed to work for 
+ *             Alan Cox        :       tcp_send_reset() fixed to work for 
   *                                     everything not just packets for 
   *                                     unknown sockets.
   *             Alan Cox        :       tcp option processing.
@@ -410,206 +410,25 @@
   * (Whew. -- MS 950903)
   **/
  
-#include <linux/types.h>
-#include <linux/sched.h>
-#include <linux/mm.h>
-#include <linux/time.h>
-#include <linux/string.h>
  #include <linux/config.h>
-#include <linux/socket.h>
-#include <linux/sockios.h>
-#include <linux/termios.h>
-#include <linux/in.h>
+#include <linux/types.h>
  #include <linux/fcntl.h>
-#include <linux/inet.h>
-#include <linux/netdevice.h>
-#include <net/snmp.h>
-#include <net/ip.h>
-#include <net/protocol.h>
+
  #include <net/icmp.h>
  #include <net/tcp.h>
-#include <net/arp.h>
-#include <linux/skbuff.h>
-#include <net/sock.h>
-#include <net/route.h>
-#include <linux/errno.h>
-#include <linux/timer.h>
-#include <asm/system.h>
-#include <asm/segment.h>
-#include <linux/mm.h>
-#include <net/checksum.h>
  
-/*
- *     The MSL timer is the 'normal' timer.
- */
- 
-#define reset_msl_timer(x,y,z) reset_timer(x,y,z)
+#include <asm/segment.h>
  
-#define SEQ_TICK 3
  unsigned long seq_offset;
  struct tcp_mib tcp_statistics;
  
-/*
- *     Cached last hit socket
- */
- 
-volatile unsigned long         th_cache_saddr,th_cache_daddr;
-volatile unsigned short  th_cache_dport, th_cache_sport;
-volatile struct sock *th_cache_sk;
-
-void tcp_cache_zap(void)
-{
-       unsigned long flags;
-       save_flags(flags);
-       cli();
-       th_cache_saddr=0;
-       th_cache_daddr=0;
-       th_cache_dport=0;
-       th_cache_sport=0;
-       th_cache_sk=NULL;
-       restore_flags(flags);
-}
-
  static void tcp_close(struct sock *sk, int timeout);
-static void tcp_read_wakeup(struct sock *sk);
  
  /*
   *     The less said about this the better, but it works and will do for 1.2  (and 1.4 ;))
   */
  
-static struct wait_queue *master_select_wakeup;
-
-static __inline__ int min(unsigned int a, unsigned int b)
-{
-       if (a < b) 
-               return(a);
-       return(b);
-}
-
-#undef STATE_TRACE
-
-#ifdef STATE_TRACE
-static char *statename[]={
-       "Unused","Established","Syn Sent","Syn Recv",
-       "Fin Wait 1","Fin Wait 2","Time Wait", "Close",
-       "Close Wait","Last ACK","Listen","Closing"
-};
-#endif
-
-static __inline__ void tcp_set_state(struct sock *sk, int state)
-{
-       if(sk->state==TCP_ESTABLISHED)
-               tcp_statistics.TcpCurrEstab--;
-#ifdef STATE_TRACE
-       if(sk->debug)
-               printk("TCP sk=%p, State %s -> %s\n",sk, statename[sk->state],statename[state]);
-#endif 
-       /* This is a hack but it doesn't occur often and it's going to
-          be a real        to fix nicely */
-          
-       if(state==TCP_ESTABLISHED && sk->state==TCP_SYN_RECV)
-       {
-               wake_up_interruptible(&master_select_wakeup);
-       }
-       sk->state=state;
-       if(state==TCP_ESTABLISHED)
-               tcp_statistics.TcpCurrEstab++;
-       if(sk->state==TCP_CLOSE)
-               tcp_cache_zap();
-}
-
-/*
- *     This routine picks a TCP windows for a socket based on
- *     the following constraints
- *  
- *     1. The window can never be shrunk once it is offered (RFC 793)
- *     2. We limit memory per socket
- */
-
-
-static __inline__ unsigned short tcp_select_window(struct sock *sk)
-{
-       long free_space = sock_rspace(sk);      
-       long window = 0;
-
-       if (free_space > 1024)
-               free_space &= ~0x3FF;  /* make free space a multiple of 1024 */
- 
-       if(sk->window_clamp)
-               free_space = min(sk->window_clamp, free_space);
- 
-       /* 
-         * compute the actual window i.e. 
-         * old_window - received_bytes_on_that_win 
-        */
-
-       if (sk->mss == 0)
-               sk->mss = sk->mtu;
-
-       window = sk->window - (sk->acked_seq - sk->lastwin_seq);
- 
-       if ( window < 0 ) {     
-               window = 0;
-               printk(KERN_DEBUG "TSW: win < 0 w=%d 1=%u 2=%u\n", 
-                      sk->window, sk->acked_seq, sk->lastwin_seq);
-       }
-
-        /*
-        * RFC 1122:
-        * "the suggested [SWS] avoidance algoritm for the receiver is to keep
-        *  RECV.NEXT + RCV.WIN fixed until:
-        *  RCV.BUFF - RCV.USER - RCV.WINDOW >= min(1/2 RCV.BUFF, MSS)"
-        * 
-        * i.e. don't raise the right edge of the window until you can't raise
-        * it MSS bytes
-        */
-       
-       if ( (free_space - window) >= min(sk->mss, MAX_WINDOW/2) )
-               window += ((free_space - window) / sk->mss) * sk->mss;
-       
-       sk->window = window;
-       sk->lastwin_seq = sk->acked_seq;
-       
-       return sk->window;
-}
-
-/*
- *      This function returns the amount that we can raise the
- *      usable window.
- */
-
-static __inline__ unsigned short tcp_raise_window(struct sock *sk)
-{
-       long free_space = sock_rspace(sk);
-       long window = 0;
-
-       if (free_space > 1024)
-               free_space &= ~0x3FF; /* make free space a multiple of 1024 */
-
-       if(sk->window_clamp)
-               free_space = min(sk->window_clamp, free_space);
- 
-       /* 
-         * compute the actual window i.e. 
-         * old_window - received_bytes_on_that_win 
-        */
-
-       window = sk->window - (sk->acked_seq - sk->lastwin_seq);
-
-       if (sk->mss == 0)
-               sk->mss = sk->mtu;
- 
-       if ( window < 0 ) {     
-               window = 0;
-               printk(KERN_DEBUG "TRW: win < 0 w=%d 1=%u 2=%u\n", 
-                      sk->window, sk->acked_seq, sk->lastwin_seq);
-       }
-       
-       if ( (free_space - window) >= min(sk->mss, MAX_WINDOW/2) )
-               return ((free_space - window) / sk->mss) * sk->mss;
-
-       return 0;
-}
+struct wait_queue *master_select_wakeup;
  
  /*
   *     Find someone to 'accept'. Must be called with
@@ -672,585 +491,141 @@ static void tcp_close_pending (struct sock *sk)
   *     Enter the time wait state. 
   */
  
-static void tcp_time_wait(struct sock *sk)
+void tcp_time_wait(struct sock *sk)
  {
         tcp_set_state(sk,TCP_TIME_WAIT);
         sk->shutdown = SHUTDOWN_MASK;
         if (!sk->dead)
                 sk->state_change(sk);
-       reset_msl_timer(sk, TIME_CLOSE, TCP_TIMEWAIT_LEN);
+       tcp_reset_msl_timer(sk, TIME_CLOSE, TCP_TIMEWAIT_LEN);
  }
  
+
  /*
- *     A socket has timed out on its send queue and wants to do a
- *     little retransmitting. Currently this means TCP.
+ * This routine is called by the ICMP module when it gets some
+ * sort of error condition.  type and code are the ICMP type and
+ * code of the message that was received.  After adjustment
+ * header points to the first 8 bytes of the tcp header.  We need
+ * to find the appropriate socket from its ports and the two
+ * addresses.
   */
  
-void tcp_do_retransmit(struct sock *sk, int all)
+void tcp_err(int type, int code, unsigned char *header, __u32 daddr,
+       __u32 saddr, struct inet_protocol *protocol)
  {
-       struct sk_buff * skb;
-       struct proto *prot;
-       struct device *dev;
-       int ct=0;
-       struct rtable *rt;
-
-       prot = sk->prot;
-       skb = sk->send_head;
+       struct tcphdr *th = (struct tcphdr *)header;
+       struct sock *sk;
+       
+       /*
+        *      This one is _WRONG_ (the iph computation below assumes the IP header has no options). FIXME urgently.
+        */
+#ifndef CONFIG_NO_PATH_MTU_DISCOVERY    
+       struct iphdr *iph=(struct iphdr *)(header-sizeof(struct iphdr));
+#endif  
+       th =(struct tcphdr *)header;
+       sk = get_sock(&tcp_prot, th->source, daddr, th->dest, saddr);
  
-       while (skb != NULL)
+       if (sk == NULL) 
+               return;
+  
+       if (type == ICMP_SOURCE_QUENCH) 
         {
-               struct tcphdr *th;
-               struct iphdr *iph;
-               int size;
-
-               dev = skb->dev;
-               IS_SKB(skb);
-               skb->when = jiffies;
-               
-               /* dl1bke 960201 - @%$$! Hope this cures strange race conditions    */
-               /*                 with AX.25 mode VC. (esp. DAMA)                  */
-               /*                 if the buffer is locked we should not retransmit */
-               /*                 anyway, so we don't need all the fuss to prepare */
-               /*                 the buffer in this case.                         */
-               /*                 (the skb_pull() changes skb->data while we may   */
-               /*                 actually try to send the data. Ough. A side      */
-               /*                 effect is that we'll send some unnecessary data, */
-               /*                 but the alternative is desastrous...             */
-               
-               if (skb_device_locked(skb))
-                       break;
-
                 /*
-                *      Discard the surplus MAC header
+                * FIXME:
+                * For now we will just trigger a linear backoff.
+                * The slow start code should cause a real backoff here.
                  */
-                
-               skb_pull(skb,((unsigned char *)skb->ip_hdr)-skb->data);
+               if (sk->cong_window > 4)
+                       sk->cong_window--;
+               return;
+       }
+       
+       if (type == ICMP_PARAMETERPROB)
+       {
+               sk->err=EPROTO;
+               sk->error_report(sk);
+       }
  
+#ifndef CONFIG_NO_PATH_MTU_DISCOVERY
+       if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED)
+       {
+               struct rtable * rt;
                 /*
-                * In general it's OK just to use the old packet.  However we
-                * need to use the current ack and window fields.  Urg and
-                * urg_ptr could possibly stand to be updated as well, but we
-                * don't keep the necessary data.  That shouldn't be a problem,
-                * if the other end is doing the right thing.  Since we're
-                * changing the packet, we have to issue a new IP identifier.
+                * Ugly trick to pass MTU to protocol layer: it is read from iph->id.
+                * Really we should add an "info" argument to the error handler.
                  */
+               unsigned short new_mtu = ntohs(iph->id);
  
-               iph = (struct iphdr *)skb->data;
-               th = (struct tcphdr *)(((char *)iph) + (iph->ihl << 2));
-               size = ntohs(iph->tot_len) - (iph->ihl<<2);
-               
-               /*
-                *      Note: We ought to check for window limits here but
-                *      currently this is done (less efficiently) elsewhere.
-                */
+               if ((rt = sk->ip_route_cache) != NULL)
+                       if (rt->rt_mtu > new_mtu)
+                               rt->rt_mtu = new_mtu;
  
-               /*
-                *      Put a MAC header back on (may cause ARPing)
-                */
-                
-               {
-                       /* ANK: UGLY, but the bug, that was here, should be fixed.
-                        */
-                       struct options *  opt = (struct options*)skb->proto_priv;
-                       rt = ip_check_route(&sk->ip_route_cache, opt->srr?opt->faddr:iph->daddr, skb->localroute);
-               }
+               if (sk->mtu > new_mtu - sizeof(struct iphdr) - sizeof(struct tcphdr)
+                       && new_mtu > sizeof(struct iphdr)+sizeof(struct tcphdr))
+                       sk->mtu = new_mtu - sizeof(struct iphdr) - sizeof(struct tcphdr);
  
-               iph->id = htons(ip_id_count++);
-#ifndef CONFIG_NO_PATH_MTU_DISCOVERY
-               if (rt && ntohs(iph->tot_len) > rt->rt_mtu)
-                       iph->frag_off &= ~htons(IP_DF);
+               return;
+       }
  #endif
-               ip_send_check(iph);
-                       
-               if (rt==NULL)   /* Deep poo */
-               {
-                       if(skb->sk)
-                       {
-                               skb->sk->err_soft=ENETUNREACH;
-                               skb->sk->error_report(skb->sk);
-                       }
-               }
-               else
+
+       /*
+        * If we've already connected we will keep trying
+        * until we time out, or the user gives up.
+        */
+
+       if (code < 13)
+       {       
+               if(icmp_err_convert[code].fatal || sk->state == TCP_SYN_SENT || sk->state == TCP_SYN_RECV)
                 {
-                       dev=rt->rt_dev;
-                       skb->raddr=rt->rt_gateway;
-                       skb->dev=dev;
-                       skb->arp=1;
-                       if (rt->rt_hh)
-                       {
-                               memcpy(skb_push(skb,dev->hard_header_len),rt->rt_hh->hh_data,dev->hard_header_len);
-                               if (!rt->rt_hh->hh_uptodate)
-                               {
-                                       skb->arp = 0;
-#if RT_CACHE_DEBUG >= 2
-                                       printk("tcp_do_retransmit: hh miss %08x via %08x\n", iph->daddr, rt->rt_gateway);
-#endif
-                               }
-                       }
-                       else if (dev->hard_header)
-                       {
-                               if(dev->hard_header(skb, dev, ETH_P_IP, NULL, NULL, skb->len)<0)
-                                       skb->arp=0;
-                       }
-               
-                       /*
-                        *      This is not the right way to handle this. We have to
-                        *      issue an up to date window and ack report with this 
-                        *      retransmit to keep the odd buggy tcp that relies on 
-                        *      the fact BSD does this happy. 
-                        *      We don't however need to recalculate the entire 
-                        *      checksum, so someone wanting a small problem to play
-                        *      with might like to implement RFC1141/RFC1624 and speed
-                        *      this up by avoiding a full checksum.
-                        */
-                
-                       th->ack_seq = htonl(sk->acked_seq);
-                       sk->ack_backlog = 0;
-                       sk->bytes_rcv = 0;
-                       th->window = ntohs(tcp_select_window(sk));
-                       tcp_send_check(th, sk->saddr, sk->daddr, size, sk);
-               
-                       /*
-                        *      If the interface is (still) up and running, kick it.
-                        */
-       
-                       if (dev->flags & IFF_UP)
+                       sk->err = icmp_err_convert[code].errno;
+                       if (sk->state == TCP_SYN_SENT || sk->state == TCP_SYN_RECV) 
                         {
-                               /*
-                                *      If the packet is still being sent by the device/protocol
-                                *      below then don't retransmit. This is both needed, and good -
-                                *      especially with connected mode AX.25 where it stops resends
-                                *      occurring of an as yet unsent anyway frame!
-                                *      We still add up the counts as the round trip time wants
-                                *      adjusting.
-                                */
-                               if (sk && !skb_device_locked(skb))
-                               {
-                                       /* Remove it from any existing driver queue first! */
-                                       skb_unlink(skb);
-                                       /* Now queue it */
-                                       ip_statistics.IpOutRequests++;
-                                       dev_queue_xmit(skb, dev, sk->priority);
-                               }
+                               tcp_statistics.TcpAttemptFails++;
+                               tcp_set_state(sk,TCP_CLOSE);
+                               sk->error_report(sk);           /* Wake people up to see the error (see connect in sock.c) */
                         }
                 }
-               
-               /*
-                *      Count retransmissions
-                */
-                
-               ct++;
-               sk->prot->retransmits ++;
-               tcp_statistics.TcpRetransSegs++;
-               
-
-               /*
-                *      Only one retransmit requested.
-                */
-       
-               if (!all)
-                       break;
-
-               /*
-                *      This should cut it off before we send too many packets.
-                */
-
-               if (ct >= sk->cong_window)
-                       break;
-               skb = skb->link3;
+               else    /* Only an error on timeout */
+                       sk->err_soft = icmp_err_convert[code].errno;
         }
  }
  
-/*
- *     Reset the retransmission timer
- */
- 
-static void reset_xmit_timer(struct sock *sk, int why, unsigned long when)
-{
-       del_timer(&sk->retransmit_timer);
-       sk->ip_xmit_timeout = why;
-       if((long)when < 0)
-       {
-               when=3;
-               printk("Error: Negative timer in xmit_timer\n");
-       }
-       sk->retransmit_timer.expires=jiffies+when;
-       add_timer(&sk->retransmit_timer);
-}
  
  /*
- *     This is the normal code called for timeouts.  It does the retransmission
- *     and then does backoff.  tcp_do_retransmit is separated out because
- *     tcp_ack needs to send stuff from the retransmit queue without
- *     initiating a backoff.
+ *     Walk down the receive queue counting readable data until we hit the end or we find a gap
+ *     in the received data queue (i.e. a frame is missing and still needs sending to us). Not
+ *     sorting using two queues as data arrives makes life so much harder.
   */
  
-
-void tcp_retransmit_time(struct sock *sk, int all)
+static int tcp_readable(struct sock *sk)
  {
-       tcp_do_retransmit(sk, all);
-
-       /*
-        * Increase the timeout each time we retransmit.  Note that
-        * we do not increase the rtt estimate.  rto is initialized
-        * from rtt, but increases here.  Jacobson (SIGCOMM 88) suggests
-        * that doubling rto each time is the least we can get away with.
-        * In KA9Q, Karn uses this for the first few times, and then
-        * goes to quadratic.  netBSD doubles, but only goes up to *64,
-        * and clamps at 1 to 64 sec afterwards.  Note that 120 sec is
-        * defined in the protocol as the maximum possible RTT.  I guess
-        * we'll have to use something other than TCP to talk to the
-        * University of Mars.
-        *
-        * PAWS allows us longer timeouts and large windows, so once
-        * implemented ftp to mars will work nicely. We will have to fix
-        * the 120 second clamps though!
-        */
-
-       sk->retransmits++;
-       sk->prot->retransmits++;
-       sk->backoff++;
-       sk->rto = min(sk->rto << 1, 120*HZ);
-       reset_xmit_timer(sk, TIME_WRITE, sk->rto);
-}
-
+       unsigned long counted;
+       unsigned long amount;
+       struct sk_buff *skb;
+       int sum;
+       unsigned long flags;
  
-/*
- *     A timer event has trigger a tcp retransmit timeout. The
- *     socket xmit queue is ready and set up to send. Because
- *     the ack receive code keeps the queue straight we do
- *     nothing clever here.
- */
+       if(sk && sk->debug)
+               printk("tcp_readable: %p - ",sk);
  
-static void tcp_retransmit(struct sock *sk, int all)
-{
-       if (all) 
+       save_flags(flags);
+       cli();
+       if (sk == NULL || (skb = skb_peek(&sk->receive_queue)) == NULL)
         {
-               tcp_retransmit_time(sk, all);
-               return;
+               restore_flags(flags);
+               if(sk && sk->debug) 
+                       printk("empty\n");
+               return(0);
         }
-
-       sk->ssthresh = sk->cong_window >> 1; /* remember window where we lost */
-       /* sk->ssthresh in theory can be zero.  I guess that's OK */
-       sk->cong_count = 0;
-
-       sk->cong_window = 1;
-
-       /* Do the actual retransmit. */
-       tcp_retransmit_time(sk, all);
-}
-
-/*
- *     A write timeout has occurred. Process the after effects.
- */
-
-static int tcp_write_timeout(struct sock *sk)
-{
-       /*
-        *      Look for a 'soft' timeout.
+  
+       counted = sk->copied_seq;       /* Where we are at the moment */
+       amount = 0;
+  
+       /* 
+        *      Do until a push or until we are out of data. 
          */
-       if ((sk->state == TCP_ESTABLISHED && sk->retransmits && !(sk->retransmits & 7))
-               || (sk->state != TCP_ESTABLISHED && sk->retransmits > TCP_RETR1)) 
-       {
-               /*
-                *      Attempt to recover if arp has changed (unlikely!) or
-                *      a route has shifted (not supported prior to 1.3).
-                */
-               ip_rt_advice(&sk->ip_route_cache, 0);
-       }
-       
-       /*
-        *      Have we tried to SYN too many times (repent repent 8))
-        */
-        
-       if(sk->retransmits > TCP_SYN_RETRIES && sk->state==TCP_SYN_SENT)
-       {
-               if(sk->err_soft)
-                       sk->err=sk->err_soft;
-               else
-                       sk->err=ETIMEDOUT;
-               sk->error_report(sk);
-               del_timer(&sk->retransmit_timer);
-               tcp_statistics.TcpAttemptFails++;       /* Is this right ??? - FIXME - */
-               tcp_set_state(sk,TCP_CLOSE);
-               /* Don't FIN, we got nothing back */
-               release_sock(sk);
-               return 0;
-       }
-       /*
-        *      Has it gone just too far ?
-        */
-       if (sk->retransmits > TCP_RETR2) 
-       {
-               if(sk->err_soft)
-                       sk->err = sk->err_soft;
-               else
-                       sk->err = ETIMEDOUT;
-               sk->error_report(sk);
-               del_timer(&sk->retransmit_timer);
-               /*
-                *      Time wait the socket 
-                */
-               if (sk->state == TCP_FIN_WAIT1 || sk->state == TCP_FIN_WAIT2 || sk->state == TCP_CLOSING ) 
-               {
-                       tcp_set_state(sk,TCP_TIME_WAIT);
-                       reset_msl_timer (sk, TIME_CLOSE, TCP_TIMEWAIT_LEN);
-               }
-               else
-               {
-                       /*
-                        *      Clean up time.
-                        */
-                       tcp_set_state(sk, TCP_CLOSE);
-                       release_sock(sk);
-                       return 0;
-               }
-       }
-       return 1;
-}
-
-/*
- *     The TCP retransmit timer. This lacks a few small details.
- *
- *     1.      An initial rtt timeout on the probe0 should cause what we can
- *             of the first write queue buffer to be split and sent.
- *     2.      On a 'major timeout' as defined by RFC1122 we shouldn't report
- *             ETIMEDOUT if we know an additional 'soft' error caused this.
- *             tcp_err should save a 'soft error' for us.
- */
-
-static void retransmit_timer(unsigned long data)
-{
-       struct sock *sk = (struct sock*)data;
-       int why = sk->ip_xmit_timeout;
-
-       /*
-        *      We are reset. We will send no more retransmits.
-        */
-        
-       if(sk->zapped)
-               return;
-               
-       /* 
-        *      Only process if socket is not in use
-        */
-
-       cli();
-       if (sk->inuse || in_bh) 
-       {
-               /* Try again in 1 second */
-               sk->retransmit_timer.expires = jiffies+HZ;
-               add_timer(&sk->retransmit_timer);
-               sti();
-               return;
-       }
-
-       sk->inuse = 1;
-       sti();
-
-
-       if (sk->ack_backlog && !sk->dead) 
-               sk->data_ready(sk,0);
-
-       /* Now we need to figure out why the socket was on the timer. */
-
-       switch (why) 
-       {
-               /* Window probing */
-               case TIME_PROBE0:
-                       tcp_send_probe0(sk);
-                       tcp_write_timeout(sk);
-                       break;
-               /* Retransmitting */
-               case TIME_WRITE:
-                       /* It could be we got here because we needed to send an ack.
-                        * So we need to check for that.
-                        */
-               {
-                       struct sk_buff *skb;
-                       unsigned long flags;
-
-                       save_flags(flags);
-                       cli();
-                       skb = sk->send_head;
-                       if (!skb) 
-                       {
-                               if (sk->ack_backlog)
-                                       tcp_read_wakeup(sk);
-                               restore_flags(flags);
-                       } 
-                       else 
-                       {
-                               /*
-                                *      Kicked by a delayed ack. Reset timer
-                                *      correctly now
-                                */
-                               if (jiffies < skb->when + sk->rto) 
-                               {
-                                       if (sk->ack_backlog)
-                                               tcp_read_wakeup(sk);
-                                       reset_xmit_timer (sk, TIME_WRITE, skb->when + sk->rto - jiffies);
-                                       restore_flags(flags);
-                                       break;
-                               }
-                               restore_flags(flags);
-                               /*
-                                *      Retransmission
-                                */
-                               sk->retransmits++;
-                               sk->prot->retransmits++;
-                               sk->prot->retransmit (sk, 0);
-                               tcp_write_timeout(sk);
-                       }
-                       break;
-               }
-               /* Sending Keepalives */
-               case TIME_KEEPOPEN:
-                       /* 
-                        * this reset_timer() call is a hack, this is not
-                        * how KEEPOPEN is supposed to work.
-                        */
-                       reset_xmit_timer (sk, TIME_KEEPOPEN, TCP_TIMEOUT_LEN);
-
-                       /* Send something to keep the connection open. */
-                       if (sk->prot->write_wakeup)
-                                 sk->prot->write_wakeup (sk);
-                       sk->retransmits++;
-                       sk->prot->retransmits++;
-                       tcp_write_timeout(sk);
-                       break;
-               default:
-                       printk ("rexmit_timer: timer expired - reason unknown\n");
-                       break;
-       }
-       release_sock(sk);
-}
-
-/*
- * This routine is called by the ICMP module when it gets some
- * sort of error condition.  If err < 0 then the socket should
- * be closed and the error returned to the user.  If err > 0
- * it's just the icmp type << 8 | icmp code.  After adjustment
- * header points to the first 8 bytes of the tcp header.  We need
- * to find the appropriate port.
- */
-
-void tcp_err(int type, int code, unsigned char *header, __u32 daddr,
-       __u32 saddr, struct inet_protocol *protocol)
-{
-       struct tcphdr *th = (struct tcphdr *)header;
-       struct sock *sk;
-       
-       /*
-        *      This one is _WRONG_. FIXME urgently.
-        */
-#ifndef CONFIG_NO_PATH_MTU_DISCOVERY    
-       struct iphdr *iph=(struct iphdr *)(header-sizeof(struct iphdr));
-#endif  
-       th =(struct tcphdr *)header;
-       sk = get_sock(&tcp_prot, th->source, daddr, th->dest, saddr);
-
-       if (sk == NULL) 
-               return;
-  
-       if (type == ICMP_SOURCE_QUENCH) 
-       {
-               /*
-                * FIXME:
-                * For now we will just trigger a linear backoff.
-                * The slow start code should cause a real backoff here.
-                */
-               if (sk->cong_window > 4)
-                       sk->cong_window--;
-               return;
-       }
-       
-       if (type == ICMP_PARAMETERPROB)
-       {
-               sk->err=EPROTO;
-               sk->error_report(sk);
-       }
-
-#ifndef CONFIG_NO_PATH_MTU_DISCOVERY
-       if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED)
-       {
-               struct rtable * rt;
-               /*
-                * Ugly trick to pass MTU to protocol layer.
-                * Really we should add argument "info" to error handler.
-                */
-               unsigned short new_mtu = ntohs(iph->id);
-
-               if ((rt = sk->ip_route_cache) != NULL)
-                       if (rt->rt_mtu > new_mtu)
-                               rt->rt_mtu = new_mtu;
-
-               if (sk->mtu > new_mtu - sizeof(struct iphdr) - sizeof(struct tcphdr)
-                       && new_mtu > sizeof(struct iphdr)+sizeof(struct tcphdr))
-                       sk->mtu = new_mtu - sizeof(struct iphdr) - sizeof(struct tcphdr);
-
-               return;
-       }
-#endif
-
-       /*
-        * If we've already connected we will keep trying
-        * until we time out, or the user gives up.
-        */
-
-       if (code < 13)
-       {       
-               if(icmp_err_convert[code].fatal || sk->state == TCP_SYN_SENT || sk->state == TCP_SYN_RECV)
-               {
-                       sk->err = icmp_err_convert[code].errno;
-                       if (sk->state == TCP_SYN_SENT || sk->state == TCP_SYN_RECV) 
-                       {
-                               tcp_statistics.TcpAttemptFails++;
-                               tcp_set_state(sk,TCP_CLOSE);
-                               sk->error_report(sk);           /* Wake people up to see the error (see connect in sock.c) */
-                       }
-               }
-               else    /* Only an error on timeout */
-                       sk->err_soft = icmp_err_convert[code].errno;
-       }
-}
-
-
-/*
- *     Walk down the receive queue counting readable data until we hit the end or we find a gap
- *     in the received data queue (ie a frame missing that needs sending to us). Not
- *     sorting using two queues as data arrives makes life so much harder.
- */
-
-static int tcp_readable(struct sock *sk)
-{
-       unsigned long counted;
-       unsigned long amount;
-       struct sk_buff *skb;
-       int sum;
-       unsigned long flags;
-
-       if(sk && sk->debug)
-               printk("tcp_readable: %p - ",sk);
-
-       save_flags(flags);
-       cli();
-       if (sk == NULL || (skb = skb_peek(&sk->receive_queue)) == NULL)
-       {
-               restore_flags(flags);
-               if(sk && sk->debug) 
-                       printk("empty\n");
-               return(0);
-       }
-  
-       counted = sk->copied_seq;       /* Where we are at the moment */
-       amount = 0;
-  
-       /* 
-        *      Do until a push or until we are out of data. 
-        */
-        
-       do 
+        
+       do 
         {
                 if (before(counted, skb->seq))          /* Found a hole so stops here */
                         break;
@@ -1428,12 +803,6 @@ int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg)
   *     Jorge Cwik <jorge@laser.satlink.net>
   */
   
-unsigned short tcp_check(struct tcphdr *th, int len,
-         unsigned long saddr, unsigned long daddr, unsigned long base)
-{     
-       return csum_tcpudp_magic(saddr,daddr,len,IPPROTO_TCP,base);
-}
-
  void tcp_send_check(struct tcphdr *th, unsigned long saddr, 
                 unsigned long daddr, int len, struct sock *sk)
  {
@@ -1443,352 +812,51 @@ void tcp_send_check(struct tcphdr *th, unsigned long saddr,
         return;
  }
  
-/*
- *     This is the main buffer sending routine. We queue the buffer
- *     having checked it is sane seeming.
+
+/* 
+ *     This routine builds a generic TCP header. 
   */
   
-static void tcp_send_skb(struct sock *sk, struct sk_buff *skb)
+extern __inline int tcp_build_header(struct tcphdr *th, struct sock *sk, int push)
  {
-       int size;
-       struct tcphdr * th = skb->h.th;
  
-       /*
-        *      length of packet (not counting length of pre-tcp headers) 
-        */
-        
-       size = skb->len - ((unsigned char *) th - skb->data);
+       memcpy(th,(void *) &(sk->dummy_th), sizeof(*th));
+       th->seq = htonl(sk->write_seq);
+       th->psh =(push == 0) ? 1 : 0;
+       th->doff = sizeof(*th)/4;
+       th->ack = 1;
+       th->fin = 0;
+       sk->ack_backlog = 0;
+       sk->bytes_rcv = 0;
+       sk->ack_timed = 0;
+       th->ack_seq = htonl(sk->acked_seq);
+       sk->window = tcp_select_window(sk);
+       th->window = htons(sk->window);
  
-       /*
-        *      Sanity check it.. 
-        */
-        
-       if (size < sizeof(struct tcphdr) || size > skb->len) 
-       {
-               printk("tcp_send_skb: bad skb (skb = %p, data = %p, th = %p, len = %lu)\n",
-                       skb, skb->data, th, skb->len);
-               kfree_skb(skb, FREE_WRITE);
-               return;
-       }
+       return(sizeof(*th));
+}
+
+/*
+ *     This routine copies from a user buffer into a socket,
+ *     and starts the transmit system.
+ */
  
+static int tcp_sendmsg(struct sock *sk, struct msghdr *msg,
+         int len, int nonblock, int flags)
+{
+       int copied = 0;
+       int copy;
+       int tmp;
+       int seglen;
+       int iovct=0;
+       struct sk_buff *skb;
+       struct sk_buff *send_tmp;
+       struct proto *prot;
+       struct device *dev = NULL;
+       unsigned char *from;
+       
         /*
-        *      If we have queued a header size packet.. (these crash a few
-        *      tcp stacks if ack is not set)
-        */
-        
-       if (size == sizeof(struct tcphdr)) 
-       {
-               /* If it's got a syn or fin it's notionally included in the size..*/
-               if(!th->syn && !th->fin) 
-               {
-                       printk("tcp_send_skb: attempt to queue a bogon.\n");
-                       kfree_skb(skb,FREE_WRITE);
-                       return;
-               }
-       }
-
-       /*
-        *      Actual processing.
-        */
-        
-       tcp_statistics.TcpOutSegs++;  
-       skb->seq = ntohl(th->seq);
-       skb->end_seq = skb->seq + size - 4*th->doff;
-       
-       /*
-        *      We must queue if
-        *
-        *      a) The right edge of this frame exceeds the window
-        *      b) We are retransmitting (Nagle's rule)
-        *      c) We have too many packets 'in flight'
-        */
-        
-       if (after(skb->end_seq, sk->window_seq) ||
-           (sk->retransmits && sk->ip_xmit_timeout == TIME_WRITE) ||
-            sk->packets_out >= sk->cong_window) 
-       {
-               /* checksum will be supplied by tcp_write_xmit.  So
-                * we shouldn't need to set it at all.  I'm being paranoid */
-               th->check = 0;
-               if (skb->next != NULL) 
-               {
-                       printk("tcp_send_partial: next != NULL\n");
-                       skb_unlink(skb);
-               }
-               skb_queue_tail(&sk->write_queue, skb);
-               
-               /*
-                *      If we don't fit we have to start the zero window
-                *      probes. This is broken - we really need to do a partial
-                *      send _first_ (This is what causes the Cisco and PC/TCP
-                *      grief).
-                */
-                
-               if (before(sk->window_seq, sk->write_queue.next->end_seq) &&
-                   sk->send_head == NULL && sk->ack_backlog == 0)
-                       reset_xmit_timer(sk, TIME_PROBE0, sk->rto);
-       } 
-       else 
-       {
-               /*
-                *      This is going straight out
-                */
-                
-               th->ack_seq = htonl(sk->acked_seq);
-               th->window = htons(tcp_select_window(sk));
-
-               tcp_send_check(th, sk->saddr, sk->daddr, size, sk);
-
-               sk->sent_seq = sk->write_seq;
-               
-               /*
-                *      This is mad. The tcp retransmit queue is put together
-                *      by the ip layer. This causes half the problems with
-                *      unroutable FIN's and other things.
-                */
-                
-               sk->prot->queue_xmit(sk, skb->dev, skb, 0);
-               
-               
-               sk->ack_backlog = 0;
-               sk->bytes_rcv = 0;
-
-               /*
-                *      Set for next retransmit based on expected ACK time.
-                *      FIXME: We set this every time which means our 
-                *      retransmits are really about a window behind.
-                */
-
-               reset_xmit_timer(sk, TIME_WRITE, sk->rto);
-       }
-}
-
-/*
- *     Locking problems lead us to a messy situation where we can have
- *     multiple partially complete buffers queued up. This is really bad
- *     as we don't want to be sending partial buffers. Fix this with
- *     a semaphore or similar to lock tcp_write per socket.
- *
- *     These routines are pretty self descriptive.
- */
- 
-struct sk_buff * tcp_dequeue_partial(struct sock * sk)
-{
-       struct sk_buff * skb;
-       unsigned long flags;
-
-       save_flags(flags);
-       cli();
-       skb = sk->partial;
-       if (skb) {
-               sk->partial = NULL;
-               del_timer(&sk->partial_timer);
-       }
-       restore_flags(flags);
-       return skb;
-}
-
-/*
- *     Empty the partial queue
- */
- 
-static void tcp_send_partial(struct sock *sk)
-{
-       struct sk_buff *skb;
-
-       if (sk == NULL)
-               return;
-       while ((skb = tcp_dequeue_partial(sk)) != NULL)
-               tcp_send_skb(sk, skb);
-}
-
-/*
- *     Queue a partial frame
- */
- 
-void tcp_enqueue_partial(struct sk_buff * skb, struct sock * sk)
-{
-       struct sk_buff * tmp;
-       unsigned long flags;
-
-       save_flags(flags);
-       cli();
-       tmp = sk->partial;
-       if (tmp)
-               del_timer(&sk->partial_timer);
-       sk->partial = skb;
-       init_timer(&sk->partial_timer);
-       /*
-        *      Wait up to 1 second for the buffer to fill.
-        */
-       sk->partial_timer.expires = jiffies+HZ;
-       sk->partial_timer.function = (void (*)(unsigned long)) tcp_send_partial;
-       sk->partial_timer.data = (unsigned long) sk;
-       add_timer(&sk->partial_timer);
-       restore_flags(flags);
-       if (tmp)
-               tcp_send_skb(sk, tmp);
-}
-
-
-
-/*
- *     This routine sends an ack and also updates the window. 
- */
- 
-static void tcp_send_ack(u32 sequence, u32 ack,
-            struct sock *sk,
-            struct tcphdr *th, unsigned long daddr)
-{
-       struct sk_buff *buff;
-       struct tcphdr *t1;
-       struct device *dev = NULL;
-       int tmp;
-
-       if(sk->zapped)
-               return;         /* We have been reset, we may not send again */
-               
-       /*
-        * We need to grab some memory, and put together an ack,
-        * and then put it into the queue to be sent.
-        */
-
-       buff = sock_wmalloc(sk, MAX_ACK_SIZE, 1, GFP_ATOMIC);
-       if (buff == NULL) 
-       {
-               /* 
-                *      Force it to send an ack. We don't have to do this
-                *      (ACK is unreliable) but it's much better use of 
-                *      bandwidth on slow links to send a spare ack than
-                *      resend packets. 
-                */
-                
-               sk->ack_backlog++;
-               if (sk->ip_xmit_timeout != TIME_WRITE && tcp_connected(sk->state)) 
-               {
-                       reset_xmit_timer(sk, TIME_WRITE, HZ);
-               }
-               return;
-       }
-
-       /*
-        *      Assemble a suitable TCP frame
-        */
-        
-       buff->sk = sk;
-       buff->localroute = sk->localroute;
-
-       /* 
-        *      Put in the IP header and routing stuff. 
-        */
-        
-       tmp = sk->prot->build_header(buff, sk->saddr, daddr, &dev,
-                               IPPROTO_TCP, sk->opt, MAX_ACK_SIZE,sk->ip_tos,sk->ip_ttl,&sk->ip_route_cache);
-       if (tmp < 0) 
-       {
-               buff->free = 1;
-               sock_wfree(sk, buff);
-               return;
-       }
-       t1 =(struct tcphdr *)skb_put(buff,sizeof(struct tcphdr));
-
-       memcpy(t1, th, sizeof(*t1));
-
-       /*
-        *      Swap the send and the receive. 
-        */
-        
-       t1->dest = th->source;
-       t1->source = th->dest;
-       t1->seq = ntohl(sequence);
-       t1->ack = 1;
-       sk->window = tcp_select_window(sk);
-       t1->window = ntohs(sk->window);
-       t1->res1 = 0;
-       t1->res2 = 0;
-       t1->rst = 0;
-       t1->urg = 0;
-       t1->syn = 0;
-       t1->psh = 0;
-       t1->fin = 0;
-       
-       /*
-        *      If we have nothing queued for transmit and the transmit timer
-        *      is on we are just doing an ACK timeout and need to switch
-        *      to a keepalive.
-        */
-        
-       if (ack == sk->acked_seq) {               
-               sk->ack_backlog = 0;
-               sk->bytes_rcv = 0;
-               sk->ack_timed = 0;
-
-               if (sk->send_head == NULL && skb_peek(&sk->write_queue) == NULL
-                   && sk->ip_xmit_timeout == TIME_WRITE)       
-                 if(sk->keepopen) 
-                   reset_xmit_timer(sk,TIME_KEEPOPEN,TCP_TIMEOUT_LEN);
-                 else 
-                   delete_timer(sk);                           
-       }
-
-       /*
-        *      Fill in the packet and send it
-        */
-        
-       t1->ack_seq = htonl(ack);
-       t1->doff = sizeof(*t1)/4;
-       tcp_send_check(t1, sk->saddr, daddr, sizeof(*t1), sk);
-       if (sk->debug)
-                printk("\rtcp_ack: seq %x ack %x\n", sequence, ack);
-       tcp_statistics.TcpOutSegs++;
-       sk->prot->queue_xmit(sk, dev, buff, 1);
-}
-
-
-/* 
- *     This routine builds a generic TCP header. 
- */
- 
-extern __inline int tcp_build_header(struct tcphdr *th, struct sock *sk, int push)
-{
-
-       memcpy(th,(void *) &(sk->dummy_th), sizeof(*th));
-       th->seq = htonl(sk->write_seq);
-       th->psh =(push == 0) ? 1 : 0;
-       th->doff = sizeof(*th)/4;
-       th->ack = 1;
-       th->fin = 0;
-       sk->ack_backlog = 0;
-       sk->bytes_rcv = 0;
-       sk->ack_timed = 0;
-       th->ack_seq = htonl(sk->acked_seq);
-       sk->window = tcp_select_window(sk);
-       th->window = htons(sk->window);
-
-       return(sizeof(*th));
-}
-
-/*
- *     This routine copies from a user buffer into a socket,
- *     and starts the transmit system.
- */
-
-static int tcp_sendmsg(struct sock *sk, struct msghdr *msg,
-         int len, int nonblock, int flags)
-{
-       int copied = 0;
-       int copy;
-       int tmp;
-       int seglen;
-       int iovct=0;
-       struct sk_buff *skb;
-       struct sk_buff *send_tmp;
-       struct proto *prot;
-       struct device *dev = NULL;
-       unsigned char *from;
-       
-       /*
-        *      Do sanity checking for sendmsg/sendto/send
+        *      Do sanity checking for sendmsg/sendto/send
          */
          
         if (flags & ~(MSG_OOB|MSG_DONTROUTE))
@@ -2027,18 +1095,10 @@ static int tcp_sendmsg(struct sock *sk, struct msghdr *msg,
                                         return(-EAGAIN);
                                 }
  
-                               /*
-                                *      FIXME: here is another race condition. 
-                                */
-
-                               tmp = sk->wmem_alloc;
                                 release_sock(sk);
                                 cli();
-                               /*
-                                *      Again we will try to avoid it. 
-                                */
-                               if (tmp <= sk->wmem_alloc &&
-                                         (sk->state == TCP_ESTABLISHED||sk->state == TCP_CLOSE_WAIT)
+                               if (sk->wmem_alloc*2 > sk->sndbuf &&
+                                   (sk->state == TCP_ESTABLISHED||sk->state == TCP_CLOSE_WAIT)
                                         && sk->err == 0) 
                                 {
                                         sk->socket->flags &= ~SO_NOSPACE;
@@ -2142,7 +1202,7 @@ static int tcp_sendmsg(struct sock *sk, struct msghdr *msg,
   *      This is called for delayed acks also.
   */
   
-static void tcp_read_wakeup(struct sock *sk)
+void tcp_read_wakeup(struct sock *sk)
  {
         int tmp;
         struct device *dev = NULL;
@@ -2174,7 +1234,7 @@ static void tcp_read_wakeup(struct sock *sk)
         if (buff == NULL) 
         {
                 /* Try again real soon. */
-               reset_xmit_timer(sk, TIME_WRITE, HZ);
+               tcp_reset_xmit_timer(sk, TIME_WRITE, HZ);
                 return;
         }
  
@@ -2301,7 +1361,7 @@ static void cleanup_rbuf(struct sock *sk)
                 int was_active = del_timer(&sk->retransmit_timer);
                 if (!was_active || jiffies+TCP_ACK_TIME < sk->timer.expires) 
                 {
-                       reset_xmit_timer(sk, TIME_WRITE, TCP_ACK_TIME);
+                       tcp_reset_xmit_timer(sk, TIME_WRITE, TCP_ACK_TIME);
                 } 
                 else
                         add_timer(&sk->retransmit_timer);
@@ -2666,113 +1726,12 @@ static int tcp_close_state(struct sock *sk, int dead)
                 if(timer_active)
                         add_timer(&sk->timer);
                 else
-                       reset_msl_timer(sk, TIME_CLOSE, TCP_FIN_TIMEOUT);
+                       tcp_reset_msl_timer(sk, TIME_CLOSE, TCP_FIN_TIMEOUT);
         }
         
         return send_fin;
  }
  
-/*
- *     Send a fin.
- */
-
-static void tcp_send_fin(struct sock *sk)
-{
-       struct proto *prot =(struct proto *)sk->prot;
-       struct tcphdr *th =(struct tcphdr *)&sk->dummy_th;
-       struct tcphdr *t1;
-       struct sk_buff *buff;
-       struct device *dev=NULL;
-       int tmp;
-               
-       release_sock(sk); /* in case the malloc sleeps. */
-       
-       buff = sock_wmalloc(sk, MAX_RESET_SIZE,1 , GFP_KERNEL);
-       sk->inuse = 1;
-
-       if (buff == NULL)
-       {
-               /* This is a disaster if it occurs */
-               printk("tcp_send_fin: Impossible malloc failure");
-               return;
-       }
-
-       /*
-        *      Administrivia
-        */
-        
-       buff->sk = sk;
-       buff->localroute = sk->localroute;
-
-       /*
-        *      Put in the IP header and routing stuff. 
-        */
-
-       tmp = prot->build_header(buff,sk->saddr, sk->daddr, &dev,
-                          IPPROTO_TCP, sk->opt,
-                          sizeof(struct tcphdr),sk->ip_tos,sk->ip_ttl,&sk->ip_route_cache);
-       if (tmp < 0) 
-       {
-               int t;
-               /*
-                *      Finish anyway, treat this as a send that got lost. 
-                *      (Not good).
-                */
-                
-               buff->free = 1;
-               sock_wfree(sk,buff);
-               sk->write_seq++;
-               t=del_timer(&sk->timer);
-               if(t)
-                       add_timer(&sk->timer);
-               else
-                       reset_msl_timer(sk, TIME_CLOSE, TCP_TIMEWAIT_LEN);
-               return;
-       }
-       
-       /*
-        *      We ought to check if the end of the queue is a buffer and
-        *      if so simply add the fin to that buffer, not send it ahead.
-        */
-
-       t1 =(struct tcphdr *)skb_put(buff,sizeof(struct tcphdr));
-       buff->dev = dev;
-       memcpy(t1, th, sizeof(*t1));
-       buff->seq = sk->write_seq;
-       sk->write_seq++;
-       buff->end_seq = sk->write_seq;
-       t1->seq = htonl(buff->seq);
-       t1->ack = 1;
-       t1->ack_seq = htonl(sk->acked_seq);
-       t1->window = htons(sk->window=tcp_select_window(sk));
-       t1->fin = 1;
-       t1->rst = 0;
-       t1->doff = sizeof(*t1)/4;
-       tcp_send_check(t1, sk->saddr, sk->daddr, sizeof(*t1), sk);
-
-       /*
-        * If there is data in the write queue, the fin must be appended to
-        * the write queue.
-        */
-       
-       if (skb_peek(&sk->write_queue) != NULL) 
-       {
-               buff->free = 0;
-               if (buff->next != NULL) 
-               {
-                       printk("tcp_send_fin: next != NULL\n");
-                       skb_unlink(buff);
-               }
-               skb_queue_tail(&sk->write_queue, buff);
-       } 
-       else 
-       {
-               sk->sent_seq = sk->write_seq;
-               sk->prot->queue_xmit(sk, dev, buff, 0);
-               reset_xmit_timer(sk, TIME_WRITE, sk->rto);
-       }
-}
-
  /*
   *     Shutdown the sending side of a connection. Much like close except
   *     that we don't receive shut down or set sk->dead=1.
@@ -2829,374 +1788,249 @@ void tcp_shutdown(struct sock *sk, int how)
         release_sock(sk);
  }
  
-/*
- *     This routine will send an RST to the other tcp. 
- */
- 
-static void tcp_reset(unsigned long saddr, unsigned long daddr, struct tcphdr *th,
-         struct proto *prot, struct options *opt, struct device *dev, int tos, int ttl)
+static void tcp_close(struct sock *sk, int timeout)
  {
-       struct sk_buff *buff;
-       struct tcphdr *t1;
-       int tmp;
-       struct device *ndev=NULL;
-
         /*
-        *      Cannot reset a reset (Think about it).
-        */
-        
-       if(th->rst)
-               return;
-  
-       /*
-        * We need to grab some memory, and put together an RST,
+        * We need to grab some memory, and put together a FIN, 
          * and then put it into the queue to be sent.
          */
-
-       buff = sock_wmalloc(NULL, MAX_RESET_SIZE, 1, GFP_ATOMIC);
-       if (buff == NULL) 
-               return;
-
-       buff->sk = NULL;
-       buff->dev = dev;
-       buff->localroute = 0;
-
-       /*
-        *      Put in the IP header and routing stuff. 
-        */
-
-       tmp = prot->build_header(buff, saddr, daddr, &ndev, IPPROTO_TCP, opt,
-                          sizeof(struct tcphdr),tos,ttl,NULL);
-       if (tmp < 0) 
+       
+       sk->inuse = 1;
+       
+       tcp_cache_zap();
+       if(sk->state == TCP_LISTEN)
         {
-               buff->free = 1;
-               sock_wfree(NULL, buff);
+               /* Special case */
+               tcp_set_state(sk, TCP_CLOSE);
+               tcp_close_pending(sk);
+               release_sock(sk);
                 return;
         }
+       
+       sk->keepopen = 1;
+       sk->shutdown = SHUTDOWN_MASK;
  
-       t1 =(struct tcphdr *)skb_put(buff,sizeof(struct tcphdr));
-       memcpy(t1, th, sizeof(*t1));
+       if (!sk->dead) 
+               sk->state_change(sk);
+
+       if (timeout == 0) 
+       {
+               struct sk_buff *skb;
+               
+               /*
+                *  We need to flush the recv. buffs.  We do this only on the
+                *  descriptor close, not protocol-sourced closes, because the
+                *  reader process may not have drained the data yet!
+                */
+                
+               while((skb=skb_dequeue(&sk->receive_queue))!=NULL)
+                       kfree_skb(skb, FREE_READ);
+               /*
+                *      Get rid off any half-completed packets. 
+                */
+
+               if (sk->partial) 
+                       tcp_send_partial(sk);
+       }
  
+               
         /*
-        *      Swap the send and the receive. 
+        *      Timeout is not the same thing - however the code likes
+        *      to send both the same way (sigh).
          */
-
-       t1->dest = th->source;
-       t1->source = th->dest;
-       t1->rst = 1;  
-       t1->window = 0;
-  
-       if(th->ack)
+        
+       if(timeout)
         {
-               t1->ack = 0;
-               t1->seq = th->ack_seq;
-               t1->ack_seq = 0;
+               tcp_set_state(sk, TCP_CLOSE);   /* Dead */
         }
         else
         {
-               t1->ack = 1;
-               if(!th->syn)
-                       t1->ack_seq = th->seq;
-               else
-                       t1->ack_seq = htonl(ntohl(th->seq)+1);
-               t1->seq = 0;
+               if(tcp_close_state(sk,1)==1)
+               {
+                       tcp_send_fin(sk);
+               }
         }
-
-       t1->syn = 0;
-       t1->urg = 0;
-       t1->fin = 0;
-       t1->psh = 0;
-       t1->doff = sizeof(*t1)/4;
-       tcp_send_check(t1, saddr, daddr, sizeof(*t1), NULL);
-       prot->queue_xmit(NULL, ndev, buff, 1);
-       tcp_statistics.TcpOutSegs++;
+       release_sock(sk);
  }
  
  
  /*
- *     Look for tcp options. Parses everything but only knows about MSS.
- *     This routine is always called with the packet containing the SYN.
- *     However it may also be called with the ack to the SYN.  So you
- *     can't assume this is always the SYN.  It's always called after
- *     we have set up sk->mtu to our own MTU.
- *
- *     We need at minimum to add PAWS support here. Possibly large windows
- *     as Linux gets deployed on 100Mb/sec networks.
+ *     This will accept the next outstanding connection. 
   */
   
-static void tcp_options(struct sock *sk, struct tcphdr *th)
+static struct sock *tcp_accept(struct sock *sk, int flags)
  {
-       unsigned char *ptr;
-       int length=(th->doff*4)-sizeof(struct tcphdr);
-       int mss_seen = 0;
-    
-       ptr = (unsigned char *)(th + 1);
+       struct sock *newsk;
+       struct sk_buff *skb;
    
-       while(length>0)
+  /*
+   * We need to make sure that this socket is listening,
+   * and that it has something pending.
+   */
+
+       if (sk->state != TCP_LISTEN) 
         {
-               int opcode=*ptr++;
-               int opsize=*ptr++;
-               switch(opcode)
-               {
-                       case TCPOPT_EOL:
-                               return;
-                       case TCPOPT_NOP:        /* Ref: RFC 793 section 3.1 */
-                               length--;
-                               ptr--;          /* the opsize=*ptr++ above was a mistake */
-                               continue;
-                       
-                       default:
-                               if(opsize<=2)   /* Avoid silly options looping forever */
-                                       return;
-                               switch(opcode)
-                               {
-                                       case TCPOPT_MSS:
-                                               if(opsize==4 && th->syn)
-                                               {
-                                                       sk->mtu=min(sk->mtu,ntohs(*(unsigned short *)ptr));
-                                                       mss_seen = 1;
-                                               }
-                                               break;
-                                               /* Add other options here as people feel the urge to implement stuff like large windows */
-                               }
-                               ptr+=opsize-2;
-                               length-=opsize;
-               }
+               sk->err = EINVAL;
+               return(NULL); 
         }
-       if (th->syn) 
+
+       /* Avoid the race. */
+       cli();
+       sk->inuse = 1;
+
+       while((skb = tcp_dequeue_established(sk)) == NULL) 
         {
-               if (! mss_seen)
-                     sk->mtu=min(sk->mtu, 536);  /* default MSS if none sent */
-       }
-#ifdef CONFIG_INET_PCTCP
-       sk->mss = min(sk->max_window >> 1, sk->mtu);
-#else    
-       sk->mss = min(sk->max_window, sk->mtu);
-       sk->max_unacked = 2 * sk->mss;
-#endif  
-}
+               if (flags & O_NONBLOCK) 
+               {
+                       sti();
+                       release_sock(sk);
+                       sk->err = EAGAIN;
+                       return(NULL);
+               }
  
-static inline unsigned long default_mask(unsigned long dst)
-{
-       dst = ntohl(dst);
-       if (IN_CLASSA(dst))
-               return htonl(IN_CLASSA_NET);
-       if (IN_CLASSB(dst))
-               return htonl(IN_CLASSB_NET);
-       return htonl(IN_CLASSC_NET);
-}
+               release_sock(sk);
+               interruptible_sleep_on(sk->sleep);
+               if (current->signal & ~current->blocked) 
+               {
+                       sti();
+                       sk->err = ERESTARTSYS;
+                       return(NULL);
+               }
+               sk->inuse = 1;
+       }
+       sti();
  
-/*
- *     Default sequence number picking algorithm.
- *     As close as possible to RFC 793, which
- *     suggests using a 250kHz clock.
- *     Further reading shows this assumes 2MB/s networks.
- *     For 10MB/s ethernet, a 1MHz clock is appropriate.
- *     That's funny, Linux has one built in!  Use it!
- */
+       /*
+        *      Now all we need to do is return skb->sk. 
+        */
  
-extern inline u32 tcp_init_seq(void)
-{
-       struct timeval tv;
-       do_gettimeofday(&tv);
-       return tv.tv_usec+tv.tv_sec*1000000;
+       newsk = skb->sk;
+
+       kfree_skb(skb, FREE_READ);
+       sk->ack_backlog--;
+       release_sock(sk);
+       return(newsk);
  }
  
  /*
- *     This routine handles a connection request.
- *     It should make sure we haven't already responded.
- *     Because of the way BSD works, we have to send a syn/ack now.
- *     This also means it will be harder to close a socket which is
- *     listening.
+ *     This will initiate an outgoing connection. 
   */
   
-static void tcp_conn_request(struct sock *sk, struct sk_buff *skb,
-                unsigned long daddr, unsigned long saddr,
-                struct options *opt, struct device *dev, u32 seq)
+static int tcp_connect(struct sock *sk, struct sockaddr_in *usin, int addr_len)
  {
         struct sk_buff *buff;
-       struct tcphdr *t1;
+       struct device *dev=NULL;
         unsigned char *ptr;
-       struct sock *newsk;
-       struct tcphdr *th;
-       struct device *ndev=NULL;
         int tmp;
+       int atype;
+       struct tcphdr *t1;
         struct rtable *rt;
-  
-       th = skb->h.th;
  
-       /* If the socket is dead, don't accept the connection. */
-       if (!sk->dead) 
-       {
-               sk->data_ready(sk,0);
-       }
-       else 
-       {
-               if(sk->debug)
-                       printk("Reset on %p: Connect on dead socket.\n",sk);
-               tcp_reset(daddr, saddr, th, sk->prot, opt, dev, sk->ip_tos,sk->ip_ttl);
-               tcp_statistics.TcpAttemptFails++;
-               kfree_skb(skb, FREE_READ);
-               return;
-       }
+       if (sk->state != TCP_CLOSE) 
+               return(-EISCONN);
  
         /*
-        * Make sure we can accept more.  This will prevent a
-        * flurry of syns from eating up all our memory.
+        *      Don't allow a double connect.
          */
+               
+       if(sk->daddr)
+               return -EINVAL;
+       
+       if (addr_len < 8) 
+               return(-EINVAL);
  
-       if (sk->ack_backlog >= sk->max_ack_backlog) 
-       {
-               tcp_statistics.TcpAttemptFails++;
-               kfree_skb(skb, FREE_READ);
-               return;
-       }
+       if (usin->sin_family && usin->sin_family != AF_INET) 
+               return(-EAFNOSUPPORT);
  
+       /*
+        *      connect() to INADDR_ANY means loopback (BSD'ism).
+        */
+       
+       if(usin->sin_addr.s_addr==INADDR_ANY)
+               usin->sin_addr.s_addr=ip_my_addr();
+                 
         /*
-        * We need to build a new sock struct.
-        * It is sort of bad to have a socket without an inode attached
-        * to it, but the wake_up's will just wake up the listening socket,
-        * and if the listening socket is destroyed before this is taken
-        * off of the queue, this will take care of it.
+        *      Don't want a TCP connection going to a broadcast address 
          */
  
-       newsk = (struct sock *) kmalloc(sizeof(struct sock), GFP_ATOMIC);
-       if (newsk == NULL) 
-       {
-               /* just ignore the syn.  It will get retransmitted. */
-               tcp_statistics.TcpAttemptFails++;
-               kfree_skb(skb, FREE_READ);
-               return;
-       }
+       if ((atype=ip_chk_addr(usin->sin_addr.s_addr)) == IS_BROADCAST || atype==IS_MULTICAST) 
+               return -ENETUNREACH;
+  
+       sk->inuse = 1;
+       sk->daddr = usin->sin_addr.s_addr;
+       sk->write_seq = tcp_init_seq();
+       sk->window_seq = sk->write_seq;
+       sk->rcv_ack_seq = sk->write_seq -1;
+       sk->err = 0;
+       sk->dummy_th.dest = usin->sin_port;
+       release_sock(sk);
  
-       memcpy(newsk, sk, sizeof(*newsk));
-       newsk->opt = NULL;
-       newsk->ip_route_cache  = NULL;
-       if (opt && opt->optlen) {
-         sk->opt = (struct options*)kmalloc(sizeof(struct options)+opt->optlen, GFP_ATOMIC);
-         if (!sk->opt) {
-               kfree_s(newsk, sizeof(struct sock));
-               tcp_statistics.TcpAttemptFails++;
-               kfree_skb(skb, FREE_READ);
-               return;
-         }
-         if (ip_options_echo(sk->opt, opt, daddr, saddr, skb)) {
-               kfree_s(sk->opt, sizeof(struct options)+opt->optlen);
-               kfree_s(newsk, sizeof(struct sock));
-               tcp_statistics.TcpAttemptFails++;
-               kfree_skb(skb, FREE_READ);
-               return;
-         }
+       buff = sock_wmalloc(sk,MAX_SYN_SIZE,0, GFP_KERNEL);
+       if (buff == NULL) 
+       {
+               return(-ENOMEM);
         }
-       skb_queue_head_init(&newsk->write_queue);
-       skb_queue_head_init(&newsk->receive_queue);
-       newsk->send_head = NULL;
-       newsk->send_tail = NULL;
-       skb_queue_head_init(&newsk->back_log);
-       newsk->rtt = 0;         /*TCP_CONNECT_TIME<<3*/
-       newsk->rto = TCP_TIMEOUT_INIT;
-       newsk->mdev = 0;
-       newsk->max_window = 0;
-       newsk->cong_window = 1;
-       newsk->cong_count = 0;
-       newsk->ssthresh = 0;
-       newsk->backoff = 0;
-       newsk->blog = 0;
-       newsk->intr = 0;
-       newsk->proc = 0;
-       newsk->done = 0;
-       newsk->partial = NULL;
-       newsk->pair = NULL;
-       newsk->wmem_alloc = 0;
-       newsk->rmem_alloc = 0;
-       newsk->localroute = sk->localroute;
-
-       newsk->max_unacked = MAX_WINDOW - TCP_WINDOW_DIFF;
-
-       newsk->err = 0;
-       newsk->shutdown = 0;
-       newsk->ack_backlog = 0;
-       newsk->acked_seq = skb->seq+1;
-       newsk->lastwin_seq = skb->seq+1;
-       newsk->delay_acks = 1;
-       newsk->copied_seq = skb->seq+1;
-       newsk->fin_seq = skb->seq;
-       newsk->state = TCP_SYN_RECV;
-       newsk->timeout = 0;
-       newsk->ip_xmit_timeout = 0;
-       newsk->write_seq = seq; 
-       newsk->window_seq = newsk->write_seq;
-       newsk->rcv_ack_seq = newsk->write_seq;
-       newsk->urg_data = 0;
-       newsk->retransmits = 0;
-       newsk->linger=0;
-       newsk->destroy = 0;
-       init_timer(&newsk->timer);
-       newsk->timer.data = (unsigned long)newsk;
-       newsk->timer.function = &net_timer;
-       init_timer(&newsk->retransmit_timer);
-       newsk->retransmit_timer.data = (unsigned long)newsk;
-       newsk->retransmit_timer.function=&retransmit_timer;
-       newsk->dummy_th.source = skb->h.th->dest;
-       newsk->dummy_th.dest = skb->h.th->source;
+       sk->inuse = 1;
+       buff->sk = sk;
+       buff->free = 0;
+       buff->localroute = sk->localroute;
         
-       /*
-        *      Swap these two, they are from our point of view. 
-        */
-        
-       newsk->daddr = saddr;
-       newsk->saddr = daddr;
-       newsk->rcv_saddr = daddr;
-
-       put_sock(newsk->num,newsk);
-       newsk->dummy_th.res1 = 0;
-       newsk->dummy_th.doff = 6;
-       newsk->dummy_th.fin = 0;
-       newsk->dummy_th.syn = 0;
-       newsk->dummy_th.rst = 0;        
-       newsk->dummy_th.psh = 0;
-       newsk->dummy_th.ack = 0;
-       newsk->dummy_th.urg = 0;
-       newsk->dummy_th.res2 = 0;
-       newsk->acked_seq = skb->seq + 1;
-       newsk->copied_seq = skb->seq + 1;
-       newsk->socket = NULL;
-
-       /*
-        *      Grab the ttl and tos values and use them 
-        */
-
-       newsk->ip_ttl=sk->ip_ttl;
-       newsk->ip_tos=skb->ip_hdr->tos;
  
         /*
-        *      Use 512 or whatever user asked for 
+        *      Put in the IP header and routing stuff.
          */
+        
+       tmp = sk->prot->build_header(buff, sk->saddr, sk->daddr, &dev,
+               IPPROTO_TCP, NULL, MAX_SYN_SIZE,sk->ip_tos,sk->ip_ttl,&sk->ip_route_cache);
+       if (tmp < 0) 
+       {
+               sock_wfree(sk, buff);
+               release_sock(sk);
+               return(-ENETUNREACH);
+       }
+       if ((rt = sk->ip_route_cache) != NULL && !sk->saddr)
+               sk->saddr = rt->rt_src;
+       sk->rcv_saddr = sk->saddr;
  
-       /*
-        *      Note use of sk->user_mss, since user has no direct access to newsk 
-        */
+       t1 = (struct tcphdr *) skb_put(buff,sizeof(struct tcphdr));
  
-       rt = ip_rt_route(newsk->opt && newsk->opt->srr ? newsk->opt->faddr : saddr, 0);
-       newsk->ip_route_cache = rt;
+       memcpy(t1,(void *)&(sk->dummy_th), sizeof(*t1));
+       buff->seq = sk->write_seq++;
+       t1->seq = htonl(buff->seq);
+       sk->sent_seq = sk->write_seq;
+       buff->end_seq = sk->write_seq;
+       t1->ack = 0;
+       t1->window = 2;
+       t1->res1=0;
+       t1->res2=0;
+       t1->rst = 0;
+       t1->urg = 0;
+       t1->psh = 0;
+       t1->syn = 1;
+       t1->urg_ptr = 0;
+       t1->doff = 6;
+       /* use 512 or whatever user asked for */
         
         if(rt!=NULL && (rt->rt_flags&RTF_WINDOW))
-               newsk->window_clamp = rt->rt_window;
+               sk->window_clamp=rt->rt_window;
         else
-               newsk->window_clamp = 0;
-               
+               sk->window_clamp=0;
+
         if (sk->user_mss)
-               newsk->mtu = sk->user_mss;
+               sk->mtu = sk->user_mss;
         else if (rt)
-               newsk->mtu = rt->rt_mtu - sizeof(struct iphdr) - sizeof(struct tcphdr);
+               sk->mtu = rt->rt_mtu - sizeof(struct iphdr) - sizeof(struct tcphdr);
         else 
-               newsk->mtu = 576 - sizeof(struct iphdr) - sizeof(struct tcphdr);
+               sk->mtu = 576 - sizeof(struct iphdr) - sizeof(struct tcphdr);
  
         /*
-        *      But not bigger than device MTU 
+        *      but not bigger than device MTU 
          */
  
-       newsk->mtu = min(newsk->mtu, dev->mtu - sizeof(struct iphdr) - sizeof(struct tcphdr));
+       if(sk->mtu <32)
+               sk->mtu = 32;   /* Sanity limit */
+               
+       sk->mtu = min(sk->mtu, dev->mtu - sizeof(struct iphdr) - sizeof(struct tcphdr));
  
  #ifdef CONFIG_SKIP
         
@@ -3212,2328 +2046,42 @@ static void tcp_conn_request(struct sock *sk, struct sk_buff *skb,
         if(skip_pick_mtu!=NULL)         /* If SKIP is loaded.. */
                 sk->mtu=skip_pick_mtu(sk->mtu,dev);
  #endif
+       
         /*
-        *      This will min with what arrived in the packet 
+        *      Put in the TCP options to say MTU. 
          */
  
-       tcp_options(newsk,skb->h.th);
-       
-       tcp_cache_zap();
-
-       buff = sock_wmalloc(newsk, MAX_SYN_SIZE, 1, GFP_ATOMIC);
-       if (buff == NULL) 
-       {
-               sk->err = ENOMEM;
-               newsk->dead = 1;
-               newsk->state = TCP_CLOSE;
-               /* And this will destroy it */
-               release_sock(newsk);
-               kfree_skb(skb, FREE_READ);
-               tcp_statistics.TcpAttemptFails++;
-               return;
-       }
-  
-       buff->sk = newsk;
-       buff->localroute = newsk->localroute;
-
-       /*
-        *      Put in the IP header and routing stuff. 
-        */
-
-       tmp = sk->prot->build_header(buff, newsk->saddr, newsk->daddr, &ndev,
-                              IPPROTO_TCP, NULL, MAX_SYN_SIZE,sk->ip_tos,sk->ip_ttl,&newsk->ip_route_cache);
-
-       /*
-        *      Something went wrong. 
-        */
-
-       if (tmp < 0) 
-       {
-               sk->err = tmp;
-               buff->free = 1;
-               kfree_skb(buff,FREE_WRITE);
-               newsk->dead = 1;
-               newsk->state = TCP_CLOSE;
-               release_sock(newsk);
-               skb->sk = sk;
-               kfree_skb(skb, FREE_READ);
-               tcp_statistics.TcpAttemptFails++;
-               return;
-       }
-
-       t1 =(struct tcphdr *)skb_put(buff,sizeof(struct tcphdr));
-  
-       memcpy(t1, skb->h.th, sizeof(*t1));
-       buff->seq = newsk->write_seq++;
-       buff->end_seq = newsk->write_seq;
-       /*
-        *      Swap the send and the receive. 
-        */
-       t1->dest = skb->h.th->source;
-       t1->source = newsk->dummy_th.source;
-       t1->seq = ntohl(buff->seq);
-       t1->ack = 1;
-       newsk->sent_seq = newsk->write_seq;
-       t1->window = ntohs(tcp_select_window(newsk));
-       t1->res1 = 0;
-       t1->res2 = 0;
-       t1->rst = 0;
-       t1->urg = 0;
-       t1->psh = 0;
-       t1->syn = 1;
-       t1->ack_seq = htonl(newsk->acked_seq);
-       t1->doff = sizeof(*t1)/4+1;
-       ptr = skb_put(buff,4);
-       ptr[0] = 2;
-       ptr[1] = 4;
-       ptr[2] = ((newsk->mtu) >> 8) & 0xff;
-       ptr[3] =(newsk->mtu) & 0xff;
-
-       tcp_send_check(t1, daddr, saddr, sizeof(*t1)+4, newsk);
-       newsk->prot->queue_xmit(newsk, ndev, buff, 0);
-       reset_xmit_timer(newsk, TIME_WRITE , TCP_TIMEOUT_INIT);
-       skb->sk = newsk;
-
-       /*
-        *      Charge the sock_buff to newsk. 
-        */
-        
-       sk->rmem_alloc -= skb->truesize;
-       newsk->rmem_alloc += skb->truesize;
-       
-       skb_queue_tail(&sk->receive_queue,skb);
-       sk->ack_backlog++;
-       release_sock(newsk);
-       tcp_statistics.TcpOutSegs++;
-}
-
-
-static void tcp_close(struct sock *sk, int timeout)
-{
-       /*
-        * We need to grab some memory, and put together a FIN, 
-        * and then put it into the queue to be sent.
-        */
-       
-       sk->inuse = 1;
-       
-       if(th_cache_sk==sk)
-               tcp_cache_zap();
-       if(sk->state == TCP_LISTEN)
-       {
-               /* Special case */
-               tcp_set_state(sk, TCP_CLOSE);
-               tcp_close_pending(sk);
-               release_sock(sk);
-               return;
-       }
-       
-       sk->keepopen = 1;
-       sk->shutdown = SHUTDOWN_MASK;
-
-       if (!sk->dead) 
-               sk->state_change(sk);
-
-       if (timeout == 0) 
-       {
-               struct sk_buff *skb;
-               
-               /*
-                *  We need to flush the recv. buffs.  We do this only on the
-                *  descriptor close, not protocol-sourced closes, because the
-                *  reader process may not have drained the data yet!
-                */
-                
-               while((skb=skb_dequeue(&sk->receive_queue))!=NULL)
-                       kfree_skb(skb, FREE_READ);
-               /*
-                *      Get rid off any half-completed packets. 
-                */
-
-               if (sk->partial) 
-                       tcp_send_partial(sk);
-       }
-
-               
-       /*
-        *      Timeout is not the same thing - however the code likes
-        *      to send both the same way (sigh).
-        */
-        
-       if(timeout)
-       {
-               tcp_set_state(sk, TCP_CLOSE);   /* Dead */
-       }
-       else
-       {
-               if(tcp_close_state(sk,1)==1)
-               {
-                       tcp_send_fin(sk);
-               }
-       }
-       release_sock(sk);
-}
-
-
-/*
- *     This routine takes stuff off of the write queue,
- *     and puts it in the xmit queue. This happens as incoming acks
- *     open up the remote window for us.
- */
- 
-static void tcp_write_xmit(struct sock *sk)
-{
-       struct sk_buff *skb;
-
-       /*
-        *      The bytes will have to remain here. In time closedown will
-        *      empty the write queue and all will be happy 
-        */
-
-       if(sk->zapped)
-               return;
-
-       /*
-        *      Anything on the transmit queue that fits the window can
-        *      be added providing we are not
-        *
-        *      a) retransmitting (Nagle's rule)
-        *      b) exceeding our congestion window.
-        */
-        
-       while((skb = skb_peek(&sk->write_queue)) != NULL &&
-               before(skb->end_seq, sk->window_seq + 1) &&
-               (sk->retransmits == 0 ||
-                sk->ip_xmit_timeout != TIME_WRITE ||
-                before(skb->end_seq, sk->rcv_ack_seq + 1))
-               && sk->packets_out < sk->cong_window) 
-       {
-               IS_SKB(skb);
-               skb_unlink(skb);
-               
-               /*
-                *      See if we really need to send the packet. 
-                */
-                
-               if (before(skb->end_seq, sk->rcv_ack_seq +1)) 
-               {
-                       /*
-                        *      This is acked data. We can discard it. This 
-                        *      cannot currently occur.
-                        */
-                        
-                       sk->retransmits = 0;
-                       kfree_skb(skb, FREE_WRITE);
-                       if (!sk->dead) 
-                               sk->write_space(sk);
-               } 
-               else
-               {
-                       struct tcphdr *th;
-                       struct iphdr *iph;
-                       int size;
-/*
- * put in the ack seq and window at this point rather than earlier,
- * in order to keep them monotonic.  We really want to avoid taking
- * back window allocations.  That's legal, but RFC1122 says it's frowned on.
- * Ack and window will in general have changed since this packet was put
- * on the write queue.
- */
-                       iph = skb->ip_hdr;
-                       th = (struct tcphdr *)(((char *)iph) +(iph->ihl << 2));
-                       size = skb->len - (((unsigned char *) th) - skb->data);
-#ifndef CONFIG_NO_PATH_MTU_DISCOVERY
-                       if (size > sk->mtu - sizeof(struct iphdr))
-                       {
-                               iph->frag_off &= ~htons(IP_DF);
-                               ip_send_check(iph);
-                       }
-#endif
-                       
-                       th->ack_seq = htonl(sk->acked_seq);
-                       th->window = htons(tcp_select_window(sk));
-
-                       tcp_send_check(th, sk->saddr, sk->daddr, size, sk);
-
-                       sk->sent_seq = skb->end_seq;
-                       
-                       /*
-                        *      IP manages our queue for some crazy reason
-                        */
-                        
-                       sk->prot->queue_xmit(sk, skb->dev, skb, skb->free);
-                       
-                       
-                       sk->ack_backlog = 0;
-                       sk->bytes_rcv = 0;
-
-                       /*
-                        *      Again we slide the timer wrongly
-                        */
-                        
-                       reset_xmit_timer(sk, TIME_WRITE, sk->rto);
-               }
-       }
-}
-
-
-/*
- *     This routine deals with incoming acks, but not outgoing ones.
- */
-
-extern __inline__ int tcp_ack(struct sock *sk, struct tcphdr *th, unsigned long saddr, int len)
-{
-       u32 ack;
-       int flag = 0;
-
-       /* 
-        * 1 - there was data in packet as well as ack or new data is sent or 
-        *     in shutdown state
-        * 2 - data from retransmit queue was acked and removed
-        * 4 - window shrunk or data from retransmit queue was acked and removed
-        */
-
-       if(sk->zapped)
-               return(1);      /* Dead, cant ack any more so why bother */
-
-       /*
-        *      Have we discovered a larger window
-        */
-        
-       ack = ntohl(th->ack_seq);
-
-       if (ntohs(th->window) > sk->max_window) 
-       {
-               sk->max_window = ntohs(th->window);
-#ifdef CONFIG_INET_PCTCP
-               /* Hack because we don't send partial packets to non SWS
-                  handling hosts */
-               sk->mss = min(sk->max_window>>1, sk->mtu);
-#else
-               sk->mss = min(sk->max_window, sk->mtu);
-#endif 
-       }
-
-       /*
-        *      We have dropped back to keepalive timeouts. Thus we have
-        *      no retransmits pending.
-        */
-        
-       if (sk->retransmits && sk->ip_xmit_timeout == TIME_KEEPOPEN)
-               sk->retransmits = 0;
-
-       /*
-        *      If the ack is newer than sent or older than previous acks
-        *      then we can probably ignore it.
-        */
-        
-       if (after(ack, sk->sent_seq) || before(ack, sk->rcv_ack_seq)) 
-       {
-               if(sk->debug)
-                       printk("Ack ignored %u %u\n",ack,sk->sent_seq);
-                       
-               /*
-                *      Keepalive processing.
-                */
-                
-               if (after(ack, sk->sent_seq)) 
-               {
-                       return(0);
-               }
-               
-               /*
-                *      Restart the keepalive timer.
-                */
-                
-               if (sk->keepopen) 
-               {
-                       if(sk->ip_xmit_timeout==TIME_KEEPOPEN)
-                               reset_xmit_timer(sk, TIME_KEEPOPEN, TCP_TIMEOUT_LEN);
-               }
-               return(1);
-       }
-
-       /*
-        *      If there is data set flag 1
-        */
-        
-       if (len != th->doff*4) 
-               flag |= 1;
-
-       /*
-        *      See if our window has been shrunk. 
-        */
-
-       if (after(sk->window_seq, ack+ntohs(th->window))) 
-       {
-               /*
-                * We may need to move packets from the send queue
-                * to the write queue, if the window has been shrunk on us.
-                * The RFC says you are not allowed to shrink your window
-                * like this, but if the other end does, you must be able
-                * to deal with it.
-                */
-               struct sk_buff *skb;
-               struct sk_buff *skb2;
-               struct sk_buff *wskb = NULL;
-       
-               skb2 = sk->send_head;
-               sk->send_head = NULL;
-               sk->send_tail = NULL;
-       
-               /*
-                *      This is an artifact of a flawed concept. We want one
-                *      queue and a smarter send routine when we send all.
-                */
-       
-               flag |= 4;      /* Window changed */
-       
-               sk->window_seq = ack + ntohs(th->window);
-               cli();
-               while (skb2 != NULL) 
-               {
-                       skb = skb2;
-                       skb2 = skb->link3;
-                       skb->link3 = NULL;
-                       if (after(skb->end_seq, sk->window_seq)) 
-                       {
-                               if (sk->packets_out > 0) 
-                                       sk->packets_out--;
-                               /* We may need to remove this from the dev send list. */
-                               if (skb->next != NULL) 
-                               {
-                                       skb_unlink(skb);                                
-                               }
-                               /* Now add it to the write_queue. */
-                               if (wskb == NULL)
-                                       skb_queue_head(&sk->write_queue,skb);
-                               else
-                                       skb_append(wskb,skb);
-                               wskb = skb;
-                       } 
-                       else 
-                       {
-                               if (sk->send_head == NULL) 
-                               {
-                                       sk->send_head = skb;
-                                       sk->send_tail = skb;
-                               }
-                               else
-                               {
-                                       sk->send_tail->link3 = skb;
-                                       sk->send_tail = skb;
-                               }
-                               skb->link3 = NULL;
-                       }
-               }
-               sti();
-       }
-
-       /*
-        *      Pipe has emptied
-        */
-        
-       if (sk->send_tail == NULL || sk->send_head == NULL) 
-       {
-               sk->send_head = NULL;
-               sk->send_tail = NULL;
-               sk->packets_out= 0;
-       }
-
-       /*
-        *      Update the right hand window edge of the host
-        */
-        
-       sk->window_seq = ack + ntohs(th->window);
-
-       /*
-        *      We don't want too many packets out there. 
-        */
-        
-       if (sk->ip_xmit_timeout == TIME_WRITE && 
-               sk->cong_window < 2048 && after(ack, sk->rcv_ack_seq)) 
-       {
-               /* 
-                * This is Jacobson's slow start and congestion avoidance. 
-                * SIGCOMM '88, p. 328.  Because we keep cong_window in integral
-                * mss's, we can't do cwnd += 1 / cwnd.  Instead, maintain a 
-                * counter and increment it once every cwnd times.  It's possible
-                * that this should be done only if sk->retransmits == 0.  I'm
-                * interpreting "new data is acked" as including data that has
-                * been retransmitted but is just now being acked.
-                */
-               if (sk->cong_window < sk->ssthresh)  
-                       /* 
-                        *      In "safe" area, increase
-                        */
-                       sk->cong_window++;
-               else 
-               {
-                       /*
-                        *      In dangerous area, increase slowly.  In theory this is
-                        *      sk->cong_window += 1 / sk->cong_window
-                        */
-                       if (sk->cong_count >= sk->cong_window) 
-                       {
-                               sk->cong_window++;
-                               sk->cong_count = 0;
-                       }
-                       else 
-                               sk->cong_count++;
-               }
-       }
-
-       /*
-        *      Remember the highest ack received.
-        */
-        
-       sk->rcv_ack_seq = ack;
-       
-       /*
-        *      We passed data and got it acked, remove any soft error
-        *      log. Something worked...
-        */
-        
-       sk->err_soft = 0;
-
-       /*
-        *      If this ack opens up a zero window, clear backoff.  It was
-        *      being used to time the probes, and is probably far higher than
-        *      it needs to be for normal retransmission.
-        */
-
-       if (sk->ip_xmit_timeout == TIME_PROBE0) 
-       {
-               sk->retransmits = 0;    /* Our probe was answered */
-               
-               /*
-                *      Was it a usable window open ?
-                */
-                
-               if (skb_peek(&sk->write_queue) != NULL &&   /* should always be non-null */
-                   ! before (sk->window_seq, sk->write_queue.next->end_seq)) 
-               {
-                       sk->backoff = 0;
-                       
-                       /*
-                        *      Recompute rto from rtt.  this eliminates any backoff.
-                        */
-
-                       sk->rto = ((sk->rtt >> 2) + sk->mdev) >> 1;
-                       if (sk->rto > 120*HZ)
-                               sk->rto = 120*HZ;
-                       if (sk->rto < HZ/5)     /* Was 1*HZ, then 1 - turns out we must allow about
-                                                  .2 of a second because of BSD delayed acks - on a 100Mb/sec link
-                                                  .2 of a second is going to need huge windows (SIGH) */
-                       sk->rto = HZ/5;
-               }
-       }
-
-       /* 
-        *      See if we can take anything off of the retransmit queue.
-        */
-   
-       while(sk->send_head != NULL) 
-       {
-               /* Check for a bug. */
-               if (sk->send_head->link3 &&
-                   after(sk->send_head->end_seq, sk->send_head->link3->end_seq)) 
-                       printk("INET: tcp.c: *** bug send_list out of order.\n");
-                       
-               /*
-                *      If our packet is before the ack sequence we can
-                *      discard it as it's confirmed to have arrived the other end.
-                */
-                
-               if (before(sk->send_head->end_seq, ack+1)) 
-               {
-                       struct sk_buff *oskb;   
-                       if (sk->retransmits) 
-                       {       
-                               /*
-                                *      We were retransmitting.  don't count this in RTT est 
-                                */
-                               flag |= 2;
-
-                               /*
-                                * even though we've gotten an ack, we're still
-                                * retransmitting as long as we're sending from
-                                * the retransmit queue.  Keeping retransmits non-zero
-                                * prevents us from getting new data interspersed with
-                                * retransmissions.
-                                */
-
-                               if (sk->send_head->link3)       /* Any more queued retransmits? */
-                                       sk->retransmits = 1;
-                               else
-                                       sk->retransmits = 0;
-                       }
-                       /*
-                        * Note that we only reset backoff and rto in the
-                        * rtt recomputation code.  And that doesn't happen
-                        * if there were retransmissions in effect.  So the
-                        * first new packet after the retransmissions is
-                        * sent with the backoff still in effect.  Not until
-                        * we get an ack from a non-retransmitted packet do
-                        * we reset the backoff and rto.  This allows us to deal
-                        * with a situation where the network delay has increased
-                        * suddenly.  I.e. Karn's algorithm. (SIGCOMM '87, p5.)
-                        */
-
-                       /*
-                        *      We have one less packet out there. 
-                        */
-                        
-                       if (sk->packets_out > 0) 
-                               sk->packets_out --;
-                       /* 
-                        *      Wake up the process, it can probably write more. 
-                        */
-                       if (!sk->dead) 
-                               sk->write_space(sk);
-                       oskb = sk->send_head;
-
-                       if (!(flag&2))  /* Not retransmitting */
-                       {
-                               long m;
-       
-                               /*
-                                *      The following amusing code comes from Jacobson's
-                                *      article in SIGCOMM '88.  Note that rtt and mdev
-                                *      are scaled versions of rtt and mean deviation.
-                                *      This is designed to be as fast as possible 
-                                *      m stands for "measurement".
-                                */
-       
-                               m = jiffies - oskb->when;  /* RTT */
-                               if(m<=0)
-                                       m=1;            /* IS THIS RIGHT FOR <0 ??? */
-                               m -= (sk->rtt >> 3);    /* m is now error in rtt est */
-                               sk->rtt += m;           /* rtt = 7/8 rtt + 1/8 new */
-                               if (m < 0)
-                                       m = -m;         /* m is now abs(error) */
-                               m -= (sk->mdev >> 2);   /* similar update on mdev */
-                               sk->mdev += m;          /* mdev = 3/4 mdev + 1/4 new */
-       
-                               /*
-                                *      Now update timeout.  Note that this removes any backoff.
-                                */
-                        
-                               sk->rto = ((sk->rtt >> 2) + sk->mdev) >> 1;
-                               if (sk->rto > 120*HZ)
-                                       sk->rto = 120*HZ;
-                               if (sk->rto < HZ/5)     /* Was 1*HZ - keep .2 as minimum cos of the BSD delayed acks */
-                                       sk->rto = HZ/5;
-                               sk->backoff = 0;
-                       }
-                       flag |= (2|4);  /* 2 is really more like 'don't adjust the rtt 
-                                          In this case as we just set it up */
-                       cli();
-                       oskb = sk->send_head;
-                       IS_SKB(oskb);
-                       sk->send_head = oskb->link3;
-                       if (sk->send_head == NULL) 
-                       {
-                               sk->send_tail = NULL;
-                       }
-
-               /*
-                *      We may need to remove this from the dev send list. 
-                */
-
-                       if (oskb->next)
-                               skb_unlink(oskb);
-                       sti();
-                       kfree_skb(oskb, FREE_WRITE); /* write. */
-                       if (!sk->dead) 
-                               sk->write_space(sk);
-               }
-               else
-               {
-                       break;
-               }
-       }
-
-       /*
-        * XXX someone ought to look at this too.. at the moment, if skb_peek()
-        * returns non-NULL, we complete ignore the timer stuff in the else
-        * clause.  We ought to organize the code so that else clause can
-        * (should) be executed regardless, possibly moving the PROBE timer
-        * reset over.  The skb_peek() thing should only move stuff to the
-        * write queue, NOT also manage the timer functions.
-        */
-
-       /*
-        * Maybe we can take some stuff off of the write queue,
-        * and put it onto the xmit queue.
-        */
-       if (skb_peek(&sk->write_queue) != NULL) 
-       {
-               if (after (sk->window_seq+1, sk->write_queue.next->end_seq) &&
-                       (sk->retransmits == 0 || 
-                        sk->ip_xmit_timeout != TIME_WRITE ||
-                        before(sk->write_queue.next->end_seq, sk->rcv_ack_seq + 1))
-                       && sk->packets_out < sk->cong_window) 
-               {
-                       /*
-                        *      Add more data to the send queue.
-                        */
-                       flag |= 1;
-                       tcp_write_xmit(sk);
-               }
-               else if (before(sk->window_seq, sk->write_queue.next->end_seq) &&
-                       sk->send_head == NULL &&
-                       sk->ack_backlog == 0 &&
-                       sk->state != TCP_TIME_WAIT) 
-               {
-                       /*
-                        *      Data to queue but no room.
-                        */
-                       reset_xmit_timer(sk, TIME_PROBE0, sk->rto);
-               }               
-       }
-       else
-       {
-               /*
-                * from TIME_WAIT we stay in TIME_WAIT as long as we rx packets
-                * from TCP_CLOSE we don't do anything
-                *
-                * from anything else, if there is write data (or fin) pending,
-                * we use a TIME_WRITE timeout, else if keepalive we reset to
-                * a KEEPALIVE timeout, else we delete the timer.
-                *
-                * We do not set flag for nominal write data, otherwise we may
-                * force a state where we start to write itsy bitsy tidbits
-                * of data.
-                */
-
-               switch(sk->state) {
-               case TCP_TIME_WAIT:
-                       /*
-                        * keep us in TIME_WAIT until we stop getting packets,
-                        * reset the timeout.
-                        */
-                       reset_msl_timer(sk, TIME_CLOSE, TCP_TIMEWAIT_LEN);
-                       break;
-               case TCP_CLOSE:
-                       /*
-                        * don't touch the timer.
-                        */
-                       break;
-               default:
-                       /*
-                        *      Must check send_head, write_queue, and ack_backlog
-                        *      to determine which timeout to use.
-                        */
-                       if (sk->send_head || skb_peek(&sk->write_queue) != NULL || sk->ack_backlog) {
-                               reset_xmit_timer(sk, TIME_WRITE, sk->rto);
-                       } else if (sk->keepopen) {
-                               reset_xmit_timer(sk, TIME_KEEPOPEN, TCP_TIMEOUT_LEN);
-                       } else {
-                               del_timer(&sk->retransmit_timer);
-                               sk->ip_xmit_timeout = 0;
-                       }
-                       break;
-               }
-       }
-
-       /*
-        *      We have nothing queued but space to send. Send any partial
-        *      packets immediately (end of Nagle rule application).
-        */
-        
-       if (sk->packets_out == 0 && sk->partial != NULL &&
-               skb_peek(&sk->write_queue) == NULL && sk->send_head == NULL) 
-       {
-               flag |= 1;
-               tcp_send_partial(sk);
-       }
-
-       /*
-        * In the LAST_ACK case, the other end FIN'd us.  We then FIN'd them, and
-        * we are now waiting for an acknowledge to our FIN.  The other end is
-        * already in TIME_WAIT.
-        *
-        * Move to TCP_CLOSE on success.
-        */
-
-       if (sk->state == TCP_LAST_ACK) 
-       {
-               if (!sk->dead)
-                       sk->state_change(sk);
-               if(sk->debug)
-                       printk("rcv_ack_seq: %X==%X, acked_seq: %X==%X\n",
-                               sk->rcv_ack_seq,sk->write_seq,sk->acked_seq,sk->fin_seq);
-               if (sk->rcv_ack_seq == sk->write_seq /*&& sk->acked_seq == sk->fin_seq*/) 
-               {
-                       flag |= 1;
-                       sk->shutdown = SHUTDOWN_MASK;
-                       tcp_set_state(sk,TCP_CLOSE);
-                       return 1;
-               }
-       }
-
-       /*
-        *      Incoming ACK to a FIN we sent in the case of our initiating the close.
-        *
-        *      Move to FIN_WAIT2 to await a FIN from the other end. Set
-        *      SEND_SHUTDOWN but not RCV_SHUTDOWN as data can still be coming in.
-        */
-
-       if (sk->state == TCP_FIN_WAIT1) 
-       {
-
-               if (!sk->dead) 
-                       sk->state_change(sk);
-               if (sk->rcv_ack_seq == sk->write_seq) 
-               {
-                       flag |= 1;
-                       sk->shutdown |= SEND_SHUTDOWN;
-                       tcp_set_state(sk, TCP_FIN_WAIT2);
-               }
-       }
-
-       /*
-        *      Incoming ACK to a FIN we sent in the case of a simultaneous close.
-        *
-        *      Move to TIME_WAIT
-        */
-
-       if (sk->state == TCP_CLOSING) 
-       {
-
-               if (!sk->dead) 
-                       sk->state_change(sk);
-               if (sk->rcv_ack_seq == sk->write_seq) 
-               {
-                       flag |= 1;
-                       tcp_time_wait(sk);
-               }
-       }
-       
-       /*
-        *      Final ack of a three way shake 
-        */
-        
-       if(sk->state==TCP_SYN_RECV)
-       {
-               tcp_set_state(sk, TCP_ESTABLISHED);
-               tcp_options(sk,th);
-               sk->dummy_th.dest=th->source;
-               sk->copied_seq = sk->acked_seq;
-               if(!sk->dead)
-                       sk->state_change(sk);
-               if(sk->max_window==0)
-               {
-                       sk->max_window=32;      /* Sanity check */
-                       sk->mss=min(sk->max_window,sk->mtu);
-               }
-       }
-       
-       /*
-        * I make no guarantees about the first clause in the following
-        * test, i.e. "(!flag) || (flag&4)".  I'm not entirely sure under
-        * what conditions "!flag" would be true.  However I think the rest
-        * of the conditions would prevent that from causing any
-        * unnecessary retransmission. 
-        *   Clearly if the first packet has expired it should be 
-        * retransmitted.  The other alternative, "flag&2 && retransmits", is
-        * harder to explain:  You have to look carefully at how and when the
-        * timer is set and with what timeout.  The most recent transmission always
-        * sets the timer.  So in general if the most recent thing has timed
-        * out, everything before it has as well.  So we want to go ahead and
-        * retransmit some more.  If we didn't explicitly test for this
-        * condition with "flag&2 && retransmits", chances are "when + rto < jiffies"
-        * would not be true.  If you look at the pattern of timing, you can
-        * show that rto is increased fast enough that the next packet would
-        * almost never be retransmitted immediately.  Then you'd end up
-        * waiting for a timeout to send each packet on the retransmission
-        * queue.  With my implementation of the Karn sampling algorithm,
-        * the timeout would double each time.  The net result is that it would
-        * take a hideous amount of time to recover from a single dropped packet.
-        * It's possible that there should also be a test for TIME_WRITE, but
-        * I think as long as "send_head != NULL" and "retransmit" is on, we've
-        * got to be in real retransmission mode.
-        *   Note that tcp_do_retransmit is called with all==1.  Setting cong_window
-        * back to 1 at the timeout will cause us to send 1, then 2, etc. packets.
-        * As long as no further losses occur, this seems reasonable.
-        */
-       
-       if (((!flag) || (flag&4)) && sk->send_head != NULL &&
-              (((flag&2) && sk->retransmits) ||
-              (sk->send_head->when + sk->rto < jiffies))) 
-       {
-               if(sk->send_head->when + sk->rto < jiffies)
-                       tcp_retransmit(sk,0);   
-               else
-               {
-                       tcp_do_retransmit(sk, 1);
-                       reset_xmit_timer(sk, TIME_WRITE, sk->rto);
-               }
-       }
-
-       return(1);
-}
-
-
-/*
- *     Act on a FIN once it is validly part of the received sequence
- *     space (never earlier, while there are still holes in front of it).
- *
- *     The end sequence of the FIN-bearing buffer is stored in
- *     sk->fin_seq, a socket that is not dead gets its state_change
- *     callback plus an async wakeup, and the connection state advances:
- *
- *       SYN-RECV, SYN-SENT, ESTABLISHED  ->  CLOSE-WAIT
- *       FIN-WAIT-1                       ->  CLOSING (simultaneous close)
- *       FIN-WAIT-2                       ->  TIME-WAIT
- *       TIME-WAIT                        ->  unchanged, timer restarted
- *       CLOSE-WAIT, CLOSING, CLOSE       ->  unchanged
- *       anything else                    ->  LAST-ACK, close timer started
- *
- *     The return value is always 0.
- */
- 
-static int tcp_fin(struct sk_buff *skb, struct sock *sk, struct tcphdr *th)
-{
-       /* Remember where the FIN sits in sequence space. */
-       sk->fin_seq = skb->end_seq;
-
-       /* Let whoever still holds the socket know something changed. */
-       if (!sk->dead)
-       {
-               sk->state_change(sk);
-               sock_wake_async(sk->socket, 1);
-       }
-
-       switch (sk->state)
-       {
-               case TCP_CLOSE_WAIT:
-               case TCP_CLOSING:
-               case TCP_CLOSE:
-                       /*
-                        * Retransmitted FIN, or we are already closed:
-                        * there is nothing to do.
-                        */
-                       break;
-
-               case TCP_SYN_RECV:
-               case TCP_SYN_SENT:
-               case TCP_ESTABLISHED:
-                       /*
-                        * Go to CLOSE_WAIT; sending the ack has already
-                        * been dealt with by tcp_data().
-                        */
-                       tcp_set_state(sk, TCP_CLOSE_WAIT);
-                       if (th->rst)
-                               sk->shutdown = SHUTDOWN_MASK;
-                       break;
-
-               case TCP_TIME_WAIT:
-                       /*
-                        * Retransmitted FIN: just restart the
-                        * TIME_WAIT timer.
-                        */
-                       reset_msl_timer(sk, TIME_CLOSE, TCP_TIMEWAIT_LEN);
-                       break;
-
-               case TCP_FIN_WAIT1:
-                       /*
-                        * Simultaneous close: the FIN we received must be
-                        * acked and we move to CLOSING.  A WRITE timeout is
-                        * left running so that we either reach TIME_WAIT
-                        * when it fires or resend our own FIN; the
-                        * TIME_WRITE handler already copes with this.
-                        */
-                       if (sk->ip_xmit_timeout != TIME_WRITE)
-                               reset_xmit_timer(sk, TIME_WRITE, sk->rto);
-                       tcp_set_state(sk, TCP_CLOSING);
-                       break;
-
-               case TCP_FIN_WAIT2:
-                       /*
-                        * The peer's FIN completes the close: start the
-                        * TIME_WAIT timer and enter TIME_WAIT.
-                        */
-                       reset_msl_timer(sk, TIME_CLOSE, TCP_TIMEWAIT_LEN);
-                       sk->shutdown |= SHUTDOWN_MASK;
-                       tcp_set_state(sk, TCP_TIME_WAIT);
-                       break;
-
-               default:
-                       /* Every remaining state ends up in LAST_ACK. */
-                       tcp_set_state(sk, TCP_LAST_ACK);
-
-                       /* Start the timers. */
-                       reset_msl_timer(sk, TIME_CLOSE, TCP_TIMEWAIT_LEN);
-                       break;
-       }
-
-       return 0;
-}
-
-
-
-/*
- *     This routine handles the data.  If there is room in the buffer,
- *     it will be have already been moved into it.  If there is no
- *     room, then we will just have to discard the packet.
- */
-
-extern /* __inline__ */ int tcp_data(struct sk_buff *skb, struct sock *sk, 
-        unsigned long saddr, unsigned short len)
-{
-       struct sk_buff *skb1, *skb2;
-       struct tcphdr *th;
-       int dup_dumped=0;
-       u32 new_seq, shut_seq;
-
-       th = skb->h.th;
-       skb_pull(skb,th->doff*4);
-       skb_trim(skb,len-(th->doff*4));
-
-       /*
-        *      The bytes in the receive read/assembly queue has increased. Needed for the
-        *      low memory discard algorithm 
-        */
-          
-       sk->bytes_rcv += skb->len;
-       
-       if (skb->len == 0 && !th->fin) 
-       {
-               /* 
-                *      Don't want to keep passing ack's back and forth. 
-                *      (someone sent us dataless, boring frame)
-                */
-               if (!th->ack)
-                       tcp_send_ack(sk->sent_seq, sk->acked_seq,sk, th, saddr);
-               kfree_skb(skb, FREE_READ);
-               return(0);
-       }
-       
-       /*
-        *      We no longer have anyone receiving data on this connection.
-        */
-
-#ifndef TCP_DONT_RST_SHUTDOWN           
-
-       if(sk->shutdown & RCV_SHUTDOWN)
-       {
-               /*
-                *      FIXME: BSD has some magic to avoid sending resets to
-                *      broken 4.2 BSD keepalives. Much to my surprise a few non
-                *      BSD stacks still have broken keepalives so we want to
-                *      cope with it.
-                */
-
-               if(skb->len)    /* We don't care if it's just an ack or
-                                  a keepalive/window probe */
-               {
-                       new_seq = skb->seq + skb->len + th->syn;        /* Right edge of _data_ part of frame */
-                       
-                       /* Do this the way 4.4BSD treats it. Not what I'd
-                          regard as the meaning of the spec but it's what BSD
-                          does and clearly they know everything 8) */
-
-                       /*
-                        *      This is valid because of two things
-                        *
-                        *      a) The way tcp_data behaves at the bottom.
-                        *      b) A fin takes effect when read not when received.
-                        */
-                        
-                       shut_seq = sk->acked_seq+1;     /* Last byte */
-                       
-                       if(after(new_seq,shut_seq))
-                       {
-                               if(sk->debug)
-                                       printk("Data arrived on %p after close [Data right edge %X, Socket shut on %X] %d\n",
-                                               sk, new_seq, shut_seq, sk->blog);
-                               if(sk->dead)
-                               {
-                                       sk->acked_seq = new_seq + th->fin;
-                                       tcp_reset(sk->saddr, sk->daddr, skb->h.th,
-                                               sk->prot, NULL, skb->dev, sk->ip_tos, sk->ip_ttl);
-                                       tcp_statistics.TcpEstabResets++;
-                                       sk->err = EPIPE;
-                                       sk->error_report(sk);
-                                       sk->shutdown = SHUTDOWN_MASK;
-                                       tcp_set_state(sk,TCP_CLOSE);
-                                       kfree_skb(skb, FREE_READ);
-                                       return 0;
-                               }
-                       }
-               }
-       }
-
-#endif
-
-       /*
-        *      Now we have to walk the chain, and figure out where this one
-        *      goes into it.  This is set up so that the last packet we received
-        *      will be the first one we look at, that way if everything comes
-        *      in order, there will be no performance loss, and if they come
-        *      out of order we will be able to fit things in nicely.
-        *
-        *      [AC: This is wrong. We should assume in order first and then walk
-        *       forwards from the first hole based upon real traffic patterns.]
-        *      
-        */
-
-       if (skb_peek(&sk->receive_queue) == NULL)       /* Empty queue is easy case */
-       {
-               skb_queue_head(&sk->receive_queue,skb);
-               skb1= NULL;
-       } 
-       else
-       {
-               for(skb1=sk->receive_queue.prev; ; skb1 = skb1->prev) 
-               {
-                       if(sk->debug)
-                       {
-                               printk("skb1=%p :", skb1);
-                               printk("skb1->seq = %d: ", skb1->seq);
-                               printk("skb->seq = %d\n",skb->seq);
-                               printk("copied_seq = %d acked_seq = %d\n", sk->copied_seq,
-                                               sk->acked_seq);
-                       }
-                       
-                       /*
-                        *      Optimisation: Duplicate frame or extension of previous frame from
-                        *      same sequence point (lost ack case).
-                        *      The frame contains duplicate data or replaces a previous frame
-                        *      discard the previous frame (safe as sk->inuse is set) and put
-                        *      the new one in its place.
-                        */
-                        
-                       if (skb->seq==skb1->seq && skb->len>=skb1->len)
-                       {
-                               skb_append(skb1,skb);
-                               skb_unlink(skb1);
-                               kfree_skb(skb1,FREE_READ);
-                               dup_dumped=1;
-                               skb1=NULL;
-                               break;
-                       }
-                       
-                       /*
-                        *      Found where it fits
-                        */
-                        
-                       if (after(skb->seq+1, skb1->seq))
-                       {
-                               skb_append(skb1,skb);
-                               break;
-                       }
-                       
-                       /*
-                        *      See if we've hit the start. If so insert.
-                        */
-                       if (skb1 == skb_peek(&sk->receive_queue))
-                       {
-                               skb_queue_head(&sk->receive_queue, skb);
-                               break;
-                       }
-               }
-       }
-
-       /*
-        *      Figure out what the ack value for this frame is
-        */
-        
-       if (before(sk->acked_seq, sk->copied_seq)) 
-       {
-               printk("*** tcp.c:tcp_data bug acked < copied\n");
-               sk->acked_seq = sk->copied_seq;
-       }
-
-       /*
-        *      Now figure out if we can ack anything. This is very messy because we really want two
-        *      receive queues, a completed and an assembly queue. We also want only one transmit
-        *      queue.
-        */
-
-       if ((!dup_dumped && (skb1 == NULL || skb1->acked)) || before(skb->seq, sk->acked_seq+1)) 
-       {
-               if (before(skb->seq, sk->acked_seq+1)) 
-               {
-
-                       if (after(skb->end_seq, sk->acked_seq)) 
-                               sk->acked_seq = skb->end_seq;
-
-                       skb->acked = 1;
-
-                       /*
-                        *      When we ack the fin, we do the FIN 
-                        *      processing.
-                        */
-
-                       if (skb->h.th->fin) 
-                       {
-                               tcp_fin(skb,sk,skb->h.th);
-                       }
-         
-                       for(skb2 = skb->next;
-                           skb2 != (struct sk_buff *)&sk->receive_queue;
-                           skb2 = skb2->next) 
-                       {
-                               if (before(skb2->seq, sk->acked_seq+1)) 
-                               {
-                                       if (after(skb2->end_seq, sk->acked_seq))
-                                               sk->acked_seq = skb2->end_seq;
-
-                                       skb2->acked = 1;
-                                       /*
-                                        *      When we ack the fin, we do
-                                        *      the fin handling.
-                                        */
-                                       if (skb2->h.th->fin) 
-                                       {
-                                               tcp_fin(skb,sk,skb->h.th);
-                                       }
-
-                                       /*
-                                        *      Force an immediate ack.
-                                        */
-                                        
-                                       sk->ack_backlog = sk->max_ack_backlog;
-                               }
-                               else
-                               {
-                                       break;
-                               }
-                       }
-
-                       /*
-                        *      This also takes care of updating the window.
-                        *      This if statement needs to be simplified.
-                        *
-                        *      rules for delaying an ack:
-                        *      - delay time <= 0.5 HZ
-                        *      - we don't have a window update to send
-                        *      - must send at least every 2 full sized packets
-                        */
-                       if (!sk->delay_acks ||
-                           sk->ack_backlog >= sk->max_ack_backlog || 
-                           sk->bytes_rcv > sk->max_unacked || th->fin ||
-                           sk->ato > HZ/2 ||
-                           tcp_raise_window(sk)) {
-       /*                      tcp_send_ack(sk->sent_seq, sk->acked_seq,sk,th, saddr); */
-                       }
-                       else 
-                       {
-                               sk->ack_backlog++;
-                               
-                               if(sk->debug)                           
-                                       printk("Ack queued.\n");
-                               reset_xmit_timer(sk, TIME_WRITE, sk->ato);
-                       }
-               }
-       }
-
-       /*
-        *      If we've missed a packet, send an ack.
-        *      Also start a timer to send another.
-        */
-        
-       if (!skb->acked) 
-       {
-       
-       /*
-        *      This is important.  If we don't have much room left,
-        *      we need to throw out a few packets so we have a good
-        *      window.  Note that mtu is used, not mss, because mss is really
-        *      for the send side.  He could be sending us stuff as large as mtu.
-        */
-                
-               while (sock_rspace(sk) < sk->mtu) 
-               {
-                       skb1 = skb_peek(&sk->receive_queue);
-                       if (skb1 == NULL) 
-                       {
-                               printk("INET: tcp.c:tcp_data memory leak detected.\n");
-                               break;
-                       }
-
-                       /*
-                        *      Don't throw out something that has been acked. 
-                        */
-                
-                       if (skb1->acked) 
-                       {
-                               break;
-                       }
-               
-                       skb_unlink(skb1);
-                       kfree_skb(skb1, FREE_READ);
-               }
-               tcp_send_ack(sk->sent_seq, sk->acked_seq, sk, th, saddr);
-               sk->ack_backlog++;
-               reset_xmit_timer(sk, TIME_WRITE, min(sk->ato, 0.5 * HZ));
-       }
-       else
-       {
-               tcp_send_ack(sk->sent_seq, sk->acked_seq, sk, th, saddr);
-       }
-
-       /*
-        *      Now tell the user we may have some data. 
-        */
-        
-       if (!sk->dead) 
-       {
-               if(sk->debug)
-                       printk("Data wakeup.\n");
-               sk->data_ready(sk,0);
-       } 
-       return(0);
-}
-
-
-/*
- *     The 'slow' half of tcp_urg, run only when a segment signals
- *     urgent data.  It works out which sequence number the urgent
- *     pointer refers to and, if that is new information, records it
- *     in sk->urg_seq, marks the urgent byte as URG_NOTYET and raises
- *     SIGURG for the socket's owner.
- *
- *     It could be folded into tcp_urg now that tcp_urg has a single
- *     caller.  Urgent data is handled "wrong" on purpose: BSD still
- *     doesn't use the correction from RFC961, and we have to match it.
- */
- 
-static void tcp_check_urg(struct sock * sk, struct tcphdr * th)
-{
-       u32 urg_off = ntohs(th->urg_ptr);
-       u32 urg_seq;
-
-       /* A non-zero pointer is pulled back by one before being used. */
-       urg_seq = ntohl(th->seq) + (urg_off ? urg_off - 1 : 0);
-
-       /* Urgent data we have already seen and read is of no interest. */
-       if (after(sk->copied_seq, urg_seq))
-               return;
-
-       /* Nor is a pointer that is no newer than the one we hold. */
-       if (sk->urg_data && !after(urg_seq, sk->urg_seq))
-               return;
-
-       /*
-        * Announce the new pointer: a positive sk->proc is signalled as
-        * a process, a negative one as a process group, zero not at all.
-        */
-       if (sk->proc > 0)
-               kill_proc(sk->proc, SIGURG, 1);
-       else if (sk->proc < 0)
-               kill_pg(-sk->proc, SIGURG, 1);
-
-       sk->urg_data = URG_NOTYET;
-       sk->urg_seq = urg_seq;
-}
-
-/*
- *     The 'fast' half of urgent handling.
- *
- *     Lets tcp_check_urg() look at the segment when URG is set, then,
- *     if an urgent byte is still awaited (URG_NOTYET) and lies inside
- *     this segment, stores it in sk->urg_data tagged URG_VALID and
- *     wakes the reader.  len is compared against an offset counted
- *     from the start of the TCP header.  Always returns 0.
- */
- 
-extern __inline__ int tcp_urg(struct sock *sk, struct tcphdr *th,
-       unsigned long saddr, unsigned long len)
-{
-       u32 offset;
-
-       /* A new urgent pointer is the unusual case. */
-       if (th->urg)
-               tcp_check_urg(sk,th);
-
-       /* So is actually waiting for an urgent byte. */
-       if (sk->urg_data == URG_NOTYET)
-       {
-               /* Where the urgent byte would sit, counted from the header start. */
-               offset = sk->urg_seq - ntohl(th->seq) + th->doff*4;
-
-               if (offset < len)
-               {
-                       /* It is in this packet: capture it and tell the reader. */
-                       sk->urg_data = URG_VALID | ((unsigned char *) th)[offset];
-                       if (!sk->dead)
-                               sk->data_ready(sk,0);
-               }
-       }
-
-       return 0;
-}
-
-/*
- *     This will accept the next outstanding connection. 
- *
- *     sk      the listening socket
- *     flags   only O_NONBLOCK is examined
- *
- *     Returns the socket attached to the next buffer taken from
- *     tcp_dequeue_established(), or NULL with sk->err set to:
- *       EINVAL       sk is not in TCP_LISTEN
- *       EAGAIN       O_NONBLOCK is set and nothing is queued
- *       ERESTARTSYS  an unblocked signal is pending after sleeping
- *
- *     On success the dequeued buffer is freed and sk->ack_backlog is
- *     decremented.
- */
- 
-static struct sock *tcp_accept(struct sock *sk, int flags)
-{
-       struct sock *newsk;
-       struct sk_buff *skb;
-  
-  /*
-   * We need to make sure that this socket is listening,
-   * and that it has something pending.
-   */
-
-       if (sk->state != TCP_LISTEN) 
-       {
-               sk->err = EINVAL;
-               return(NULL); 
-       }
-
-       /* Avoid the race: interrupts stay off from here until one of
-        * the sti() calls below, covering both the dequeue attempt and
-        * the updates of sk->inuse.  Every exit path calls sti() first.
-        */
-       cli();
-       sk->inuse = 1;
-
-       while((skb = tcp_dequeue_established(sk)) == NULL) 
-       {
-               if (flags & O_NONBLOCK) 
-               {
-                       sti();
-                       release_sock(sk);
-                       sk->err = EAGAIN;
-                       return(NULL);
-               }
-
-               release_sock(sk);
-               interruptible_sleep_on(sk->sleep);
-               if (current->signal & ~current->blocked) 
-               {
-                       sti();
-                       sk->err = ERESTARTSYS;
-                       return(NULL);
-               }
-               sk->inuse = 1;
-       }
-       sti();
-
-       /*
-        *      Now all we need to do is return skb->sk. 
-        *      The buffer itself was only a carrier for that pointer, so
-        *      it is freed here and the listener's backlog count drops
-        *      by one before the listening socket is released.
-        */
-
-       newsk = skb->sk;
-
-       kfree_skb(skb, FREE_READ);
-       sk->ack_backlog--;
-       release_sock(sk);
-       return(newsk);
-}
-
-
-/*
- *     This will initiate an outgoing connection. 
- *
- *     sk        the socket to connect; must be in TCP_CLOSE with no
- *               destination address set
- *     usin      destination; a sin_addr of INADDR_ANY is overwritten
- *               in place with ip_my_addr()
- *     addr_len  size of *usin as passed by the caller, at least 8
- *
- *     Builds a SYN carrying a single 4-byte option (kind 2, length 4,
- *     value sk->mtu), moves the socket to TCP_SYN_SENT, arms the
- *     retransmit timer with sk->rto and queues the segment.
- *
- *     Returns 0 once the SYN has been queued, otherwise:
- *       -EISCONN       sk->state is not TCP_CLOSE
- *       -EINVAL        sk->daddr is already set, or addr_len < 8
- *       -EAFNOSUPPORT  sin_family is non-zero and not AF_INET
- *       -ENETUNREACH   destination is broadcast/multicast, or
- *                      build_header() failed
- *       -ENOMEM        no buffer for the SYN
- *
- *     NOTE(review): t1->window is assigned the constant 2 with no
- *     byte-order conversion, unlike t1->seq which goes through
- *     htonl() - confirm this is intended.
- */
- 
-static int tcp_connect(struct sock *sk, struct sockaddr_in *usin, int addr_len)
-{
-       struct sk_buff *buff;
-       struct device *dev=NULL;
-       unsigned char *ptr;
-       int tmp;
-       int atype;
-       struct tcphdr *t1;
-       struct rtable *rt;
-
-       if (sk->state != TCP_CLOSE) 
-               return(-EISCONN);
-
-       /*
-        *      Don't allow a double connect.
-        */
-               
-       if(sk->daddr)
-               return -EINVAL;
-       
-       if (addr_len < 8) 
-               return(-EINVAL);
-
-       if (usin->sin_family && usin->sin_family != AF_INET) 
-               return(-EAFNOSUPPORT);
-
-       /*
-        *      connect() to INADDR_ANY means loopback (BSD'ism).
-        */
-       
-       if(usin->sin_addr.s_addr==INADDR_ANY)
-               usin->sin_addr.s_addr=ip_my_addr();
-                 
-       /*
-        *      Don't want a TCP connection going to a broadcast address 
-        *      (multicast destinations are refused the same way).
-        */
-
-       if ((atype=ip_chk_addr(usin->sin_addr.s_addr)) == IS_BROADCAST || atype==IS_MULTICAST) 
-               return -ENETUNREACH;
-  
-       sk->inuse = 1;
-       sk->daddr = usin->sin_addr.s_addr;
-       sk->write_seq = tcp_init_seq();
-       sk->window_seq = sk->write_seq;
-       sk->rcv_ack_seq = sk->write_seq -1;
-       sk->err = 0;
-       sk->dummy_th.dest = usin->sin_port;
-       release_sock(sk);
-
-       buff = sock_wmalloc(sk,MAX_SYN_SIZE,0, GFP_KERNEL);
-       if (buff == NULL) 
-       {
-               return(-ENOMEM);
-       }
-       sk->inuse = 1;
-       buff->sk = sk;
-       buff->free = 0;
-       buff->localroute = sk->localroute;
-       
-
-       /*
-        *      Put in the IP header and routing stuff.
-        *      build_header() also fills in dev and sk->ip_route_cache,
-        *      both of which are used below.
-        */
-        
-       tmp = sk->prot->build_header(buff, sk->saddr, sk->daddr, &dev,
-               IPPROTO_TCP, NULL, MAX_SYN_SIZE,sk->ip_tos,sk->ip_ttl,&sk->ip_route_cache);
-       if (tmp < 0) 
-       {
-               sock_wfree(sk, buff);
-               release_sock(sk);
-               return(-ENETUNREACH);
-       }
-       if ((rt = sk->ip_route_cache) != NULL && !sk->saddr)
-               sk->saddr = rt->rt_src;
-       sk->rcv_saddr = sk->saddr;
-
-       t1 = (struct tcphdr *) skb_put(buff,sizeof(struct tcphdr));
-
-       memcpy(t1,(void *)&(sk->dummy_th), sizeof(*t1));
-       buff->seq = sk->write_seq++;
-       t1->seq = htonl(buff->seq);
-       sk->sent_seq = sk->write_seq;
-       buff->end_seq = sk->write_seq;
-       t1->ack = 0;
-       t1->window = 2;
-       t1->res1=0;
-       t1->res2=0;
-       t1->rst = 0;
-       t1->urg = 0;
-       t1->psh = 0;
-       t1->syn = 1;
-       t1->urg_ptr = 0;
-       t1->doff = 6;
-       /* use 512 or whatever user asked for */
-       
-       if(rt!=NULL && (rt->rt_flags&RTF_WINDOW))
-               sk->window_clamp=rt->rt_window;
-       else
-               sk->window_clamp=0;
-
-       if (sk->user_mss)
-               sk->mtu = sk->user_mss;
-       else if (rt)
-               sk->mtu = rt->rt_mtu - sizeof(struct iphdr) - sizeof(struct tcphdr);
-       else 
-               sk->mtu = 576 - sizeof(struct iphdr) - sizeof(struct tcphdr);
-
-       /*
-        *      but not bigger than device MTU 
-        *      (less the IP and TCP headers), and never below 32.
-        */
-
-       if(sk->mtu <32)
-               sk->mtu = 32;   /* Sanity limit */
-               
-       sk->mtu = min(sk->mtu, dev->mtu - sizeof(struct iphdr) - sizeof(struct tcphdr));
-
-#ifdef CONFIG_SKIP
-       
-       /*
-        *      SKIP devices set their MTU to 65535. This is so they can take packets
-        *      unfragmented to security process then fragment. They could lie to the
-        *      TCP layer about a suitable MTU, but it's easier to let skip sort it out
-        *      simply because the final package we want unfragmented is going to be
-        *
-        *      [IPHDR][IPSP][Security data][Modified TCP data][Security data]
-        */
-        
-       if(skip_pick_mtu!=NULL)         /* If SKIP is loaded.. */
-               sk->mtu=skip_pick_mtu(sk->mtu,dev);
-#endif
-       
-       /*
-        *      Put in the TCP options to say MTU. 
-        *      Four bytes: kind 2, length 4, then sk->mtu high byte first.
-        */
-
-       ptr = skb_put(buff,4);
-       ptr[0] = 2;
-       ptr[1] = 4;
-       ptr[2] = (sk->mtu) >> 8;
-       ptr[3] = (sk->mtu) & 0xff;
-       tcp_send_check(t1, sk->saddr, sk->daddr,
-                 sizeof(struct tcphdr) + 4, sk);
-
-       /*
-        *      This must go first otherwise a really quick response will get reset. 
-        *      (i.e. the socket has to be in SYN_SENT before the SYN is queued.)
-        */
-
-       tcp_cache_zap();
-       tcp_set_state(sk,TCP_SYN_SENT);
-       if(rt&&rt->rt_flags&RTF_IRTT)
-               sk->rto = rt->rt_irtt;
-       else
-               sk->rto = TCP_TIMEOUT_INIT;
-       sk->retransmit_timer.function=&retransmit_timer;
-       sk->retransmit_timer.data = (unsigned long)sk;
-       reset_xmit_timer(sk, TIME_WRITE, sk->rto);      /* Timer for repeating the SYN until an answer  */
-       sk->retransmits = 0;                            /* Now works the right way instead of a hacked 
-                                                                                       initial setting */
-
-       sk->prot->queue_xmit(sk, dev, buff, 0);  
-       reset_xmit_timer(sk, TIME_WRITE, sk->rto);
-       tcp_statistics.TcpActiveOpens++;
-       tcp_statistics.TcpOutSegs++;
-  
-       release_sock(sk);
-       return(0);
-}
-
-/*
- * React to an out-of-window TCP sequence number in an incoming packet.
- *
- * A segment with RST set is ignored.  While the connection is still
- * unsynchronized (SYN_SENT or SYN_RECV) the sender is reset; in every
- * other state an ack is sent to try to resynchronize.  len and opt are
- * not used.
- */
-static void bad_tcp_sequence(struct sock *sk, struct tcphdr *th, short len,
-            struct options *opt, unsigned long saddr, struct device *dev)
-{
-       if (!th->rst)
-       {
-               if (sk->state==TCP_SYN_SENT || sk->state==TCP_SYN_RECV)
-               {
-                       /*
-                        *      Not ours, and we are unsynchronized: send a
-                        *      reset.  Nothing is done to our own end; this
-                        *      only kills the bogus remote connection, after
-                        *      which we will connect again and it will work
-                        *      (with luck).
-                        */
-                       tcp_reset(sk->saddr,sk->daddr,th,sk->prot,NULL,dev, sk->ip_tos,sk->ip_ttl);
-               }
-               else
-               {
-                       /* Try to resync things. */
-                       tcp_send_ack(sk->sent_seq, sk->acked_seq, sk, th, saddr);
-               }
-       }
-}
-
-/*
- *     This function checks whether a segment's sequence range is
- *     acceptable: it has to reach at least sk->acked_seq and it has to
- *     start before the end of the receive window.  Returns 1 if both
- *     hold, 0 otherwise.
- */
- 
-extern __inline__ int tcp_sequence(struct sock *sk, u32 seq, u32 end_seq)
-{
-       /* Nothing we have not already seen: reject. */
-       if (!after(end_seq+1, sk->acked_seq))
-               return 0;
-
-       /* Otherwise it is acceptable only if it starts before the window ends. */
-       return before(seq, sk->acked_seq + sk->window + 1) ? 1 : 0;
-}
-
-/*
- *     When we get a reset we do this.
- */
-
-static int tcp_std_reset(struct sock *sk, struct sk_buff *skb)
-{
-       sk->zapped = 1;
-       sk->err = ECONNRESET;
-       if (sk->state == TCP_SYN_SENT)
-               sk->err = ECONNREFUSED;
-       if (sk->state == TCP_CLOSE_WAIT)
-               sk->err = EPIPE;
-#ifdef TCP_DO_RFC1337          
-       /*
-        *      Time wait assassination protection [RFC1337]
-        */
-       if(sk->state!=TCP_TIME_WAIT)
-       {       
-               tcp_set_state(sk,TCP_CLOSE);
-               sk->shutdown = SHUTDOWN_MASK;
-       }
-#else  
-       tcp_set_state(sk,TCP_CLOSE);
-       sk->shutdown = SHUTDOWN_MASK;
-#endif 
-       if (!sk->dead) 
-               sk->state_change(sk);
-       kfree_skb(skb, FREE_READ);
-       release_sock(sk);
-       return(0);
-}
-
-/*
- *     Look up the socket for an address/port four-tuple, trying the
- *     one-entry last-hit cache (th_cache_*) before falling back to
- *     get_sock().  A successful lookup replaces the cache entry; a
- *     failed one leaves it alone.  Returns the socket, or whatever
- *     get_sock() returned on a miss.
- */
-static inline struct sock * get_tcp_sock(u32 saddr, u16 sport, u32 daddr, u16 dport)
-{
-       struct sock * cached = (struct sock *) th_cache_sk;
-       struct sock * found;
-
-       /* Cache hit: all four fields match the remembered tuple. */
-       if (saddr == th_cache_saddr && daddr == th_cache_daddr &&
-           sport == th_cache_sport && dport == th_cache_dport)
-               return cached;
-
-       /* Miss: do the full lookup and remember a successful result. */
-       found = get_sock(&tcp_prot, dport, saddr, sport, daddr);
-       if (found)
-       {
-               th_cache_saddr=saddr;
-               th_cache_daddr=daddr;
-               th_cache_dport=dport;
-               th_cache_sport=sport;
-               th_cache_sk=found;
-       }
-       return found;
-}
-
-
-/*
- *     A TCP packet has arrived.
- *             skb->h.raw is the TCP header.
- */
- 
-int tcp_rcv(struct sk_buff *skb, struct device *dev, struct options *opt,
-       __u32 daddr, unsigned short len,
-       __u32 saddr, int redo, struct inet_protocol * protocol)
-{
-       struct tcphdr *th;
-       struct sock *sk;
-       int syn_ok=0;
-
-       /*
-        * "redo" is 1 if we have already seen this skb but couldn't
-        * use it at that time (the socket was locked).  In that case
-        * we have already done a lot of the work (looked up the socket
-        * etc).
-        */
-       th = skb->h.th;
-       sk = skb->sk;
-       if (!redo) {
-               tcp_statistics.TcpInSegs++;
-               if (skb->pkt_type!=PACKET_HOST)
-               {
-                       kfree_skb(skb,FREE_READ);
-                       return(0);
-               }
-               /*
-                *      Pull up the IP header.
-                */
-               skb_pull(skb, skb->h.raw-skb->data);
-               /*
-                *      Try to use the device checksum if provided.
-                */
-               if (
-                       ((skb->ip_summed == CHECKSUM_HW) && tcp_check(th, len, saddr, daddr, skb->csum ))||
-                       ((skb->ip_summed == CHECKSUM_NONE) && tcp_check(th, len, saddr, daddr, csum_partial((char *)th, len, 0)))
-                   /* skip if CHECKSUM_UNNECESSARY */
-                   )
-               {
-                       skb->sk = NULL;
-                       kfree_skb(skb,FREE_READ);
-                       /*
-                        *      We don't release the socket because it was
-                        *      never marked in use.
-                        */
-                       return(0);
-               }
-               sk = get_tcp_sock(saddr, th->source, daddr, th->dest);
-               if (!sk)
-                       goto no_tcp_socket;
-               skb->sk = sk;
-               skb->seq = ntohl(th->seq);
-               skb->end_seq = skb->seq + th->syn + th->fin + len - th->doff*4;
-               skb->ack_seq = ntohl(th->ack_seq);
-
-               skb->acked = 0;
-               skb->used = 0;
-               skb->free = 0;
-               skb->saddr = daddr;
-               skb->daddr = saddr;
-       
-               /* We may need to add it to the backlog here. */
-               cli();
-               if (sk->inuse) 
-               {
-                       skb_queue_tail(&sk->back_log, skb);
-                       sti();
-                       return(0);
-               }
-               sk->inuse = 1;
-               sti();
-       }
-
-       /*
-        *      If this socket has got a reset it's to all intents and purposes 
-        *      really dead. Count closed sockets as dead.
-        *
-        *      Note: BSD appears to have a bug here. A 'closed' TCP in BSD
-        *      simply drops data. This seems incorrect as a 'closed' TCP doesn't
-        *      exist so should cause resets as if the port was unreachable.
-        */
-
-       if (sk->zapped || sk->state==TCP_CLOSE)
-               goto no_tcp_socket;
-
-       if (!sk->prot) 
-       {
-               printk("IMPOSSIBLE 3\n");
-               return(0);
-       }
-
-
-       /*
-        *      Charge the memory to the socket. 
-        */
-        
-       skb->sk=sk;
-       sk->rmem_alloc += skb->truesize;
-
-       /*
-        *      This basically follows the flow suggested by RFC793, with the corrections in RFC1122. We
-        *      don't implement precedence and we process URG incorrectly (deliberately so) for BSD bug
-        *      compatibility. We also set up variables more thoroughly [Karn notes in the
-        *      KA9Q code the RFC793 incoming segment rules don't initialise the variables for all paths].
-        */
-
-       if(sk->state!=TCP_ESTABLISHED)          /* Skip this lot for normal flow */
-       {
-       
-               /*
-                *      Now deal with unusual cases.
-                */
-        
-               if(sk->state==TCP_LISTEN)
-               {
-                       if(th->ack)     /* These use the socket TOS.. might want to be the received TOS */
-                               tcp_reset(daddr,saddr,th,sk->prot,opt,dev,sk->ip_tos, sk->ip_ttl);
-
-                       /*
-                        *      We don't care for RST, and non SYN are absorbed (old segments)
-                        *      Broadcast/multicast SYN isn't allowed. Note - bug if you change the
-                        *      netmask on a running connection it can go broadcast. Even Sun's have
-                        *      this problem so I'm ignoring it 
-                        */
-                          
-                       if(th->rst || !th->syn || th->ack || ip_chk_addr(daddr)!=IS_MYADDR)
-                       {
-                               kfree_skb(skb, FREE_READ);
-                               release_sock(sk);
-                               return 0;
-                       }
-               
-                       /*      
-                        *      Guess we need to make a new socket up 
-                        */
-               
-                       tcp_conn_request(sk, skb, daddr, saddr, opt, dev, tcp_init_seq());
-               
-                       /*
-                        *      Now we have several options: In theory there is nothing else
-                        *      in the frame. KA9Q has an option to send data with the syn,
-                        *      BSD accepts data with the syn up to the [to be] advertised window
-                        *      and Solaris 2.1 gives you a protocol error. For now we just ignore
-                        *      it, that fits the spec precisely and avoids incompatibilities. It
-                        *      would be nice in future to drop through and process the data.
-                        */
-                        
-                       release_sock(sk);
-                       return 0;
-               }
-       
-               /* retransmitted SYN? */
-               if (sk->state == TCP_SYN_RECV && th->syn && skb->seq+1 == sk->acked_seq)
-               {
-                       kfree_skb(skb, FREE_READ);
-                       release_sock(sk);
-                       return 0;
-               }
-               
-               /*
-                *      SYN sent means we have to look for a suitable ack and either reset
-                *      for bad matches or go to connected 
-                */
-          
-               if(sk->state==TCP_SYN_SENT)
-               {
-                       /* Crossed SYN or previous junk segment */
-                       if(th->ack)
-                       {
-                               /* We got an ack, but it's not a good ack */
-                               if(!tcp_ack(sk,th,saddr,len))
-                               {
-                                       /* Reset the ack - its an ack from a 
-                                          different connection  [ th->rst is checked in tcp_reset()] */
-                                       tcp_statistics.TcpAttemptFails++;
-                                       tcp_reset(daddr, saddr, th,
-                                               sk->prot, opt,dev,sk->ip_tos,sk->ip_ttl);
-                                       kfree_skb(skb, FREE_READ);
-                                       release_sock(sk);
-                                       return(0);
-                               }
-                               if(th->rst)
-                                       return tcp_std_reset(sk,skb);
-                               if(!th->syn)
-                               {
-                                       /* A valid ack from a different connection
-                                          start. Shouldn't happen but cover it */
-                                       tcp_statistics.TcpAttemptFails++;
-                                        tcp_reset(daddr, saddr, th,
-                                                sk->prot, opt,dev,sk->ip_tos,sk->ip_ttl);
-                                       kfree_skb(skb, FREE_READ);
-                                       release_sock(sk);
-                                       return 0;
-                               }
-                               /*
-                                *      Ok.. it's good. Set up sequence numbers and
-                                *      move to established.
-                                */
-                               syn_ok=1;       /* Don't reset this connection for the syn */
-                               sk->acked_seq = skb->seq+1;
-                               sk->lastwin_seq = skb->seq+1;
-                               sk->fin_seq = skb->seq;
-                               tcp_send_ack(sk->sent_seq,sk->acked_seq,sk,th,sk->daddr);
-                               tcp_set_state(sk, TCP_ESTABLISHED);
-                               tcp_options(sk,th);
-                               sk->dummy_th.dest=th->source;
-                               sk->copied_seq = sk->acked_seq;
-                               if(!sk->dead)
-                               {
-                                       sk->state_change(sk);
-                                       sock_wake_async(sk->socket, 0);
-                               }
-                               if(sk->max_window==0)
-                               {
-                                       sk->max_window = 32;
-                                       sk->mss = min(sk->max_window, sk->mtu);
-                               }
-                       }
-                       else
-                       {
-                               /* See if SYN's cross. Drop if boring */
-                               if(th->syn && !th->rst)
-                               {
-                                       /* Crossed SYN's are fine - but talking to
-                                          yourself is right out... */
-                                       if(sk->saddr==saddr && sk->daddr==daddr &&
-                                               sk->dummy_th.source==th->source &&
-                                               sk->dummy_th.dest==th->dest)
-                                       {
-                                               tcp_statistics.TcpAttemptFails++;
-                                               return tcp_std_reset(sk,skb);
-                                       }
-                                       tcp_set_state(sk,TCP_SYN_RECV);
-                                       
-                                       /*
-                                        *      FIXME:
-                                        *      Must send SYN|ACK here
-                                        */
-                               }               
-                               /* Discard junk segment */
-                               kfree_skb(skb, FREE_READ);
-                               release_sock(sk);
-                               return 0;
-                       }
-                       /*
-                        *      SYN_RECV with data maybe.. drop through
-                        */
-                       goto rfc_step6;
-               }
-
-       /*
-        *      BSD has a funny hack with TIME_WAIT and fast reuse of a port. There is
-        *      a more complex suggestion for fixing these reuse issues in RFC1644
-        *      but not yet ready for general use. Also see RFC1379.
-        */
-       
-#define BSD_TIME_WAIT
-#ifdef BSD_TIME_WAIT
-               if (sk->state == TCP_TIME_WAIT && th->syn && sk->dead && 
-                       after(skb->seq, sk->acked_seq) && !th->rst)
-               {
-                       u32 seq = sk->write_seq;
-                       if(sk->debug)
-                               printk("Doing a BSD time wait\n");
-                       tcp_statistics.TcpEstabResets++;           
-                       sk->rmem_alloc -= skb->truesize;
-                       skb->sk = NULL;
-                       sk->err=ECONNRESET;
-                       tcp_set_state(sk, TCP_CLOSE);
-                       sk->shutdown = SHUTDOWN_MASK;
-                       release_sock(sk);
-                       sk=get_sock(&tcp_prot, th->dest, saddr, th->source, daddr);
-                       if (sk && sk->state==TCP_LISTEN)
-                       {
-                               sk->inuse=1;
-                               skb->sk = sk;
-                               sk->rmem_alloc += skb->truesize;
-                               tcp_conn_request(sk, skb, daddr, saddr,opt, dev,seq+128000);
-                               release_sock(sk);
-                               return 0;
-                       }
-                       kfree_skb(skb, FREE_READ);
-                       return 0;
-               }
-#endif 
-       }
-
-       /*
-        *      We are now in normal data flow (see the step list in the RFC)
-        *      Note most of these are inline now. I'll inline the lot when
-        *      I have time to test it hard and look at what gcc outputs 
-        */
-       
-       if (!tcp_sequence(sk, skb->seq, skb->end_seq))
-       {
-               bad_tcp_sequence(sk, th, len, opt, saddr, dev);
-               kfree_skb(skb, FREE_READ);
-               release_sock(sk);
-               return 0;
-       }
-
-       if(th->rst)
-               return tcp_std_reset(sk,skb);
-       
-       /*
-        *      !syn_ok is effectively the state test in RFC793.
-        */
-        
-       if(th->syn && !syn_ok)
-       {
-               tcp_reset(daddr,saddr,th, &tcp_prot, opt, dev, skb->ip_hdr->tos, 255);
-               return tcp_std_reset(sk,skb);   
-       }
-
-
-       /*
-        *      Delayed ACK time estimator.
-        */
-       
-       if (sk->lrcvtime == 0) 
-       {
-               sk->lrcvtime = jiffies;
-               sk->ato = HZ/3;
-       }
-       else 
-       {
-               int m;
-               
-               m = jiffies - sk->lrcvtime;
-
-               sk->lrcvtime = jiffies;
-
-               if (m <= 0)
-                       m = 1;
-
-               if (m > (sk->rtt >> 3)) 
-               {
-                       sk->ato = sk->rtt >> 3;
-                       /*
-                        * printk(KERN_DEBUG "ato: rtt %lu\n", sk->ato);
-                        */
-               }
-               else 
-               {
-                       sk->ato = (sk->ato >> 1) + m;
-                       /*
-                        * printk(KERN_DEBUG "ato: m %lu\n", sk->ato);
-                        */
-               }
-       }
-         
-       /*
-        *      Process the ACK
-        */
-        
-
-       if(th->ack && !tcp_ack(sk,th,saddr,len))
-       {
-               /*
-                *      Our three way handshake failed.
-                */
-                
-               if(sk->state==TCP_SYN_RECV)
-               {
-                       tcp_reset(daddr, saddr, th,sk->prot, opt, dev,sk->ip_tos,sk->ip_ttl);
-               }
-               kfree_skb(skb, FREE_READ);
-               release_sock(sk);
-               return 0;
-       }
-       
-rfc_step6:             /* I'll clean this up later */
-
-       /*
-        *      If the accepted buffer put us over our queue size we
-        *      now drop it (we must process the ack first to avoid
-        *      deadlock cases).
-        */
-        
-       if (sk->rmem_alloc  >= sk->rcvbuf) 
-       {
-               kfree_skb(skb, FREE_READ);
-               release_sock(sk);
-               return(0);
-       }
-
-
-       /*
-        *      Process urgent data
-        */
-               
-       if(tcp_urg(sk, th, saddr, len))
-       {
-               kfree_skb(skb, FREE_READ);
-               release_sock(sk);
-               return 0;
-       }
-       
-       /*
-        *      Process the encapsulated data
-        */
-       
-       if(tcp_data(skb,sk, saddr, len))
-       {
-               kfree_skb(skb, FREE_READ);
-               release_sock(sk);
-               return 0;
-       }
-
-       /*
-        *      And done
-        */     
-       
-       release_sock(sk);
-       return 0;
-
-no_tcp_socket:
-       /*
-        *      No such TCB. If th->rst is 0 send a reset (checked in tcp_reset)
-        */
-       tcp_reset(daddr, saddr, th, &tcp_prot, opt,dev,skb->ip_hdr->tos,255);
-       skb->sk = NULL;
-       /*
-        *      Discard frame
-        */
-       kfree_skb(skb, FREE_READ);
-       return 0;
-}
-
-/*
- *     This routine sends a packet with an out of date sequence
- *     number. It assumes the other end will try to ack it.
- */
-
-static void tcp_write_wakeup(struct sock *sk)
-{
-       struct sk_buff *buff,*skb;
-       struct tcphdr *t1;
-       struct device *dev=NULL;
-       int tmp;
-
-       if (sk->zapped)
-               return; /* After a valid reset we can send no more */
+       ptr = skb_put(buff,4);
+       ptr[0] = 2;
+       ptr[1] = 4;
+       ptr[2] = (sk->mtu) >> 8;
+       ptr[3] = (sk->mtu) & 0xff;
+       tcp_send_check(t1, sk->saddr, sk->daddr,
+                 sizeof(struct tcphdr) + 4, sk);
  
         /*
-        *      Write data can still be transmitted/retransmitted in the
-        *      following states.  If any other state is encountered, return.
-        *      [listen/close will never occur here anyway]
+        *      This must go first otherwise a really quick response will get reset. 
          */
  
-       if (sk->state != TCP_ESTABLISHED && 
-           sk->state != TCP_CLOSE_WAIT &&
-           sk->state != TCP_FIN_WAIT1 && 
-           sk->state != TCP_LAST_ACK &&
-           sk->state != TCP_CLOSING
-       ) 
-       {
-               return;
-       }
-       if ( before(sk->sent_seq, sk->window_seq) && 
-           (skb=skb_peek(&sk->write_queue)))
-       {
-               /*
-                * We are probing the opening of a window
-                * but the window size is != 0
-                * must have been a result SWS advoidance ( sender )
-                */
-           
-               struct iphdr *iph;
-               struct tcphdr *th;
-               struct tcphdr *nth;
-               unsigned long win_size;
-#if 0
-               unsigned long ow_size;
-#endif
-               void * tcp_data_start;
-       
-               /*
-                *      How many bytes can we send ?
-                */
-                
-               win_size = sk->window_seq - sk->sent_seq;
-
-               /*
-                *      Recover the buffer pointers
-                */
-                
-               iph = (struct iphdr *)skb->ip_hdr;
-               th = (struct tcphdr *)(((char *)iph) +(iph->ihl << 2));
-
-               /*
-                *      Grab the data for a temporary frame
-                */
-                
-               buff = sock_wmalloc(sk, win_size + th->doff * 4 + 
-                                    (iph->ihl << 2) +
-                                    sk->prot->max_header + 15, 
-                                    1, GFP_ATOMIC);
-               if ( buff == NULL )
-                       return;
-
-               /* 
-                *      If we strip the packet on the write queue we must
-                *      be ready to retransmit this one 
-                */
-           
-               buff->free = /*0*/1;
-
-               buff->sk = sk;
-               buff->localroute = sk->localroute;
-               
-               /*
-                *      Put headers on the new packet
-                */
-
-               tmp = sk->prot->build_header(buff, sk->saddr, sk->daddr, &dev,
-                                        IPPROTO_TCP, sk->opt, buff->truesize,
-                                        sk->ip_tos,sk->ip_ttl,&sk->ip_route_cache);
-               if (tmp < 0) 
-               {
-                       sock_wfree(sk, buff);
-                       return;
-               }
-               
-               /*
-                *      Move the TCP header over
-                */
-
-               buff->dev = dev;
-
-               nth = (struct tcphdr *) skb_put(buff,th->doff*4);
-
-               memcpy(nth, th, th->doff * 4);
-               
-               /*
-                *      Correct the new header
-                */
-                
-               nth->ack = 1; 
-               nth->ack_seq = htonl(sk->acked_seq);
-               nth->window = htons(tcp_select_window(sk));
-               nth->check = 0;
-
-               /*
-                *      Find the first data byte.
-                */
-                
-               tcp_data_start = (char *) th + (th->doff << 2);
-
-               /*
-                *      Add it to our new buffer
-                */
-                
-               memcpy(skb_put(buff,win_size), tcp_data_start, win_size);
-               
-               /*
-                *      Remember our right edge sequence number.
-                */
-                
-               buff->end_seq = sk->sent_seq + win_size;
-               sk->sent_seq = buff->end_seq;           /* Hack */
-               if(th->urg && ntohs(th->urg_ptr) < win_size)
-                       nth->urg = 0;
-
-               /*
-                *      Checksum the split buffer
-                */
-                
-               tcp_send_check(nth, sk->saddr, sk->daddr, 
-                          nth->doff * 4 + win_size , sk);
-       }
+       tcp_cache_zap();
+       tcp_set_state(sk,TCP_SYN_SENT);
+       if(rt&&rt->rt_flags&RTF_IRTT)
+               sk->rto = rt->rt_irtt;
         else
-       {       
-               buff = sock_wmalloc(sk,MAX_ACK_SIZE,1, GFP_ATOMIC);
-               if (buff == NULL) 
-                       return;
-
-               buff->free = 1;
-               buff->sk = sk;
-               buff->localroute = sk->localroute;
-
-               /*
-                *      Put in the IP header and routing stuff. 
-                */
-                
-               tmp = sk->prot->build_header(buff, sk->saddr, sk->daddr, &dev,
-                               IPPROTO_TCP, sk->opt, MAX_ACK_SIZE,sk->ip_tos,sk->ip_ttl,&sk->ip_route_cache);
-               if (tmp < 0) 
-               {
-                       sock_wfree(sk, buff);
-                       return;
-               }
-
-               t1 = (struct tcphdr *)skb_put(buff,sizeof(struct tcphdr));
-               memcpy(t1,(void *) &sk->dummy_th, sizeof(*t1));
-
-               /*
-                *      Use a previous sequence.
-                *      This should cause the other end to send an ack.
-                */
-        
-               t1->seq = htonl(sk->sent_seq-1);
-               t1->ack = 1; 
-               t1->res1= 0;
-               t1->res2= 0;
-               t1->rst = 0;
-               t1->urg = 0;
-               t1->psh = 0;
-               t1->fin = 0;    /* We are sending a 'previous' sequence, and 0 bytes of data - thus no FIN bit */
-               t1->syn = 0;
-               t1->ack_seq = htonl(sk->acked_seq);
-               t1->window = htons(tcp_select_window(sk));
-               t1->doff = sizeof(*t1)/4;
-               tcp_send_check(t1, sk->saddr, sk->daddr, sizeof(*t1), sk);
-
-       }               
+               sk->rto = TCP_TIMEOUT_INIT;
+       sk->retransmit_timer.function=&tcp_retransmit_timer;
+       sk->retransmit_timer.data = (unsigned long)sk;
+       tcp_reset_xmit_timer(sk, TIME_WRITE, sk->rto);  /* Timer for repeating the SYN until an answer  */
+       sk->retransmits = 0;                            /* Now works the right way instead of a hacked 
+                                                                                       initial setting */
  
-       /*
-        *      Send it.
-        */
-       
-       sk->prot->queue_xmit(sk, dev, buff, 1);
+       sk->prot->queue_xmit(sk, dev, buff, 0);  
+       tcp_reset_xmit_timer(sk, TIME_WRITE, sk->rto);
+       tcp_statistics.TcpActiveOpens++;
         tcp_statistics.TcpOutSegs++;
-}
-
-/*
- *     A window probe timeout has occurred.
- */
-
-void tcp_send_probe0(struct sock *sk)
-{
-       if (sk->zapped)
-               return;         /* After a valid reset we can send no more */
-
-       tcp_write_wakeup(sk);
-
-       sk->backoff++;
-       sk->rto = min(sk->rto << 1, 120*HZ);
-       sk->retransmits++;
-       sk->prot->retransmits ++;
-       reset_xmit_timer (sk, TIME_PROBE0, sk->rto);
+  
+       release_sock(sk);
+       return(0);
  }
  
  /*
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c

new file mode 100644 (file)

index 0000000..6e33c14
--- /dev/null
+++ b/net/ipv4/tcp_input.c
@@ -0,0 +1,1909 @@
+/*
+ * INET                An implementation of the TCP/IP protocol suite for the LINUX
+ *             operating system.  INET is implemented using the  BSD Socket
+ *             interface as the means of communication with the user level.
+ *
+ *             Implementation of the Transmission Control Protocol(TCP).
+ *
+ * Version:    @(#)tcp_input.c 1.0.16  05/25/93
+ *
+ * Authors:    Ross Biro, <bir7@leland.Stanford.Edu>
+ *             Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
+ *             Mark Evans, <evansmp@uhura.aston.ac.uk>
+ *             Corey Minyard <wf-rch!minyard@relay.EU.net>
+ *             Florian La Roche, <flla@stud.uni-sb.de>
+ *             Charles Hedrick, <hedrick@klinzhai.rutgers.edu>
+ *             Linus Torvalds, <torvalds@cs.helsinki.fi>
+ *             Alan Cox, <gw4pts@gw4pts.ampr.org>
+ *             Matthew Dillon, <dillon@apollo.west.oic.com>
+ *             Arnt Gulbrandsen, <agulbra@nvg.unit.no>
+ *             Jorge Cwik, <jorge@laser.satlink.net>
+ */
+
+#include <linux/config.h>
+#include <net/tcp.h>
+
+/*
+ *     Cached last hit socket: the address/port values that were
+ *     passed to get_tcp_sock() on its last successful full lookup,
+ *     together with the socket that lookup returned.
+ */
+ 
+static volatile unsigned long  th_cache_saddr,th_cache_daddr;
+static volatile unsigned short  th_cache_dport, th_cache_sport;
+static volatile struct sock *th_cache_sk;
+
+/*
+ *     Invalidate the cache. With th_cache_sk NULL, get_tcp_sock()
+ *     ignores the stored addresses and ports and falls back to a
+ *     full get_sock() lookup.
+ */
+
+void tcp_cache_zap(void)
+{
+       th_cache_sk=NULL;
+}
+
+/*
+ *     Find the socket matching the given addresses and ports.
+ *     The one-entry last hit cache is tried first; on a miss we
+ *     do the full get_sock() search and remember a successful
+ *     result for next time.
+ */
+static inline struct sock * get_tcp_sock(u32 saddr, u16 sport, u32 daddr, u16 dport)
+{
+       struct sock *hit = (struct sock *) th_cache_sk;
+
+       /* Cache hit: all four values match the remembered ones */
+       if (hit && saddr == th_cache_saddr && daddr == th_cache_daddr &&
+           sport == th_cache_sport && dport == th_cache_dport)
+               return hit;
+
+       hit = get_sock(&tcp_prot, dport, saddr, sport, daddr);
+       if (!hit)
+               return NULL;    /* a failed lookup leaves the cache untouched */
+
+       th_cache_saddr = saddr;
+       th_cache_daddr = daddr;
+       th_cache_dport = dport;
+       th_cache_sport = sport;
+       th_cache_sk = hit;
+       return hit;
+}
+
+/*
+ * React to an out-of-window TCP sequence number in an incoming packet.
+ * A segment carrying RST is never answered. Otherwise we send a reset
+ * while the connection is still unsynchronized (SYN_SENT/SYN_RECV) and
+ * an ack in every other state.
+ */
+static void bad_tcp_sequence(struct sock *sk, struct tcphdr *th, short len,
+            struct options *opt, unsigned long saddr, struct device *dev)
+{
+       if (th->rst)
+               return;
+
+       if (sk->state!=TCP_SYN_SENT && sk->state!=TCP_SYN_RECV)
+       {
+               /* Try to resync things. */
+               tcp_send_ack(sk->sent_seq, sk->acked_seq, sk, th, saddr);
+               return;
+       }
+
+       /*
+        *      We got something not ours and we are unsynchronized:
+        *      send a reset. Note: We don't do anything to our end. We
+        *      are just killing the bogus remote connection then we will
+        *      connect again and it will work (with luck).
+        */
+
+       tcp_send_reset(sk->saddr,sk->daddr,th,sk->prot,NULL,dev, sk->ip_tos,sk->ip_ttl);
+}
+
+/*
+ *     This function checks to see if the tcp header is actually acceptable.
+ *     Returns 1 when the segment [seq, end_seq] is acceptable for the
+ *     current receive window and 0 otherwise.
+ */
+ 
+extern __inline__ int tcp_sequence(struct sock *sk, u32 seq, u32 end_seq)
+{
+       u32 win_edge = sk->acked_seq + sk->window;
+
+       /* if start is at end of window, end must be too (zero window) */
+       if (seq == win_edge && seq == end_seq)
+               return 1;
+
+       /* if start is before end of window, check for interest */
+       return before(seq, win_edge) && !before(end_seq, sk->acked_seq);
+}
+
+/*
+ *     When we get a reset we do this: mark the socket zapped, set
+ *     sk->err according to the state we were in, move the socket to
+ *     TCP_CLOSE with sk->shutdown set to SHUTDOWN_MASK, call
+ *     sk->state_change() unless the socket is dead, then free the
+ *     segment and release the socket. Always returns 0.
+ */
+
+static int tcp_reset(struct sock *sk, struct sk_buff *skb)
+{
+       sk->zapped = 1;
+       /* ECONNRESET is the default; two states override it below */
+       sk->err = ECONNRESET;
+       if (sk->state == TCP_SYN_SENT)
+               sk->err = ECONNREFUSED;
+       if (sk->state == TCP_CLOSE_WAIT)
+               sk->err = EPIPE;
+#ifdef TCP_DO_RFC1337          
+       /*
+        *      Time wait assassination protection [RFC1337]:
+        *      a socket in TIME_WAIT keeps its state and shutdown flags.
+        */
+       if(sk->state!=TCP_TIME_WAIT)
+       {       
+               tcp_set_state(sk,TCP_CLOSE);
+               sk->shutdown = SHUTDOWN_MASK;
+       }
+#else  
+       /* Without RFC1337 protection every state is closed unconditionally */
+       tcp_set_state(sk,TCP_CLOSE);
+       sk->shutdown = SHUTDOWN_MASK;
+#endif 
+       if (!sk->dead) 
+               sk->state_change(sk);
+       /* This function consumes the segment and releases the socket itself */
+       kfree_skb(skb, FREE_READ);
+       release_sock(sk);
+       return(0);
+}
+
+
+/*
+ *     Look for tcp options. Parses everything but only knows about MSS.
+ *     This routine is always called with the packet containing the SYN.
+ *     However it may also be called with the ack to the SYN.  So you
+ *     can't assume this is always the SYN.  It's always called after
+ *     we have set up sk->mtu to our own MTU.
+ *
+ *     Parsing stops at an end-of-list option, at a "silly" option
+ *     length, or at an option that claims more bytes than the header
+ *     has left. However parsing stops, we still run the code below
+ *     the loop, so a SYN without a usable MSS option gets the default
+ *     and sk->mss is always recomputed.
+ *
+ *     We need at minimum to add PAWS support here. Possibly large windows
+ *     as Linux gets deployed on 100Mb/sec networks.
+ */
+ 
+static void tcp_options(struct sock *sk, struct tcphdr *th)
+{
+       unsigned char *ptr;
+       int length=(th->doff*4)-sizeof(struct tcphdr);
+       int mss_seen = 0;
+    
+       ptr = (unsigned char *)(th + 1);
+  
+       while(length>0)
+       {
+               int opcode=*ptr++;
+               int opsize;
+
+               if(opcode==TCPOPT_EOL)
+                       break;
+               if(opcode==TCPOPT_NOP)  /* Ref: RFC 793 section 3.1 */
+               {
+                       length--;       /* single byte, it has no length field */
+                       continue;
+               }
+
+               /* Every other option has a length byte: make sure it is really there */
+               if(length<2)
+                       break;
+               opsize=*ptr++;
+               if(opsize<=2)   /* Avoid silly options looping forever */
+                       break;
+               if(opsize>length)       /* Don't parse an option that runs off the header */
+                       break;
+
+               switch(opcode)
+               {
+                       case TCPOPT_MSS:
+                               if(opsize==4 && th->syn)
+                               {
+                                       sk->mtu=min(sk->mtu,ntohs(*(unsigned short *)ptr));
+                                       mss_seen = 1;
+                               }
+                               break;
+                               /* Add other options here as people feel the urge to implement stuff like large windows */
+               }
+               ptr+=opsize-2;
+               length-=opsize;
+       }
+       if (th->syn) 
+       {
+               if (! mss_seen)
+                     sk->mtu=min(sk->mtu, 536);  /* default MSS if none sent */
+       }
+#ifdef CONFIG_INET_PCTCP
+       sk->mss = min(sk->max_window >> 1, sk->mtu);
+#else    
+       sk->mss = min(sk->max_window, sk->mtu);
+       sk->max_unacked = 2 * sk->mss;
+#endif  
+}
+
+
+/*
+ *     This routine handles a connection request.
+ *     It should make sure we haven't already responded.
+ *     Because of the way BSD works, we have to send a syn/ack now.
+ *     This also means it will be harder to close a socket which is
+ *     listening.
+ */
+ 
+static void tcp_conn_request(struct sock *sk, struct sk_buff *skb,
+                u32 daddr, u32 saddr, struct options *opt, struct device *dev, u32 seq)
+{
+       struct sock *newsk;
+       struct tcphdr *th;
+       struct rtable *rt;
+  
+       th = skb->h.th;
+
+       /* If the socket is dead, don't accept the connection. */
+       if (!sk->dead) 
+       {
+               sk->data_ready(sk,0);
+       }
+       else 
+       {
+               if(sk->debug)
+                       printk("Reset on %p: Connect on dead socket.\n",sk);
+               tcp_send_reset(daddr, saddr, th, sk->prot, opt, dev, sk->ip_tos,sk->ip_ttl);
+               tcp_statistics.TcpAttemptFails++;
+               kfree_skb(skb, FREE_READ);
+               return;
+       }
+
+       /*
+        * Make sure we can accept more.  This will prevent a
+        * flurry of syns from eating up all our memory.
+        */
+
+       if (sk->ack_backlog >= sk->max_ack_backlog) 
+       {
+               tcp_statistics.TcpAttemptFails++;
+               kfree_skb(skb, FREE_READ);
+               return;
+       }
+
+       /*
+        * We need to build a new sock struct.
+        * It is sort of bad to have a socket without an inode attached
+        * to it, but the wake_up's will just wake up the listening socket,
+        * and if the listening socket is destroyed before this is taken
+        * off of the queue, this will take care of it.
+        */
+
+       newsk = (struct sock *) kmalloc(sizeof(struct sock), GFP_ATOMIC);
+       if (newsk == NULL) 
+       {
+               /* just ignore the syn.  It will get retransmitted. */
+               tcp_statistics.TcpAttemptFails++;
+               kfree_skb(skb, FREE_READ);
+               return;
+       }
+
+       memcpy(newsk, sk, sizeof(*newsk));
+       newsk->opt = NULL;
+       newsk->ip_route_cache  = NULL;
+       if (opt && opt->optlen) {
+         sk->opt = (struct options*)kmalloc(sizeof(struct options)+opt->optlen, GFP_ATOMIC);
+         if (!sk->opt) {
+               kfree_s(newsk, sizeof(struct sock));
+               tcp_statistics.TcpAttemptFails++;
+               kfree_skb(skb, FREE_READ);
+               return;
+         }
+         if (ip_options_echo(sk->opt, opt, daddr, saddr, skb)) {
+               kfree_s(sk->opt, sizeof(struct options)+opt->optlen);
+               kfree_s(newsk, sizeof(struct sock));
+               tcp_statistics.TcpAttemptFails++;
+               kfree_skb(skb, FREE_READ);
+               return;
+         }
+       }
+       skb_queue_head_init(&newsk->write_queue);
+       skb_queue_head_init(&newsk->receive_queue);
+       newsk->send_head = NULL;
+       newsk->send_tail = NULL;
+       skb_queue_head_init(&newsk->back_log);
+       newsk->rtt = 0;         /*TCP_CONNECT_TIME<<3*/
+       newsk->rto = TCP_TIMEOUT_INIT;
+       newsk->mdev = 0;
+       newsk->max_window = 0;
+       newsk->cong_window = 1;
+       newsk->cong_count = 0;
+       newsk->ssthresh = 0;
+       newsk->backoff = 0;
+       newsk->blog = 0;
+       newsk->intr = 0;
+       newsk->proc = 0;
+       newsk->done = 0;
+       newsk->partial = NULL;
+       newsk->pair = NULL;
+       newsk->wmem_alloc = 0;
+       newsk->rmem_alloc = 0;
+       newsk->localroute = sk->localroute;
+
+       newsk->max_unacked = MAX_WINDOW - TCP_WINDOW_DIFF;
+
+       newsk->err = 0;
+       newsk->shutdown = 0;
+       newsk->ack_backlog = 0;
+       newsk->acked_seq = skb->seq+1;
+       newsk->lastwin_seq = skb->seq+1;
+       newsk->delay_acks = 1;
+       newsk->copied_seq = skb->seq+1;
+       newsk->fin_seq = skb->seq;
+       newsk->state = TCP_SYN_RECV;
+       newsk->timeout = 0;
+       newsk->ip_xmit_timeout = 0;
+       newsk->write_seq = seq; 
+       newsk->window_seq = newsk->write_seq;
+       newsk->rcv_ack_seq = newsk->write_seq;
+       newsk->urg_data = 0;
+       newsk->retransmits = 0;
+       newsk->linger=0;
+       newsk->destroy = 0;
+       init_timer(&newsk->timer);
+       newsk->timer.data = (unsigned long)newsk;
+       newsk->timer.function = &net_timer;
+       init_timer(&newsk->retransmit_timer);
+       newsk->retransmit_timer.data = (unsigned long)newsk;
+       newsk->retransmit_timer.function=&tcp_retransmit_timer;
+       newsk->dummy_th.source = skb->h.th->dest;
+       newsk->dummy_th.dest = skb->h.th->source;
+       
+       /*
+        *      Swap these two, they are from our point of view. 
+        */
+        
+       newsk->daddr = saddr;
+       newsk->saddr = daddr;
+       newsk->rcv_saddr = daddr;
+
+       put_sock(newsk->num,newsk);
+       newsk->dummy_th.res1 = 0;
+       newsk->dummy_th.doff = 6;
+       newsk->dummy_th.fin = 0;
+       newsk->dummy_th.syn = 0;
+       newsk->dummy_th.rst = 0;        
+       newsk->dummy_th.psh = 0;
+       newsk->dummy_th.ack = 0;
+       newsk->dummy_th.urg = 0;
+       newsk->dummy_th.res2 = 0;
+       newsk->acked_seq = skb->seq + 1;
+       newsk->copied_seq = skb->seq + 1;
+       newsk->socket = NULL;
+
+       /*
+        *      Grab the ttl and tos values and use them 
+        */
+
+       newsk->ip_ttl=sk->ip_ttl;
+       newsk->ip_tos=skb->ip_hdr->tos;
+
+       /*
+        *      Use 512 or whatever user asked for 
+        */
+
+       /*
+        *      Note use of sk->user_mss, since user has no direct access to newsk 
+        */
+
+       rt = ip_rt_route(newsk->opt && newsk->opt->srr ? newsk->opt->faddr : saddr, 0);
+       newsk->ip_route_cache = rt;
+       
+       if(rt!=NULL && (rt->rt_flags&RTF_WINDOW))
+               newsk->window_clamp = rt->rt_window;
+       else
+               newsk->window_clamp = 0;
+               
+       if (sk->user_mss)
+               newsk->mtu = sk->user_mss;
+       else if (rt)
+               newsk->mtu = rt->rt_mtu - sizeof(struct iphdr) - sizeof(struct tcphdr);
+       else 
+               newsk->mtu = 576 - sizeof(struct iphdr) - sizeof(struct tcphdr);
+
+       /*
+        *      But not bigger than device MTU 
+        */
+
+       newsk->mtu = min(newsk->mtu, dev->mtu - sizeof(struct iphdr) - sizeof(struct tcphdr));
+
+#ifdef CONFIG_SKIP
+       
+       /*
+        *      SKIP devices set their MTU to 65535. This is so they can take packets
+        *      unfragmented to security process then fragment. They could lie to the
+        *      TCP layer about a suitable MTU, but its easier to let skip sort it out
+        *      simply because the final package we want unfragmented is going to be
+        *
+        *      [IPHDR][IPSP][Security data][Modified TCP data][Security data]
+        */
+        
+       if(skip_pick_mtu!=NULL)         /* If SKIP is loaded.. */
+               sk->mtu=skip_pick_mtu(sk->mtu,dev);
+#endif
+       /*
+        *      This will min with what arrived in the packet 
+        */
+
+       tcp_options(newsk,skb->h.th);
+       
+       tcp_cache_zap();
+       tcp_send_synack(newsk, sk, skb);
+}
+
+/*
+ *     This routine deals with incoming acks, but not outgoing ones.
+ *
+ *     sk  - socket the segment belongs to
+ *     th  - TCP header of the incoming segment (window, doff and source
+ *           are read from it)
+ *     ack - acknowledgement sequence number to process (supplied by the
+ *           caller)
+ *     len - compared against th->doff*4 to decide whether the segment
+ *           carried anything beyond the header
+ *
+ *     Returns 0 only when the ack is after sk->sent_seq; every other path
+ *     (zapped socket, stale ack, normal processing, LAST_ACK completing)
+ *     returns 1.
+ *
+ *     Along the way it may update max_window/mss, window_seq, cong_window
+ *     and rtt/mdev/rto, free acknowledged buffers from the send_head list,
+ *     push queued data out, re-arm or delete the retransmit timer, and
+ *     move the socket between states (LAST_ACK, FIN_WAIT1, CLOSING,
+ *     SYN_RECV).
+ */
+
+static int tcp_ack(struct sock *sk, struct tcphdr *th, u32 ack, int len)
+{
+       int flag = 0;           /* bitmask built from the 1/2/4 values listed below */
+       unsigned window;        /* window field of th after ntohs() */
+
+       /* 
+        * Meaning of the bits ORed into flag:
+        *
+        * 1 - there was data in packet as well as ack or new data is sent or 
+        *     in shutdown state
+        * 2 - data from retransmit queue was acked and removed
+        * 4 - window shrunk or data from retransmit queue was acked and removed
+        */
+
+       if(sk->zapped)
+               return(1);      /* Dead, can't ack any more so why bother */
+
+       /*
+        *      Have we discovered a larger window
+        *
+        *      If so remember it and recompute mss from it, never
+        *      exceeding sk->mtu.
+        */
+        
+       window = ntohs(th->window);
+
+       if (window > sk->max_window) 
+       {
+               sk->max_window = window;
+#ifdef CONFIG_INET_PCTCP
+               /* Hack because we don't send partial packets to non SWS
+                  handling hosts */
+               sk->mss = min(window>>1, sk->mtu);
+#else
+               sk->mss = min(window, sk->mtu);
+#endif 
+       }
+
+       /*
+        *      We have dropped back to keepalive timeouts. Thus we have
+        *      no retransmits pending.
+        */
+        
+       if (sk->retransmits && sk->ip_xmit_timeout == TIME_KEEPOPEN)
+               sk->retransmits = 0;
+
+       /*
+        *      If the ack is newer than sent or older than previous acks
+        *      then we can probably ignore it.
+        */
+        
+       if (after(ack, sk->sent_seq) || before(ack, sk->rcv_ack_seq)) 
+       {
+               if(sk->debug)
+                       printk("Ack ignored %u %u\n",ack,sk->sent_seq);
+                       
+               /*
+                *      Keepalive processing.
+                *
+                *      An ack beyond sent_seq is rejected with 0.  A stale
+                *      ack (before rcv_ack_seq) falls through and returns 1.
+                */
+                
+               if (after(ack, sk->sent_seq)) 
+               {
+                       return(0);
+               }
+               
+               /*
+                *      Restart the keepalive timer.
+                *      (only when keepopen is set and the timer is already
+                *      in TIME_KEEPOPEN mode)
+                */
+                
+               if (sk->keepopen) 
+               {
+                       if(sk->ip_xmit_timeout==TIME_KEEPOPEN)
+                               tcp_reset_xmit_timer(sk, TIME_KEEPOPEN, TCP_TIMEOUT_LEN);
+               }
+               return(1);
+       }
+
+       /*
+        *      If there is data set flag 1
+        *      (len differs from the header length th->doff*4)
+        */
+        
+       if (len != th->doff*4) 
+               flag |= 1;
+
+       /*
+        *      See if our window has been shrunk. 
+        *      That is: the right edge we recorded earlier (window_seq)
+        *      is after ack+window.
+        */
+
+       if (after(sk->window_seq, ack+window)) 
+       {
+               /*
+                * We may need to move packets from the send queue
+                * to the write queue, if the window has been shrunk on us.
+                * The RFC says you are not allowed to shrink your window
+                * like this, but if the other end does, you must be able
+                * to deal with it.
+                */
+               struct sk_buff *skb;
+               struct sk_buff *skb2;
+               struct sk_buff *wskb = NULL;    /* last buffer moved to write_queue; the next is skb_append'ed to it */
+       
+               skb2 = sk->send_head;           /* detach the whole list; the loop rebuilds it */
+               sk->send_head = NULL;
+               sk->send_tail = NULL;
+       
+               /*
+                *      This is an artifact of a flawed concept. We want one
+                *      queue and a smarter send routine when we send all.
+                */
+       
+               flag |= 4;      /* Window changed */
+       
+               sk->window_seq = ack + window;
+               cli();
+               while (skb2 != NULL) 
+               {
+                       skb = skb2;
+                       skb2 = skb->link3;
+                       skb->link3 = NULL;
+                       if (after(skb->end_seq, sk->window_seq))        /* ends beyond the new right edge */
+                       {
+                               if (sk->packets_out > 0) 
+                                       sk->packets_out--;
+                               /* We may need to remove this from the dev send list. */
+                               if (skb->next != NULL) 
+                               {
+                                       skb_unlink(skb);                                
+                               }
+                               /* Now add it to the write_queue. */
+                               if (wskb == NULL)
+                                       skb_queue_head(&sk->write_queue,skb);
+                               else
+                                       skb_append(wskb,skb);
+                               wskb = skb;
+                       } 
+                       else    /* still inside the window: re-link at the tail of the send_head list */
+                       {
+                               if (sk->send_head == NULL) 
+                               {
+                                       sk->send_head = skb;
+                                       sk->send_tail = skb;
+                               }
+                               else
+                               {
+                                       sk->send_tail->link3 = skb;
+                                       sk->send_tail = skb;
+                               }
+                               skb->link3 = NULL;
+                       }
+               }
+               sti();
+       }
+
+       /*
+        *      Pipe has emptied
+        *
+        *      If either end of the retransmit list is NULL, force both
+        *      pointers to NULL and zero the outstanding packet count.
+        */
+        
+       if (sk->send_tail == NULL || sk->send_head == NULL) 
+       {
+               sk->send_head = NULL;
+               sk->send_tail = NULL;
+               sk->packets_out= 0;
+       }
+
+       /*
+        *      Update the right hand window edge of the host
+        */
+        
+       sk->window_seq = ack + window;
+
+       /*
+        *      We don't want too many packets out there. 
+        *
+        *      cong_window only grows while the TIME_WRITE timer is the
+        *      active one, cong_window is below 2048, and this ack is
+        *      after rcv_ack_seq (i.e. it acknowledges something new).
+        */
+        
+       if (sk->ip_xmit_timeout == TIME_WRITE && 
+               sk->cong_window < 2048 && after(ack, sk->rcv_ack_seq)) 
+       {
+               /* 
+                * This is Jacobson's slow start and congestion avoidance. 
+                * SIGCOMM '88, p. 328.  Because we keep cong_window in integral
+                * mss's, we can't do cwnd += 1 / cwnd.  Instead, maintain a 
+                * counter and increment it once every cwnd times.  It's possible
+                * that this should be done only if sk->retransmits == 0.  I'm
+                * interpreting "new data is acked" as including data that has
+                * been retransmitted but is just now being acked.
+                */
+               if (sk->cong_window < sk->ssthresh)  
+                       /* 
+                        *      In "safe" area, increase
+                        */
+                       sk->cong_window++;
+               else 
+               {
+                       /*
+                        *      In dangerous area, increase slowly.  In theory this is
+                        *      sk->cong_window += 1 / sk->cong_window
+                        */
+                       if (sk->cong_count >= sk->cong_window) 
+                       {
+                               sk->cong_window++;
+                               sk->cong_count = 0;
+                       }
+                       else 
+                               sk->cong_count++;
+               }
+       }
+
+       /*
+        *      Remember the highest ack received.
+        */
+        
+       sk->rcv_ack_seq = ack;
+       
+       /*
+        *      We passed data and got it acked, remove any soft error
+        *      log. Something worked...
+        */
+        
+       sk->err_soft = 0;
+
+       /*
+        *      If this ack opens up a zero window, clear backoff.  It was
+        *      being used to time the probes, and is probably far higher than
+        *      it needs to be for normal retransmission.
+        */
+
+       if (sk->ip_xmit_timeout == TIME_PROBE0) 
+       {
+               sk->retransmits = 0;    /* Our probe was answered */
+               
+               /*
+                *      Was it a usable window open ?
+                *      (the first buffer on write_queue must end at or
+                *      before the new window_seq)
+                */
+                
+               if (skb_peek(&sk->write_queue) != NULL &&   /* should always be non-null */
+                   ! before (sk->window_seq, sk->write_queue.next->end_seq)) 
+               {
+                       sk->backoff = 0;
+                       
+                       /*
+                        *      Recompute rto from rtt.  this eliminates any backoff.
+                        *      The result is clamped to the range HZ/5 .. 120*HZ.
+                        */
+
+                       sk->rto = ((sk->rtt >> 2) + sk->mdev) >> 1;
+                       if (sk->rto > 120*HZ)
+                               sk->rto = 120*HZ;
+                       if (sk->rto < HZ/5)     /* Was 1*HZ, then 1 - turns out we must allow about
+                                                  .2 of a second because of BSD delayed acks - on a 100Mb/sec link
+                                                  .2 of a second is going to need huge windows (SIGH) */
+                       sk->rto = HZ/5;
+               }
+       }
+
+       /* 
+        *      See if we can take anything off of the retransmit queue.
+        *
+        *      Walks the send_head list from the front and stops at the
+        *      first buffer that this ack does not cover.
+        */
+   
+       while(sk->send_head != NULL) 
+       {
+               /* Check for a bug. */
+               if (sk->send_head->link3 &&
+                   after(sk->send_head->end_seq, sk->send_head->link3->end_seq)) 
+                       printk("INET: tcp.c: *** bug send_list out of order.\n");
+                       
+               /*
+                *      If our packet is before the ack sequence we can
+                *      discard it as it's confirmed to have arrived the other end.
+                *      (the test is: its end_seq is before ack+1)
+                */
+                
+               if (before(sk->send_head->end_seq, ack+1)) 
+               {
+                       struct sk_buff *oskb;   
+                       if (sk->retransmits) 
+                       {       
+                               /*
+                                *      We were retransmitting.  don't count this in RTT est 
+                                */
+                               flag |= 2;
+
+                               /*
+                                * even though we've gotten an ack, we're still
+                                * retransmitting as long as we're sending from
+                                * the retransmit queue.  Keeping retransmits non-zero
+                                * prevents us from getting new data interspersed with
+                                * retransmissions.
+                                */
+
+                               if (sk->send_head->link3)       /* Any more queued retransmits? */
+                                       sk->retransmits = 1;
+                               else
+                                       sk->retransmits = 0;
+                       }
+                       /*
+                        * Note that we only reset backoff and rto in the
+                        * rtt recomputation code.  And that doesn't happen
+                        * if there were retransmissions in effect.  So the
+                        * first new packet after the retransmissions is
+                        * sent with the backoff still in effect.  Not until
+                        * we get an ack from a non-retransmitted packet do
+                        * we reset the backoff and rto.  This allows us to deal
+                        * with a situation where the network delay has increased
+                        * suddenly.  I.e. Karn's algorithm. (SIGCOMM '87, p5.)
+                        */
+
+                       /*
+                        *      We have one less packet out there. 
+                        */
+                        
+                       if (sk->packets_out > 0) 
+                               sk->packets_out --;
+
+                       oskb = sk->send_head;   /* oskb->when is used for the RTT sample below */
+
+                       if (!(flag&2))  /* Not retransmitting */
+                       {
+                               long m;
+       
+                               /*
+                                *      The following amusing code comes from Jacobson's
+                                *      article in SIGCOMM '88.  Note that rtt and mdev
+                                *      are scaled versions of rtt and mean deviation.
+                                *      This is designed to be as fast as possible 
+                                *      m stands for "measurement".
+                                */
+       
+                               m = jiffies - oskb->when;  /* RTT */
+                               if(m<=0)
+                                       m=1;            /* IS THIS RIGHT FOR <0 ??? */
+                               m -= (sk->rtt >> 3);    /* m is now error in rtt est */
+                               sk->rtt += m;           /* rtt = 7/8 rtt + 1/8 new */
+                               if (m < 0)
+                                       m = -m;         /* m is now abs(error) */
+                               m -= (sk->mdev >> 2);   /* similar update on mdev */
+                               sk->mdev += m;          /* mdev = 3/4 mdev + 1/4 new */
+       
+                               /*
+                                *      Now update timeout.  Note that this removes any backoff.
+                                *      Same formula and HZ/5 .. 120*HZ clamp as in the
+                                *      TIME_PROBE0 branch earlier in this function.
+                                */
+                        
+                               sk->rto = ((sk->rtt >> 2) + sk->mdev) >> 1;
+                               if (sk->rto > 120*HZ)
+                                       sk->rto = 120*HZ;
+                               if (sk->rto < HZ/5)     /* Was 1*HZ - keep .2 as minimum cos of the BSD delayed acks */
+                                       sk->rto = HZ/5;
+                               sk->backoff = 0;
+                       }
+                       flag |= (2|4);  /* 2 is really more like 'don't adjust the rtt 
+                                          In this case as we just set it up */
+                       cli();
+                       oskb = sk->send_head;   /* fetched again now that cli() has been called */
+                       IS_SKB(oskb);
+                       sk->send_head = oskb->link3;
+                       if (sk->send_head == NULL) 
+                       {
+                               sk->send_tail = NULL;
+                       }
+
+               /*
+                *      We may need to remove this from the dev send list. 
+                */
+
+                       if (oskb->next)
+                               skb_unlink(oskb);
+                       sti();
+                       kfree_skb(oskb, FREE_WRITE); /* write. */
+                       if (!sk->dead)
+                               sk->write_space(sk);
+               }
+               else
+               {
+                       break;
+               }
+       }
+
+       /*
+        * XXX someone ought to look at this too.. at the moment, if skb_peek()
+        * returns non-NULL, we completely ignore the timer stuff in the else
+        * clause.  We ought to organize the code so that else clause can
+        * (should) be executed regardless, possibly moving the PROBE timer
+        * reset over.  The skb_peek() thing should only move stuff to the
+        * write queue, NOT also manage the timer functions.
+        */
+
+       /*
+        * Maybe we can take some stuff off of the write queue,
+        * and put it onto the xmit queue.
+        *
+        * tcp_write_xmit() is called only when all of these hold:
+        *   - the first buffer on write_queue ends inside window_seq,
+        *   - retransmits is 0, or the timer is not TIME_WRITE, or that
+        *     buffer's end_seq is before rcv_ack_seq+1,
+        *   - packets_out is below cong_window.
+        * Otherwise, if the buffer ends beyond window_seq, send_head is
+        * empty, ack_backlog is 0 and we are not in TIME_WAIT, the timer is
+        * set to TIME_PROBE0.
+        */
+       if (skb_peek(&sk->write_queue) != NULL) 
+       {
+               if (after (sk->window_seq+1, sk->write_queue.next->end_seq) &&
+                       (sk->retransmits == 0 || 
+                        sk->ip_xmit_timeout != TIME_WRITE ||
+                        before(sk->write_queue.next->end_seq, sk->rcv_ack_seq + 1))
+                       && sk->packets_out < sk->cong_window) 
+               {
+                       /*
+                        *      Add more data to the send queue.
+                        */
+                       flag |= 1;
+                       tcp_write_xmit(sk);
+               }
+               else if (before(sk->window_seq, sk->write_queue.next->end_seq) &&
+                       sk->send_head == NULL &&
+                       sk->ack_backlog == 0 &&
+                       sk->state != TCP_TIME_WAIT) 
+               {
+                       /*
+                        *      Data to queue but no room.
+                        */
+                       tcp_reset_xmit_timer(sk, TIME_PROBE0, sk->rto);
+               }               
+       }
+       else
+       {
+               /*
+                * from TIME_WAIT we stay in TIME_WAIT as long as we rx packets
+                * from TCP_CLOSE we don't do anything
+                *
+                * from anything else, if there is write data (or fin) pending,
+                * we use a TIME_WRITE timeout, else if keepalive we reset to
+                * a KEEPALIVE timeout, else we delete the timer.
+                *
+                * We do not set flag for nominal write data, otherwise we may
+                * force a state where we start to write itsy bitsy tidbits
+                * of data.
+                */
+
+               switch(sk->state) {
+               case TCP_TIME_WAIT:
+                       /*
+                        * keep us in TIME_WAIT until we stop getting packets,
+                        * reset the timeout.
+                        */
+                       tcp_reset_msl_timer(sk, TIME_CLOSE, TCP_TIMEWAIT_LEN);
+                       break;
+               case TCP_CLOSE:
+                       /*
+                        * don't touch the timer.
+                        */
+                       break;
+               default:
+                       /*
+                        *      Must check send_head, write_queue, and ack_backlog
+                        *      to determine which timeout to use.
+                        */
+                       if (sk->send_head || skb_peek(&sk->write_queue) != NULL || sk->ack_backlog) {
+                               tcp_reset_xmit_timer(sk, TIME_WRITE, sk->rto);
+                       } else if (sk->keepopen) {
+                               tcp_reset_xmit_timer(sk, TIME_KEEPOPEN, TCP_TIMEOUT_LEN);
+                       } else {
+                               del_timer(&sk->retransmit_timer);
+                               sk->ip_xmit_timeout = 0;
+                       }
+                       break;
+               }
+       }
+
+       /*
+        *      We have nothing queued but space to send. Send any partial
+        *      packets immediately (end of Nagle rule application).
+        */
+        
+       if (sk->packets_out == 0 && sk->partial != NULL &&
+               skb_peek(&sk->write_queue) == NULL && sk->send_head == NULL) 
+       {
+               flag |= 1;
+               tcp_send_partial(sk);
+       }
+
+       /*
+        * In the LAST_ACK case, the other end FIN'd us.  We then FIN'd them, and
+        * we are now waiting for an acknowledge to our FIN.  The other end is
+        * already in TIME_WAIT.
+        *
+        * Move to TCP_CLOSE on success.
+        * "Success" here is rcv_ack_seq == write_seq; this is the one place
+        * after the early checks where the function returns before the
+        * retransmit test at the bottom.
+        */
+
+       if (sk->state == TCP_LAST_ACK) 
+       {
+               if (!sk->dead)
+                       sk->state_change(sk);
+               if(sk->debug)
+                       printk("rcv_ack_seq: %X==%X, acked_seq: %X==%X\n",
+                               sk->rcv_ack_seq,sk->write_seq,sk->acked_seq,sk->fin_seq);
+               if (sk->rcv_ack_seq == sk->write_seq /*&& sk->acked_seq == sk->fin_seq*/) 
+               {
+                       flag |= 1;
+                       sk->shutdown = SHUTDOWN_MASK;
+                       tcp_set_state(sk,TCP_CLOSE);
+                       return 1;
+               }
+       }
+
+       /*
+        *      Incoming ACK to a FIN we sent in the case of our initiating the close.
+        *
+        *      Move to FIN_WAIT2 to await a FIN from the other end. Set
+        *      SEND_SHUTDOWN but not RCV_SHUTDOWN as data can still be coming in.
+        */
+
+       if (sk->state == TCP_FIN_WAIT1) 
+       {
+
+               if (!sk->dead) 
+                       sk->state_change(sk);
+               if (sk->rcv_ack_seq == sk->write_seq) 
+               {
+                       flag |= 1;
+                       sk->shutdown |= SEND_SHUTDOWN;
+                       tcp_set_state(sk, TCP_FIN_WAIT2);
+               }
+       }
+
+       /*
+        *      Incoming ACK to a FIN we sent in the case of a simultaneous close.
+        *
+        *      Move to TIME_WAIT
+        */
+
+       if (sk->state == TCP_CLOSING) 
+       {
+
+               if (!sk->dead) 
+                       sk->state_change(sk);
+               if (sk->rcv_ack_seq == sk->write_seq) 
+               {
+                       flag |= 1;
+                       tcp_time_wait(sk);
+               }
+       }
+       
+       /*
+        *      Final ack of a three way shake 
+        *
+        *      Enter ESTABLISHED, run tcp_options() on this header, take
+        *      the peer port from th->source, and if max_window is still 0
+        *      set it to 32 and recompute mss.
+        */
+        
+       if(sk->state==TCP_SYN_RECV)
+       {
+               tcp_set_state(sk, TCP_ESTABLISHED);
+               tcp_options(sk,th);
+               sk->dummy_th.dest=th->source;
+               sk->copied_seq = sk->acked_seq;
+               if(!sk->dead)
+                       sk->state_change(sk);
+               if(sk->max_window==0)
+               {
+                       sk->max_window=32;      /* Sanity check */
+                       sk->mss=min(sk->max_window,sk->mtu);
+               }
+       }
+       
+       /*
+        * I make no guarantees about the first clause in the following
+        * test, i.e. "(!flag) || (flag&4)".  I'm not entirely sure under
+        * what conditions "!flag" would be true.  However I think the rest
+        * of the conditions would prevent that from causing any
+        * unnecessary retransmission. 
+        *   Clearly if the first packet has expired it should be 
+        * retransmitted.  The other alternative, "flag&2 && retransmits", is
+        * harder to explain:  You have to look carefully at how and when the
+        * timer is set and with what timeout.  The most recent transmission always
+        * sets the timer.  So in general if the most recent thing has timed
+        * out, everything before it has as well.  So we want to go ahead and
+        * retransmit some more.  If we didn't explicitly test for this
+        * condition with "flag&2 && retransmits", chances are "when + rto < jiffies"
+        * would not be true.  If you look at the pattern of timing, you can
+        * show that rto is increased fast enough that the next packet would
+        * almost never be retransmitted immediately.  Then you'd end up
+        * waiting for a timeout to send each packet on the retransmission
+        * queue.  With my implementation of the Karn sampling algorithm,
+        * the timeout would double each time.  The net result is that it would
+        * take a hideous amount of time to recover from a single dropped packet.
+        * It's possible that there should also be a test for TIME_WRITE, but
+        * I think as long as "send_head != NULL" and "retransmit" is on, we've
+        * got to be in real retransmission mode.
+        *   Note that tcp_do_retransmit is called with all==1.  Setting cong_window
+        * back to 1 at the timeout will cause us to send 1, then 2, etc. packets.
+        * As long as no further losses occur, this seems reasonable.
+        *
+        * In the code below: when the head of the list has really timed out
+        * ("when + rto < jiffies") tcp_retransmit(sk,0) is used; otherwise
+        * tcp_do_retransmit(sk,1) is called and the TIME_WRITE timer is
+        * re-armed with the current rto.
+        */
+       
+       if (((!flag) || (flag&4)) && sk->send_head != NULL &&
+              (((flag&2) && sk->retransmits) ||
+              (sk->send_head->when + sk->rto < jiffies))) 
+       {
+               if(sk->send_head->when + sk->rto < jiffies)
+                       tcp_retransmit(sk,0);   
+               else
+               {
+                       tcp_do_retransmit(sk, 1);
+                       tcp_reset_xmit_timer(sk, TIME_WRITE, sk->rto);
+               }
+       }
+
+       return(1);
+}
+
+
+/*
+ *     Process the FIN bit.  The FIN only takes effect once it is validly
+ *     part of sequence space, not earlier while there are still holes.
+ *
+ *     The FIN's position is stored in sk->fin_seq, waiters are woken
+ *     unless sk->dead is set, and then the state machine is stepped:
+ *
+ *       SYN_RECV, SYN_SENT, ESTABLISHED -> CLOSE_WAIT (and thence onto
+ *                                          LAST-ACK and finally CLOSE;
+ *                                          TIME-WAIT is never entered)
+ *       FIN_WAIT1                       -> CLOSING (simultaneous close,
+ *                                          later onto TIME-WAIT)
+ *       FIN_WAIT2                       -> TIME_WAIT
+ *       TIME_WAIT                       -> unchanged, MSL timer restarted
+ *       CLOSE_WAIT, CLOSING, CLOSE      -> unchanged
+ *       anything else                   -> LAST_ACK, MSL timer started
+ *
+ *     The return value is 0 on every path.
+ */
+ 
+static int tcp_fin(struct sk_buff *skb, struct sock *sk, struct tcphdr *th)
+{
+       int state;
+
+       sk->fin_seq = skb->end_seq;
+
+       if (!sk->dead) {
+               sk->state_change(sk);
+               sock_wake_async(sk->socket, 1);
+       }
+
+       /* Sample the state once, after the wakeups, then dispatch on it. */
+       state = sk->state;
+
+       if (state == TCP_SYN_RECV || state == TCP_SYN_SENT ||
+           state == TCP_ESTABLISHED) {
+               /* tcp_data() already handled sending the ack. */
+               tcp_set_state(sk, TCP_CLOSE_WAIT);
+               if (th->rst)
+                       sk->shutdown = SHUTDOWN_MASK;
+       } else if (state == TCP_CLOSE_WAIT || state == TCP_CLOSING ||
+                  state == TCP_CLOSE) {
+               /*
+                * Retransmitted FIN, or the socket is already in CLOSE:
+                * nothing to do.
+                */
+       } else if (state == TCP_TIME_WAIT) {
+               /* Retransmitted FIN: restart the TIME_WAIT timer. */
+               tcp_reset_msl_timer(sk, TIME_CLOSE, TCP_TIMEWAIT_LEN);
+       } else if (state == TCP_FIN_WAIT1) {
+               /*
+                * Simultaneous close: we must ack the received FIN and
+                * enter CLOSING.  Make sure a WRITE timeout is running; it
+                * will either move us on to TIME_WAIT or resend the FIN
+                * properly.  The TIME_WRITE code already handles this.
+                */
+               if (sk->ip_xmit_timeout != TIME_WRITE)
+                       tcp_reset_xmit_timer(sk, TIME_WRITE, sk->rto);
+               tcp_set_state(sk, TCP_CLOSING);
+       } else if (state == TCP_FIN_WAIT2) {
+               /* Received a FIN -- send ACK and enter TIME_WAIT. */
+               tcp_reset_msl_timer(sk, TIME_CLOSE, TCP_TIMEWAIT_LEN);
+               sk->shutdown |= SHUTDOWN_MASK;
+               tcp_set_state(sk, TCP_TIME_WAIT);
+       } else {
+               tcp_set_state(sk, TCP_LAST_ACK);
+
+               /* Start the timers. */
+               tcp_reset_msl_timer(sk, TIME_CLOSE, TCP_TIMEWAIT_LEN);
+       }
+
+       return 0;
+}
+
+
+
+/*
+ *     This routine handles the data.  If there is room in the buffer,
+ *     it will have already been moved into it.  If there is no
+ *     room, then we will just have to discard the packet.
+ */
+
+static int tcp_data(struct sk_buff *skb, struct sock *sk, 
+        unsigned long saddr, unsigned short len)
+{
+       struct sk_buff *skb1, *skb2;
+       struct tcphdr *th;
+       int dup_dumped=0;
+       u32 new_seq, shut_seq;
+
+       th = skb->h.th;
+       skb_pull(skb,th->doff*4);
+       skb_trim(skb,len-(th->doff*4));
+
+       /*
+        *      The bytes in the receive read/assembly queue has increased. Needed for the
+        *      low memory discard algorithm 
+        */
+          
+       sk->bytes_rcv += skb->len;
+       
+       if (skb->len == 0 && !th->fin) 
+       {
+               /* 
+                *      Don't want to keep passing ack's back and forth. 
+                *      (someone sent us dataless, boring frame)
+                */
+               if (!th->ack)
+                       tcp_send_ack(sk->sent_seq, sk->acked_seq,sk, th, saddr);
+               kfree_skb(skb, FREE_READ);
+               return(0);
+       }
+       
+       /*
+        *      We no longer have anyone receiving data on this connection.
+        */
+
+#ifndef TCP_DONT_RST_SHUTDOWN           
+
+       if(sk->shutdown & RCV_SHUTDOWN)
+       {
+               /*
+                *      FIXME: BSD has some magic to avoid sending resets to
+                *      broken 4.2 BSD keepalives. Much to my surprise a few non
+                *      BSD stacks still have broken keepalives so we want to
+                *      cope with it.
+                */
+
+               if(skb->len)    /* We don't care if it's just an ack or
+                                  a keepalive/window probe */
+               {
+                       new_seq = skb->seq + skb->len + th->syn;        /* Right edge of _data_ part of frame */
+                       
+                       /* Do this the way 4.4BSD treats it. Not what I'd
+                          regard as the meaning of the spec but it's what BSD
+                          does and clearly they know everything 8) */
+
+                       /*
+                        *      This is valid because of two things
+                        *
+                        *      a) The way tcp_data behaves at the bottom.
+                        *      b) A fin takes effect when read not when received.
+                        */
+                        
+                       shut_seq = sk->acked_seq+1;     /* Last byte */
+                       
+                       if(after(new_seq,shut_seq))
+                       {
+                               if(sk->debug)
+                                       printk("Data arrived on %p after close [Data right edge %X, Socket shut on %X] %d\n",
+                                               sk, new_seq, shut_seq, sk->blog);
+                               if(sk->dead)
+                               {
+                                       sk->acked_seq = new_seq + th->fin;
+                                       tcp_send_reset(sk->saddr, sk->daddr, skb->h.th,
+                                               sk->prot, NULL, skb->dev, sk->ip_tos, sk->ip_ttl);
+                                       tcp_statistics.TcpEstabResets++;
+                                       sk->err = EPIPE;
+                                       sk->error_report(sk);
+                                       sk->shutdown = SHUTDOWN_MASK;
+                                       tcp_set_state(sk,TCP_CLOSE);
+                                       kfree_skb(skb, FREE_READ);
+                                       return 0;
+                               }
+                       }
+               }
+       }
+
+#endif
+
+       /*
+        *      Now we have to walk the chain, and figure out where this one
+        *      goes into it.  This is set up so that the last packet we received
+        *      will be the first one we look at, that way if everything comes
+        *      in order, there will be no performance loss, and if they come
+        *      out of order we will be able to fit things in nicely.
+        *
+        *      [AC: This is wrong. We should assume in order first and then walk
+        *       forwards from the first hole based upon real traffic patterns.]
+        *      
+        */
+
+       if (skb_peek(&sk->receive_queue) == NULL)       /* Empty queue is easy case */
+       {
+               skb_queue_head(&sk->receive_queue,skb);
+               skb1= NULL;
+       } 
+       else
+       {
+               for(skb1=sk->receive_queue.prev; ; skb1 = skb1->prev) 
+               {
+                       if(sk->debug)
+                       {
+                               printk("skb1=%p :", skb1);
+                               printk("skb1->seq = %d: ", skb1->seq);
+                               printk("skb->seq = %d\n",skb->seq);
+                               printk("copied_seq = %d acked_seq = %d\n", sk->copied_seq,
+                                               sk->acked_seq);
+                       }
+                       
+                       /*
+                        *      Optimisation: Duplicate frame or extension of previous frame from
+                        *      same sequence point (lost ack case).
+                        *      The frame contains duplicate data or replaces a previous frame
+                        *      discard the previous frame (safe as sk->inuse is set) and put
+                        *      the new one in its place.
+                        */
+                        
+                       if (skb->seq==skb1->seq && skb->len>=skb1->len)
+                       {
+                               skb_append(skb1,skb);
+                               skb_unlink(skb1);
+                               kfree_skb(skb1,FREE_READ);
+                               dup_dumped=1;
+                               skb1=NULL;
+                               break;
+                       }
+                       
+                       /*
+                        *      Found where it fits
+                        */
+                        
+                       if (after(skb->seq+1, skb1->seq))
+                       {
+                               skb_append(skb1,skb);
+                               break;
+                       }
+                       
+                       /*
+                        *      See if we've hit the start. If so insert.
+                        */
+                       if (skb1 == skb_peek(&sk->receive_queue))
+                       {
+                               skb_queue_head(&sk->receive_queue, skb);
+                               break;
+                       }
+               }
+       }
+
+       /*
+        *      Figure out what the ack value for this frame is
+        */
+        
+       if (before(sk->acked_seq, sk->copied_seq)) 
+       {
+               printk("*** tcp.c:tcp_data bug acked < copied\n");
+               sk->acked_seq = sk->copied_seq;
+       }
+
+       /*
+        *      Now figure out if we can ack anything. This is very messy because we really want two
+        *      receive queues, a completed and an assembly queue. We also want only one transmit
+        *      queue.
+        */
+
+       if ((!dup_dumped && (skb1 == NULL || skb1->acked)) || before(skb->seq, sk->acked_seq+1)) 
+       {
+               if (before(skb->seq, sk->acked_seq+1)) 
+               {
+
+                       if (after(skb->end_seq, sk->acked_seq)) 
+                               sk->acked_seq = skb->end_seq;
+
+                       skb->acked = 1;
+
+                       /*
+                        *      When we ack the fin, we do the FIN 
+                        *      processing.
+                        */
+
+                       if (skb->h.th->fin) 
+                       {
+                               tcp_fin(skb,sk,skb->h.th);
+                       }
+         
+                       for(skb2 = skb->next;
+                           skb2 != (struct sk_buff *)&sk->receive_queue;
+                           skb2 = skb2->next) 
+                       {
+                               if (before(skb2->seq, sk->acked_seq+1)) 
+                               {
+                                       if (after(skb2->end_seq, sk->acked_seq))
+                                               sk->acked_seq = skb2->end_seq;
+
+                                       skb2->acked = 1;
+                                       /*
+                                        *      When we ack the fin, we do
+                                        *      the fin handling.
+                                        */
+                                       if (skb2->h.th->fin) 
+                                       {
+                                               tcp_fin(skb,sk,skb->h.th);
+                                       }
+
+                                       /*
+                                        *      Force an immediate ack.
+                                        */
+                                        
+                                       sk->ack_backlog = sk->max_ack_backlog;
+                               }
+                               else
+                               {
+                                       break;
+                               }
+                       }
+
+                       /*
+                        *      This also takes care of updating the window.
+                        *      This if statement needs to be simplified.
+                        *
+                        *      rules for delaying an ack:
+                        *      - delay time <= 0.5 HZ
+                        *      - we don't have a window update to send
+                        *      - must send at least every 2 full sized packets
+                        */
+                       if (!sk->delay_acks ||
+                           sk->ack_backlog >= sk->max_ack_backlog || 
+                           sk->bytes_rcv > sk->max_unacked || th->fin ||
+                           sk->ato > HZ/2 ||
+                           tcp_raise_window(sk)) {
+       /*                      tcp_send_ack(sk->sent_seq, sk->acked_seq,sk,th, saddr); */
+                       }
+                       else 
+                       {
+                               sk->ack_backlog++;
+                               
+                               if(sk->debug)                           
+                                       printk("Ack queued.\n");
+                               tcp_reset_xmit_timer(sk, TIME_WRITE, sk->ato);
+                       }
+               }
+       }
+
+       /*
+        *      If we've missed a packet, send an ack.
+        *      Also start a timer to send another.
+        */
+        
+       if (!skb->acked) 
+       {
+       
+       /*
+        *      This is important.  If we don't have much room left,
+        *      we need to throw out a few packets so we have a good
+        *      window.  Note that mtu is used, not mss, because mss is really
+        *      for the send side.  He could be sending us stuff as large as mtu.
+        */
+                
+               while (sock_rspace(sk) < sk->mtu) 
+               {
+                       skb1 = skb_peek(&sk->receive_queue);
+                       if (skb1 == NULL) 
+                       {
+                               printk("INET: tcp.c:tcp_data memory leak detected.\n");
+                               break;
+                       }
+
+                       /*
+                        *      Don't throw out something that has been acked. 
+                        */
+                
+                       if (skb1->acked) 
+                       {
+                               break;
+                       }
+               
+                       skb_unlink(skb1);
+                       kfree_skb(skb1, FREE_READ);
+               }
+               tcp_send_ack(sk->sent_seq, sk->acked_seq, sk, th, saddr);
+               sk->ack_backlog++;
+               tcp_reset_xmit_timer(sk, TIME_WRITE, min(sk->ato, 0.5 * HZ));
+       }
+       else
+       {
+               tcp_send_ack(sk->sent_seq, sk->acked_seq, sk, th, saddr);
+       }
+
+       /*
+        *      Now tell the user we may have some data. 
+        */
+        
+       if (!sk->dead) 
+       {
+               if(sk->debug)
+                       printk("Data wakeup.\n");
+               sk->data_ready(sk,0);
+       } 
+       return(0);
+}
+
+
+/*
+ *     Slow path of tcp_urg(): reached only when a segment has
+ *     URG set.  Works out the sequence number of the urgent
+ *     octet, signals the socket owner with SIGURG and marks the
+ *     socket as waiting for that octet.  The pointer is handled
+ *     the (incorrect) BSD way on purpose, for compatibility.
+ */
+ 
+static void tcp_check_urg(struct sock * sk, struct tcphdr * th)
+{
+       u32 urg_off = ntohs(th->urg_ptr);
+       u32 urg_at;
+
+       /* a non-zero pointer is stepped back by one octet */
+       urg_at = ntohl(th->seq) + (urg_off ? urg_off - 1 : 0);
+
+       /* the reader has already consumed past this point */
+       if (after(sk->copied_seq, urg_at))
+               return;
+
+       /* a mark at or beyond this one is already recorded */
+       if (sk->urg_data && !after(urg_at, sk->urg_seq))
+               return;
+
+       /*
+        * Notify the owner: a positive sk->proc goes to kill_proc(),
+        * a negative one to kill_pg() with the sign flipped.
+        */
+       if (sk->proc > 0)
+               kill_proc(sk->proc, SIGURG, 1);
+       else if (sk->proc < 0)
+               kill_pg(-sk->proc, SIGURG, 1);
+       sk->urg_seq = urg_at;
+       sk->urg_data = URG_NOTYET;
+}
+
+/*
+ *     Fast path of urgent handling.
+ */
+ 
+static inline void tcp_urg(struct sock *sk, struct tcphdr *th, unsigned long len)
+{
+       u32 offset;
+
+       /* A segment with URG set may move the urgent mark - rare. */
+       if (th->urg)
+               tcp_check_urg(sk,th);
+
+       /* Nothing more to do unless an urgent octet is still awaited. */
+       if (sk->urg_data != URG_NOTYET)
+               return;
+
+       /*
+        *      Offset of the urgent octet from the start of the TCP header;
+        *      it is only usable if it falls inside this segment (< len).
+        */
+       offset = sk->urg_seq - ntohl(th->seq) + th->doff*4;
+       if (offset >= len)
+               return;
+
+       /*
+        *      Latch the octet with URG_VALID; notify via data_ready unless dead.
+        */
+       sk->urg_data = URG_VALID | ((unsigned char *) th)[offset];
+       if (!sk->dead)
+               sk->data_ready(sk,0);
+}
+
+
+/*
+ *     A TCP packet has arrived; skb->h.raw is the TCP header.  redo is non-zero when
+ *     the skb was seen before and deferred because the socket was in use.
+ */
+ 
+int tcp_rcv(struct sk_buff *skb, struct device *dev, struct options *opt,
+       __u32 daddr, unsigned short len,
+       __u32 saddr, int redo, struct inet_protocol * protocol)
+{
+       struct tcphdr *th;
+       struct sock *sk;
+       int syn_ok=0;
+
+       /*
+        * "redo" is 1 if we have already seen this skb but couldn't
+        * use it at that time (the socket was locked).  In that case
+        * we have already done a lot of the work (looked up the socket
+        * etc).
+        */
+       th = skb->h.th;
+       sk = skb->sk;
+       if (!redo) {
+               tcp_statistics.TcpInSegs++;
+               if (skb->pkt_type!=PACKET_HOST)
+                       goto discard_it;
+
+               /*
+                *      Pull up the IP header.
+                */
+               skb_pull(skb, skb->h.raw-skb->data);
+
+               /*
+                *      Try to use the device checksum if provided.
+                */
+               switch (skb->ip_summed) {
+                       case CHECKSUM_NONE:
+                               skb->csum = csum_partial((char *)th, len, 0);   /* fall through */
+                       case CHECKSUM_HW:
+                               if (tcp_check(th, len, saddr, daddr, skb->csum))
+                                       goto discard_it;
+                       default:
+                               break;  /* CHECKSUM_UNNECESSARY; a label needs a statement */
+               }
+               sk = get_tcp_sock(saddr, th->source, daddr, th->dest);
+               if (!sk)
+                       goto no_tcp_socket;
+               skb->sk = sk;
+               skb->seq = ntohl(th->seq);
+               skb->end_seq = skb->seq + th->syn + th->fin + len - th->doff*4;
+               skb->ack_seq = ntohl(th->ack_seq);
+
+               skb->acked = 0;
+               skb->used = 0;
+               skb->free = 0;
+               skb->saddr = daddr;
+               skb->daddr = saddr;
+       
+               /* We may need to add it to the backlog here. */
+               cli();
+               if (sk->inuse) 
+               {
+                       skb_queue_tail(&sk->back_log, skb);
+                       sti();
+                       return(0);
+               }
+               sk->inuse = 1;
+               sti();
+       }
+
+       /*
+        *      If this socket has got a reset it's to all intents and purposes 
+        *      really dead. Count closed sockets as dead.
+        *
+        *      Note: BSD appears to have a bug here. A 'closed' TCP in BSD
+        *      simply drops data. This seems incorrect as a 'closed' TCP doesn't
+        *      exist so should cause resets as if the port was unreachable.
+        */
+
+       if (sk->zapped || sk->state==TCP_CLOSE)
+               goto no_tcp_socket;
+
+       if (!sk->prot) 
+       {
+               printk("IMPOSSIBLE 3\n");
+               return(0);
+       }
+
+
+       /*
+        *      Charge the memory to the socket. 
+        */
+        
+       skb->sk=sk;
+       sk->rmem_alloc += skb->truesize;
+
+       /*
+        *      This basically follows the flow suggested by RFC793, with the corrections in RFC1122. We
+        *      don't implement precedence and we process URG incorrectly (deliberately so) for BSD bug
+        *      compatibility. We also set up variables more thoroughly [Karn notes in the
+        *      KA9Q code the RFC793 incoming segment rules don't initialise the variables for all paths].
+        */
+
+       if(sk->state!=TCP_ESTABLISHED)          /* Skip this lot for normal flow */
+       {
+       
+               /*
+                *      Now deal with unusual cases.
+                */
+        
+               if(sk->state==TCP_LISTEN)
+               {
+                       if(th->ack)     /* These use the socket TOS.. might want to be the received TOS */
+                               tcp_send_reset(daddr,saddr,th,sk->prot,opt,dev,sk->ip_tos, sk->ip_ttl);
+
+                       /*
+                        *      We don't care for RST, and non SYN are absorbed (old segments)
+                        *      Broadcast/multicast SYN isn't allowed. Note - bug if you change the
+                        *      netmask on a running connection it can go broadcast. Even Sun's have
+                        *      this problem so I'm ignoring it 
+                        */
+                          
+                       if(th->rst || !th->syn || th->ack || ip_chk_addr(daddr)!=IS_MYADDR)
+                       {
+                               kfree_skb(skb, FREE_READ);
+                               release_sock(sk);
+                               return 0;
+                       }
+               
+                       /*      
+                        *      Guess we need to make a new socket up 
+                        */
+               
+                       tcp_conn_request(sk, skb, daddr, saddr, opt, dev, tcp_init_seq());
+               
+                       /*
+                        *      Now we have several options: In theory there is nothing else
+                        *      in the frame. KA9Q has an option to send data with the syn,
+                        *      BSD accepts data with the syn up to the [to be] advertised window
+                        *      and Solaris 2.1 gives you a protocol error. For now we just ignore
+                        *      it, that fits the spec precisely and avoids incompatibilities. It
+                        *      would be nice in future to drop through and process the data.
+                        */
+                        
+                       release_sock(sk);
+                       return 0;
+               }
+       
+               /* retransmitted SYN? */
+               if (sk->state == TCP_SYN_RECV && th->syn && skb->seq+1 == sk->acked_seq)
+               {
+                       kfree_skb(skb, FREE_READ);
+                       release_sock(sk);
+                       return 0;
+               }
+               
+               /*
+                *      SYN sent means we have to look for a suitable ack and either reset
+                *      for bad matches or go to connected 
+                */
+          
+               if(sk->state==TCP_SYN_SENT)
+               {
+                       /* Crossed SYN or previous junk segment */
+                       if(th->ack)
+                       {
+                               /* We got an ack, but it's not a good ack */
+                               if(!tcp_ack(sk,th,skb->ack_seq,len))
+                               {
+                                       /* Reset the ack - its an ack from a 
+                                          different connection  [ th->rst is checked in tcp_send_reset()] */
+                                       tcp_statistics.TcpAttemptFails++;
+                                       tcp_send_reset(daddr, saddr, th,
+                                               sk->prot, opt,dev,sk->ip_tos,sk->ip_ttl);
+                                       kfree_skb(skb, FREE_READ);
+                                       release_sock(sk);
+                                       return(0);
+                               }
+                               if(th->rst)
+                                       return tcp_reset(sk,skb);
+                               if(!th->syn)
+                               {
+                                       /* A valid ack from a different connection
+                                          start. Shouldn't happen but cover it */
+                                       tcp_statistics.TcpAttemptFails++;
+                                        tcp_send_reset(daddr, saddr, th,
+                                                sk->prot, opt,dev,sk->ip_tos,sk->ip_ttl);
+                                       kfree_skb(skb, FREE_READ);
+                                       release_sock(sk);
+                                       return 0;
+                               }
+                               /*
+                                *      Ok.. it's good. Set up sequence numbers and
+                                *      move to established.
+                                */
+                               syn_ok=1;       /* Don't reset this connection for the syn */
+                               sk->acked_seq = skb->seq+1;
+                               sk->lastwin_seq = skb->seq+1;
+                               sk->fin_seq = skb->seq;
+                               tcp_send_ack(sk->sent_seq,sk->acked_seq,sk,th,sk->daddr);
+                               tcp_set_state(sk, TCP_ESTABLISHED);
+                               tcp_options(sk,th);
+                               sk->dummy_th.dest=th->source;
+                               sk->copied_seq = sk->acked_seq;
+                               if(!sk->dead)
+                               {
+                                       sk->state_change(sk);
+                                       sock_wake_async(sk->socket, 0);
+                               }
+                               if(sk->max_window==0)
+                               {
+                                       sk->max_window = 32;
+                                       sk->mss = min(sk->max_window, sk->mtu);
+                               }
+                       }
+                       else
+                       {
+                               /* See if SYN's cross. Drop if boring */
+                               if(th->syn && !th->rst)
+                               {
+                                       /* Crossed SYN's are fine - but talking to
+                                          yourself is right out... */
+                                       if(sk->saddr==saddr && sk->daddr==daddr &&
+                                               sk->dummy_th.source==th->source &&
+                                               sk->dummy_th.dest==th->dest)
+                                       {
+                                               tcp_statistics.TcpAttemptFails++;
+                                               return tcp_reset(sk,skb);
+                                       }
+                                       tcp_set_state(sk,TCP_SYN_RECV);
+                                       
+                                       /*
+                                        *      FIXME:
+                                        *      Must send SYN|ACK here
+                                        */
+                               }               
+                               /* Discard junk segment */
+                               kfree_skb(skb, FREE_READ);
+                               release_sock(sk);
+                               return 0;
+                       }
+                       /*
+                        *      SYN_RECV with data maybe.. drop through
+                        */
+                       goto rfc_step6;
+               }
+
+       /*
+        *      BSD has a funny hack with TIME_WAIT and fast reuse of a port. There is
+        *      a more complex suggestion for fixing these reuse issues in RFC1644
+        *      but not yet ready for general use. Also see RFC1379.
+        */
+       
+#define BSD_TIME_WAIT
+#ifdef BSD_TIME_WAIT
+               if (sk->state == TCP_TIME_WAIT && th->syn && sk->dead && 
+                       after(skb->seq, sk->acked_seq) && !th->rst)
+               {
+                       u32 seq = sk->write_seq;
+                       if(sk->debug)
+                               printk("Doing a BSD time wait\n");
+                       tcp_statistics.TcpEstabResets++;           
+                       sk->rmem_alloc -= skb->truesize;
+                       skb->sk = NULL;
+                       sk->err=ECONNRESET;
+                       tcp_set_state(sk, TCP_CLOSE);
+                       sk->shutdown = SHUTDOWN_MASK;
+                       release_sock(sk);
+                       sk=get_sock(&tcp_prot, th->dest, saddr, th->source, daddr);
+                       if (sk && sk->state==TCP_LISTEN)
+                       {
+                               sk->inuse=1;
+                               skb->sk = sk;
+                               sk->rmem_alloc += skb->truesize;
+                               tcp_conn_request(sk, skb, daddr, saddr,opt, dev,seq+128000);
+                               release_sock(sk);
+                               return 0;
+                       }
+                       kfree_skb(skb, FREE_READ);
+                       return 0;
+               }
+#endif 
+       }
+
+       /*
+        *      We are now in normal data flow (see the step list in the RFC)
+        *      Note most of these are inline now. I'll inline the lot when
+        *      I have time to test it hard and look at what gcc outputs 
+        */
+       
+       if (!tcp_sequence(sk, skb->seq, skb->end_seq))
+       {
+               bad_tcp_sequence(sk, th, len, opt, saddr, dev);
+               kfree_skb(skb, FREE_READ);
+               release_sock(sk);
+               return 0;
+       }
+
+       if(th->rst)
+               return tcp_reset(sk,skb);
+       
+       /*
+        *      !syn_ok is effectively the state test in RFC793.
+        */
+        
+       if(th->syn && !syn_ok)
+       {
+               tcp_send_reset(daddr,saddr,th, &tcp_prot, opt, dev, skb->ip_hdr->tos, 255);
+               return tcp_reset(sk,skb);       
+       }
+
+
+       /*
+        *      Delayed ACK time estimator.
+        */
+       
+       if (sk->lrcvtime == 0) 
+       {
+               sk->lrcvtime = jiffies;
+               sk->ato = HZ/3;
+       }
+       else 
+       {
+               int m;
+               
+               m = jiffies - sk->lrcvtime;
+
+               sk->lrcvtime = jiffies;
+
+               if (m <= 0)
+                       m = 1;
+
+               if (m > (sk->rtt >> 3)) 
+               {
+                       sk->ato = sk->rtt >> 3;
+                       /*
+                        * printk(KERN_DEBUG "ato: rtt %lu\n", sk->ato);
+                        */
+               }
+               else 
+               {
+                       sk->ato = (sk->ato >> 1) + m;
+                       /*
+                        * printk(KERN_DEBUG "ato: m %lu\n", sk->ato);
+                        */
+               }
+       }
+         
+       /*
+        *      Process the ACK
+        */
+        
+
+       if(th->ack && !tcp_ack(sk,th,skb->ack_seq,len))
+       {
+               /*
+                *      Our three way handshake failed.
+                */
+                
+               if(sk->state==TCP_SYN_RECV)
+               {
+                       tcp_send_reset(daddr, saddr, th,sk->prot, opt, dev,sk->ip_tos,sk->ip_ttl);
+               }
+               kfree_skb(skb, FREE_READ);
+               release_sock(sk);
+               return 0;
+       }
+       
+rfc_step6:             /* I'll clean this up later */
+
+       /*
+        *      If the accepted buffer put us over our queue size we
+        *      now drop it (we must process the ack first to avoid
+        *      deadlock cases).
+        */
+        
+       if (sk->rmem_alloc  >= sk->rcvbuf) 
+       {
+               kfree_skb(skb, FREE_READ);
+               release_sock(sk);
+               return(0);
+       }
+
+
+       /*
+        *      Process urgent data
+        */
+               
+       tcp_urg(sk, th, len);
+       
+       /*
+        *      Process the encapsulated data
+        */
+       
+       if(tcp_data(skb,sk, saddr, len))
+       {
+               kfree_skb(skb, FREE_READ);
+               release_sock(sk);
+               return 0;
+       }
+
+       /*
+        *      And done
+        */     
+       
+       release_sock(sk);
+       return 0;
+
+no_tcp_socket:
+       /*
+        *      No such TCB. If th->rst is 0 send a reset (checked in tcp_send_reset)
+        */
+       tcp_send_reset(daddr, saddr, th, &tcp_prot, opt,dev,skb->ip_hdr->tos,255);
+
+discard_it:
+       /*
+        *      Discard frame
+        */
+       skb->sk = NULL;
+       kfree_skb(skb, FREE_READ);
+       return 0;
+}
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c

new file mode 100644 (file)

index 0000000..85704ea
--- /dev/null
+++ b/net/ipv4/tcp_output.c
@@ -0,0 +1,1099 @@
+/*
+ * INET                An implementation of the TCP/IP protocol suite for the LINUX
+ *             operating system.  INET is implemented using the  BSD Socket
+ *             interface as the means of communication with the user level.
+ *
+ *             Implementation of the Transmission Control Protocol(TCP).
+ *
+ * Version:    @(#)tcp_output.c 1.0.16  05/25/93
+ *
+ * Authors:    Ross Biro, <bir7@leland.Stanford.Edu>
+ *             Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
+ *             Mark Evans, <evansmp@uhura.aston.ac.uk>
+ *             Corey Minyard <wf-rch!minyard@relay.EU.net>
+ *             Florian La Roche, <flla@stud.uni-sb.de>
+ *             Charles Hedrick, <hedrick@klinzhai.rutgers.edu>
+ *             Linus Torvalds, <torvalds@cs.helsinki.fi>
+ *             Alan Cox, <gw4pts@gw4pts.ampr.org>
+ *             Matthew Dillon, <dillon@apollo.west.oic.com>
+ *             Arnt Gulbrandsen, <agulbra@nvg.unit.no>
+ *             Jorge Cwik, <jorge@laser.satlink.net>
+ */
+
+#include <linux/config.h>
+#include <net/tcp.h>
+
+/*
+ *     This is the main buffer sending routine. We queue the buffer
+ *     having checked it is sane seeming.
+ *
+ *     sk      socket the segment belongs to
+ *     skb     built frame; skb->h.th must point at its TCP header
+ *
+ *     A frame shorter than a TCP header, or a bare header carrying
+ *     neither SYN nor FIN, is logged and freed. Any other frame is
+ *     either appended to sk->write_queue (right edge beyond the window,
+ *     a TIME_WRITE retransmit in progress, or packets_out has reached
+ *     cong_window) or handed straight to sk->prot->queue_xmit().
+ *     Nothing is returned.
+ */
+ 
+void tcp_send_skb(struct sock *sk, struct sk_buff *skb)
+{
+       int size;
+       struct tcphdr * th = skb->h.th;
+
+       /*
+        *      length of packet (not counting length of pre-tcp headers) 
+        */
+        
+       size = skb->len - ((unsigned char *) th - skb->data);
+
+       /*
+        *      Sanity check it.. 
+        */
+        
+       if (size < sizeof(struct tcphdr) || size > skb->len) 
+       {
+               printk("tcp_send_skb: bad skb (skb = %p, data = %p, th = %p, len = %lu)\n",
+                       skb, skb->data, th, skb->len);
+               kfree_skb(skb, FREE_WRITE);
+               return;
+       }
+
+       /*
+        *      If we have queued a header size packet.. (these crash a few
+        *      tcp stacks if ack is not set)
+        */
+        
+       if (size == sizeof(struct tcphdr)) 
+       {
+               /* If it's got a syn or fin it's notionally included in the size..*/
+               if(!th->syn && !th->fin) 
+               {
+                       printk("tcp_send_skb: attempt to queue a bogon.\n");
+                       kfree_skb(skb,FREE_WRITE);
+                       return;
+               }
+       }
+
+       /*
+        *      Actual processing.
+        */
+        
+       tcp_statistics.TcpOutSegs++;  
+       skb->seq = ntohl(th->seq);
+       /* end_seq = seq + payload length (size minus the TCP header, doff words) */
+       skb->end_seq = skb->seq + size - 4*th->doff;
+       
+       /*
+        *      We must queue if
+        *
+        *      a) The right edge of this frame exceeds the window
+        *      b) We are retransmitting (Nagle's rule)
+        *      c) We have too many packets 'in flight'
+        */
+        
+       if (after(skb->end_seq, sk->window_seq) ||
+           (sk->retransmits && sk->ip_xmit_timeout == TIME_WRITE) ||
+            sk->packets_out >= sk->cong_window) 
+       {
+               /* checksum will be supplied by tcp_write_xmit.  So
+                * we shouldn't need to set it at all.  I'm being paranoid */
+               th->check = 0;
+               if (skb->next != NULL) 
+               {
+                       /* Report under this function's own name so the log points here. */
+                       printk("tcp_send_skb: next != NULL\n");
+                       skb_unlink(skb);
+               }
+               skb_queue_tail(&sk->write_queue, skb);
+               
+               /*
+                *      If we don't fit we have to start the zero window
+                *      probes. This is broken - we really need to do a partial
+                *      send _first_ (This is what causes the Cisco and PC/TCP
+                *      grief).
+                */
+                
+               if (before(sk->window_seq, sk->write_queue.next->end_seq) &&
+                   sk->send_head == NULL && sk->ack_backlog == 0)
+                       tcp_reset_xmit_timer(sk, TIME_PROBE0, sk->rto);
+       } 
+       else 
+       {
+               /*
+                *      This is going straight out
+                */
+                
+               th->ack_seq = htonl(sk->acked_seq);
+               th->window = htons(tcp_select_window(sk));
+
+               tcp_send_check(th, sk->saddr, sk->daddr, size, sk);
+
+               sk->sent_seq = sk->write_seq;
+               
+               /*
+                *      This is mad. The tcp retransmit queue is put together
+                *      by the ip layer. This causes half the problems with
+                *      unroutable FIN's and other things.
+                */
+                
+               sk->prot->queue_xmit(sk, skb->dev, skb, 0);
+               
+               /* The segment just sent carried an ack, so nothing is owed. */
+               sk->ack_backlog = 0;
+               sk->bytes_rcv = 0;
+
+               /*
+                *      Set for next retransmit based on expected ACK time.
+                *      FIXME: We set this every time which means our 
+                *      retransmits are really about a window behind.
+                */
+
+               tcp_reset_xmit_timer(sk, TIME_WRITE, sk->rto);
+       }
+}
+
+/*
+ *     Locking problems lead us to a messy situation where we can have
+ *     multiple partially complete buffers queued up. This is really bad
+ *     as we don't want to be sending partial buffers. Fix this with
+ *     a semaphore or similar to lock tcp_write per socket.
+ *
+ *     These routines are pretty self descriptive.
+ */
+ 
+/*
+ *     Detach and return the socket's pending partial frame, or NULL if
+ *     there is none. When a frame is detached its partial_timer is
+ *     cancelled too. The work is done between cli() and restore_flags().
+ */
+
+struct sk_buff * tcp_dequeue_partial(struct sock * sk)
+{
+       unsigned long irq_state;
+       struct sk_buff *pending;
+
+       save_flags(irq_state);
+       cli();
+
+       pending = sk->partial;
+       if (pending != NULL)
+       {
+               sk->partial = NULL;
+               del_timer(&sk->partial_timer);
+       }
+
+       restore_flags(irq_state);
+       return pending;
+}
+
+/*
+ *     Flush the partial queue: keep detaching the pending partial frame
+ *     with tcp_dequeue_partial() and passing it to tcp_send_skb() until
+ *     nothing is left. A NULL sk is ignored.
+ */
+ 
+void tcp_send_partial(struct sock *sk)
+{
+       struct sk_buff *pending;
+
+       if (sk != NULL)
+       {
+               for (pending = tcp_dequeue_partial(sk);
+                    pending != NULL;
+                    pending = tcp_dequeue_partial(sk))
+               {
+                       tcp_send_skb(sk, pending);
+               }
+       }
+}
+
+/*
+ *     Install skb as the socket's partial frame and (re)arm the
+ *     partial_timer so that tcp_send_partial() runs one second later.
+ *     If another partial frame was already waiting it is displaced and
+ *     sent with tcp_send_skb() once the flags have been restored.
+ */
+ 
+void tcp_enqueue_partial(struct sk_buff * skb, struct sock * sk)
+{
+       struct sk_buff *displaced;
+       unsigned long irq_state;
+
+       save_flags(irq_state);
+       cli();
+
+       displaced = sk->partial;
+       if (displaced != NULL)
+               del_timer(&sk->partial_timer);
+
+       sk->partial = skb;
+
+       /* Give the new buffer up to one second (HZ jiffies) to fill. */
+       init_timer(&sk->partial_timer);
+       sk->partial_timer.data = (unsigned long) sk;
+       sk->partial_timer.function = (void (*)(unsigned long)) tcp_send_partial;
+       sk->partial_timer.expires = jiffies+HZ;
+       add_timer(&sk->partial_timer);
+
+       restore_flags(irq_state);
+
+       if (displaced != NULL)
+               tcp_send_skb(sk, displaced);
+}
+
+/*
+ *     This routine takes stuff off of the write queue,
+ *     and puts it in the xmit queue. This happens as incoming acks
+ *     open up the remote window for us.
+ *
+ *     sk      socket whose write_queue is to be drained
+ *
+ *     Frames are taken from the head of sk->write_queue for as long as
+ *     the head frame ends at or before sk->window_seq, packets_out is
+ *     below cong_window, and we are not in a TIME_WRITE retransmit
+ *     (a frame ending at or before rcv_ack_seq is still taken in that
+ *     case). Each frame that is sent gets a current ack_seq, window and
+ *     TCP checksum before going to sk->prot->queue_xmit().
+ *     Nothing is returned.
+ */
+ 
+void tcp_write_xmit(struct sock *sk)
+{
+       struct sk_buff *skb;
+
+       /*
+        *      The bytes will have to remain here. In time closedown will
+        *      empty the write queue and all will be happy 
+        */
+
+       if(sk->zapped)
+               return;
+
+       /*
+        *      Anything on the transmit queue that fits the window can
+        *      be added providing we are not
+        *
+        *      a) retransmitting (Nagle's rule)
+        *      b) exceeding our congestion window.
+        *
+        *      before(x, y + 1) is used below as "x is at or before y".
+        */
+        
+       while((skb = skb_peek(&sk->write_queue)) != NULL &&
+               before(skb->end_seq, sk->window_seq + 1) &&
+               (sk->retransmits == 0 ||
+                sk->ip_xmit_timeout != TIME_WRITE ||
+                before(skb->end_seq, sk->rcv_ack_seq + 1))
+               && sk->packets_out < sk->cong_window) 
+       {
+               IS_SKB(skb);
+               skb_unlink(skb);
+               
+               /*
+                *      See if we really need to send the packet. 
+                *      (It is not needed if it ends at or before rcv_ack_seq.)
+                */
+                
+               if (before(skb->end_seq, sk->rcv_ack_seq +1)) 
+               {
+                       /*
+                        *      This is acked data. We can discard it. This 
+                        *      cannot currently occur.
+                        */
+                        
+                       sk->retransmits = 0;
+                       kfree_skb(skb, FREE_WRITE);
+                       if (!sk->dead) 
+                               sk->write_space(sk);
+               } 
+               else
+               {
+                       struct tcphdr *th;
+                       struct iphdr *iph;
+                       int size;
+/*
+ * put in the ack seq and window at this point rather than earlier,
+ * in order to keep them monotonic.  We really want to avoid taking
+ * back window allocations.  That's legal, but RFC1122 says it's frowned on.
+ * Ack and window will in general have changed since this packet was put
+ * on the write queue.
+ */
+                       iph = skb->ip_hdr;
+                       th = (struct tcphdr *)(((char *)iph) +(iph->ihl << 2)); /* TCP header follows ihl 32-bit words of IP header */
+                       size = skb->len - (((unsigned char *) th) - skb->data); /* TCP header plus payload */
+#ifndef CONFIG_NO_PATH_MTU_DISCOVERY
+                       if (size > sk->mtu - sizeof(struct iphdr))      /* too big for sk->mtu: clear DF, then redo the IP header check */
+                       {
+                               iph->frag_off &= ~htons(IP_DF);
+                               ip_send_check(iph);
+                       }
+#endif
+                       
+                       th->ack_seq = htonl(sk->acked_seq);
+                       th->window = htons(tcp_select_window(sk));
+
+                       tcp_send_check(th, sk->saddr, sk->daddr, size, sk);
+
+                       sk->sent_seq = skb->end_seq;
+                       
+                       /*
+                        *      IP manages our queue for some crazy reason
+                        */
+                        
+                       sk->prot->queue_xmit(sk, skb->dev, skb, skb->free);
+                       
+                       
+                       sk->ack_backlog = 0;
+                       sk->bytes_rcv = 0;
+
+                       /*
+                        *      Again we slide the timer wrongly
+                        */
+                        
+                       tcp_reset_xmit_timer(sk, TIME_WRITE, sk->rto);
+               }
+       }
+}
+
+
+/*
+ *     A socket has timed out on its send queue and wants to do a
+ *     little retransmitting. Currently this means TCP.
+ */
+
+void tcp_do_retransmit(struct sock *sk, int all)
+{
+       struct sk_buff * skb;
+       struct proto *prot;
+       struct device *dev;
+       int ct=0;
+       struct rtable *rt;
+
+       prot = sk->prot;
+       skb = sk->send_head;
+
+       while (skb != NULL)
+       {
+               struct tcphdr *th;
+               struct iphdr *iph;
+               int size;
+
+               dev = skb->dev;
+               IS_SKB(skb);
+               skb->when = jiffies;
+               
+               /* dl1bke 960201 - @%$$! Hope this cures strange race conditions    */
+               /*                 with AX.25 mode VC. (esp. DAMA)                  */
+               /*                 if the buffer is locked we should not retransmit */
+               /*                 anyway, so we don't need all the fuss to prepare */
+               /*                 the buffer in this case.                         */
+               /*                 (the skb_pull() changes skb->data while we may   */
+               /*                 actually try to send the data. Ough. A side      */
+               /*                 effect is that we'll send some unnecessary data, */
+               /*                 but the alternative is desastrous...             */
+               
+               if (skb_device_locked(skb))
+                       break;
+
+               /*
+                *      Discard the surplus MAC header
+                */
+                
+               skb_pull(skb,((unsigned char *)skb->ip_hdr)-skb->data);
+
+               /*
+                * In general it's OK just to use the old packet.  However we
+                * need to use the current ack and window fields.  Urg and
+                * urg_ptr could possibly stand to be updated as well, but we
+                * don't keep the necessary data.  That shouldn't be a problem,
+                * if the other end is doing the right thing.  Since we're
+                * changing the packet, we have to issue a new IP identifier.
+                */
+
+               iph = (struct iphdr *)skb->data;
+               th = (struct tcphdr *)(((char *)iph) + (iph->ihl << 2));
+               size = ntohs(iph->tot_len) - (iph->ihl<<2);
+               
+               /*
+                *      Note: We ought to check for window limits here but
+                *      currently this is done (less efficiently) elsewhere.
+                */
+
+               /*
+                *      Put a MAC header back on (may cause ARPing)
+                */
+                
+               {
+                       /* ANK: UGLY, but the bug, that was here, should be fixed.
+                        */
+                       struct options *  opt = (struct options*)skb->proto_priv;
+                       rt = ip_check_route(&sk->ip_route_cache, opt->srr?opt->faddr:iph->daddr, skb->localroute);
+               }
+
+               iph->id = htons(ip_id_count++);
+#ifndef CONFIG_NO_PATH_MTU_DISCOVERY
+               if (rt && ntohs(iph->tot_len) > rt->rt_mtu)
+                       iph->frag_off &= ~htons(IP_DF);
+#endif
+               ip_send_check(iph);
+                       
+               if (rt==NULL)   /* Deep poo */
+               {
+                       if(skb->sk)
+                       {
+                               skb->sk->err_soft=ENETUNREACH;
+                               skb->sk->error_report(skb->sk);
+                       }
+               }
+               else
+               {
+                       dev=rt->rt_dev;
+                       skb->raddr=rt->rt_gateway;
+                       skb->dev=dev;
+                       skb->arp=1;
+                       if (rt->rt_hh)
+                       {
+                               memcpy(skb_push(skb,dev->hard_header_len),rt->rt_hh->hh_data,dev->hard_header_len);
+                               if (!rt->rt_hh->hh_uptodate)
+                               {
+                                       skb->arp = 0;
+#if RT_CACHE_DEBUG >= 2
+                                       printk("tcp_do_retransmit: hh miss %08x via %08x\n", iph->daddr, rt->rt_gateway);
+#endif
+                               }
+                       }
+                       else if (dev->hard_header)
+                       {
+                               if(dev->hard_header(skb, dev, ETH_P_IP, NULL, NULL, skb->len)<0)
+                                       skb->arp=0;
+                       }
+               
+                       /*
+                        *      This is not the right way to handle this. We have to
+                        *      issue an up to date window and ack report with this 
+                        *      retransmit to keep the odd buggy tcp that relies on 
+                        *      the fact BSD does this happy. 
+                        *      We don't however need to recalculate the entire 
+                        *      checksum, so someone wanting a small problem to play
+                        *      with might like to implement RFC1141/RFC1624 and speed
+                        *      this up by avoiding a full checksum.
+                        */
+                
+                       th->ack_seq = htonl(sk->acked_seq);
+                       sk->ack_backlog = 0;
+                       sk->bytes_rcv = 0;
+                       th->window = ntohs(tcp_select_window(sk));
+                       tcp_send_check(th, sk->saddr, sk->daddr, size, sk);
+               
+                       /*
+                        *      If the interface is (still) up and running, kick it.
+                        */
+       
+                       if (dev->flags & IFF_UP)
+                       {
+                               /*
+                                *      If the packet is still being sent by the device/protocol
+                                *      below then don't retransmit. This is both needed, and good -
+                                *      especially with connected mode AX.25 where it stops resends
+                                *      occurring of an as yet unsent anyway frame!
+                                *      We still add up the counts as the round trip time wants
+                                *      adjusting.
+                                */
+                               if (sk && !skb_device_locked(skb))
+                               {
+                                       /* Remove it from any existing driver queue first! */
+                                       skb_unlink(skb);
+                                       /* Now queue it */
+                                       ip_statistics.IpOutRequests++;
+                                       dev_queue_xmit(skb, dev, sk->priority);
+                               }
+                       }
+               }
+               
+               /*
+                *      Count retransmissions
+                */
+                
+               ct++;
+               sk->prot->retransmits ++;
+               tcp_statistics.TcpRetransSegs++;
+               
+
+               /*
+                *      Only one retransmit requested.
+                */
+       
+               if (!all)
+                       break;
+
+               /*
+                *      This should cut it off before we send too many packets.
+                */
+
+               if (ct >= sk->cong_window)
+                       break;
+               skb = skb->link3;
+       }
+}
+
+/*
+ *     This routine will send an RST to the other tcp. 
+ *
+ *     saddr, daddr    addresses passed on to build_header() and the checksum
+ *     th              TCP header of the segment being answered
+ *     prot            protocol operations used to build and queue the frame
+ *     opt             IP options handed to build_header()
+ *     dev             stored in buff->dev of the new frame
+ *     tos, ttl        IP type of service and time to live for the reply
+ *
+ *     Nothing is sent if th already has RST set, if no buffer can be
+ *     allocated, or if build_header() fails. Nothing is returned.
+ */
+ 
+void tcp_send_reset(unsigned long saddr, unsigned long daddr, struct tcphdr *th,
+         struct proto *prot, struct options *opt, struct device *dev, int tos, int ttl)
+{
+       struct sk_buff *buff;
+       struct tcphdr *t1;
+       int tmp;
+       struct device *ndev=NULL;
+
+       /*
+        *      Cannot reset a reset (Think about it).
+        */
+        
+       if(th->rst)
+               return;
+  
+       /*
+        * We need to grab some memory, and put together an RST,
+        * and then put it into the queue to be sent.
+        */
+
+       buff = sock_wmalloc(NULL, MAX_RESET_SIZE, 1, GFP_ATOMIC);
+       if (buff == NULL) 
+               return;
+
+       buff->sk = NULL;
+       buff->dev = dev;
+       buff->localroute = 0;
+
+       /*
+        *      Put in the IP header and routing stuff. 
+        */
+
+       tmp = prot->build_header(buff, saddr, daddr, &ndev, IPPROTO_TCP, opt,
+                          sizeof(struct tcphdr),tos,ttl,NULL);
+       if (tmp < 0) 
+       {
+               buff->free = 1;
+               sock_wfree(NULL, buff);
+               return;
+       }
+
+       t1 =(struct tcphdr *)skb_put(buff,sizeof(struct tcphdr));
+       memcpy(t1, th, sizeof(*t1));
+
+       /*
+        *      Swap the send and the receive. 
+        */
+
+       t1->dest = th->source;
+       t1->source = th->dest;
+       t1->rst = 1;  
+       t1->window = 0;
+
+       /*
+        *      The header was copied from the offending segment, so clear
+        *      the reserved bits rather than echo whatever the sender put
+        *      there (tcp_send_ack and tcp_send_synack do the same).
+        */
+
+       t1->res1 = 0;
+       t1->res2 = 0;
+  
+       if(th->ack)
+       {
+               /* Segment carried an ACK: the RST takes its sequence from it */
+               t1->ack = 0;
+               t1->seq = th->ack_seq;
+               t1->ack_seq = 0;
+       }
+       else
+       {
+               /* No ACK: sequence 0, and acknowledge the segment (SYN counts as one) */
+               t1->ack = 1;
+               if(!th->syn)
+                       t1->ack_seq = th->seq;
+               else
+                       t1->ack_seq = htonl(ntohl(th->seq)+1);
+               t1->seq = 0;
+       }
+
+       t1->syn = 0;
+       t1->urg = 0;
+       t1->fin = 0;
+       t1->psh = 0;
+       t1->doff = sizeof(*t1)/4;
+       tcp_send_check(t1, saddr, daddr, sizeof(*t1), NULL);
+       prot->queue_xmit(NULL, ndev, buff, 1);
+       tcp_statistics.TcpOutSegs++;
+}
+
+/*
+ *     Send a fin.
+ *
+ *     sk      socket to send the FIN for
+ *
+ *     Builds a header-only segment from sk->dummy_th with FIN and ACK
+ *     set and consumes one sequence number (sk->write_seq++). If data
+ *     is still waiting on sk->write_queue the FIN is appended behind it;
+ *     otherwise it is transmitted at once and the TIME_WRITE timer is
+ *     started. Nothing is returned.
+ */
+
+void tcp_send_fin(struct sock *sk)
+{
+       struct proto *prot =(struct proto *)sk->prot;
+       struct tcphdr *th =(struct tcphdr *)&sk->dummy_th;
+       struct tcphdr *t1;
+       struct sk_buff *buff;
+       struct device *dev=NULL;
+       int tmp;
+               
+       release_sock(sk); /* in case the malloc sleeps. */
+       
+       buff = sock_wmalloc(sk, MAX_RESET_SIZE,1 , GFP_KERNEL);
+       sk->inuse = 1;
+
+       if (buff == NULL)
+       {
+               /* This is a disaster if it occurs */
+               /* Terminate the line so the next kernel message starts cleanly. */
+               printk("tcp_send_fin: Impossible malloc failure\n");
+               return;
+       }
+
+       /*
+        *      Administrivia
+        */
+        
+       buff->sk = sk;
+       buff->localroute = sk->localroute;
+
+       /*
+        *      Put in the IP header and routing stuff. 
+        */
+
+       tmp = prot->build_header(buff,sk->saddr, sk->daddr, &dev,
+                          IPPROTO_TCP, sk->opt,
+                          sizeof(struct tcphdr),sk->ip_tos,sk->ip_ttl,&sk->ip_route_cache);
+       if (tmp < 0) 
+       {
+               int t;
+               /*
+                *      Finish anyway, treat this as a send that got lost. 
+                *      (Not good).
+                */
+                
+               buff->free = 1;
+               sock_wfree(sk,buff);
+               sk->write_seq++;
+               /* Keep a timer that was already pending, otherwise start the close timer */
+               t=del_timer(&sk->timer);
+               if(t)
+                       add_timer(&sk->timer);
+               else
+                       tcp_reset_msl_timer(sk, TIME_CLOSE, TCP_TIMEWAIT_LEN);
+               return;
+       }
+       
+       /*
+        *      We ought to check if the end of the queue is a buffer and
+        *      if so simply add the fin to that buffer, not send it ahead.
+        */
+
+       t1 =(struct tcphdr *)skb_put(buff,sizeof(struct tcphdr));
+       buff->dev = dev;
+       memcpy(t1, th, sizeof(*t1));
+       /* The FIN occupies one sequence number */
+       buff->seq = sk->write_seq;
+       sk->write_seq++;
+       buff->end_seq = sk->write_seq;
+       t1->seq = htonl(buff->seq);
+       t1->ack = 1;
+       t1->ack_seq = htonl(sk->acked_seq);
+       t1->window = htons(sk->window=tcp_select_window(sk));
+       t1->fin = 1;
+       t1->rst = 0;
+       t1->doff = sizeof(*t1)/4;
+       tcp_send_check(t1, sk->saddr, sk->daddr, sizeof(*t1), sk);
+
+       /*
+        * If there is data in the write queue, the fin must be appended to
+        * the write queue.
+        */
+       
+       if (skb_peek(&sk->write_queue) != NULL) 
+       {
+               buff->free = 0;
+               if (buff->next != NULL) 
+               {
+                       printk("tcp_send_fin: next != NULL\n");
+                       skb_unlink(buff);
+               }
+               skb_queue_tail(&sk->write_queue, buff);
+       } 
+       else 
+       {
+               sk->sent_seq = sk->write_seq;
+               sk->prot->queue_xmit(sk, dev, buff, 0);
+               tcp_reset_xmit_timer(sk, TIME_WRITE, sk->rto);
+       }
+}
+
+
+/*
+ *     Send a SYN+ACK from newsk in answer to the segment held in skb.
+ *
+ *     newsk   socket the reply is sent from and charged to
+ *     sk      socket the request arrived on (presumably the listener);
+ *             it receives error codes and has skb queued on it
+ *     skb     the received frame; skb->h.th is used as header template
+ *
+ *     On success the SYN+ACK (with a 4 byte option, kind 2, carrying
+ *     newsk->mtu) is transmitted, skb is re-charged to newsk and put on
+ *     sk->receive_queue, and sk->ack_backlog is bumped. On failure
+ *     newsk is marked dead/closed and skb is freed. Nothing is returned.
+ */
+
+void tcp_send_synack(struct sock * newsk, struct sock * sk, struct sk_buff * skb)
+{
+       struct tcphdr *t1;
+       unsigned char *ptr;
+       struct sk_buff * buff;
+       struct device *ndev=NULL;
+       int tmp;
+
+       buff = sock_wmalloc(newsk, MAX_SYN_SIZE, 1, GFP_ATOMIC);
+       if (buff == NULL) 
+       {
+               sk->err = ENOMEM;
+               newsk->dead = 1;
+               newsk->state = TCP_CLOSE;
+               /* And this will destroy it */
+               release_sock(newsk);
+               kfree_skb(skb, FREE_READ);
+               tcp_statistics.TcpAttemptFails++;
+               return;
+       }
+  
+       buff->sk = newsk;
+       buff->localroute = newsk->localroute;
+
+       /*
+        *      Put in the IP header and routing stuff. 
+        */
+
+       tmp = sk->prot->build_header(buff, newsk->saddr, newsk->daddr, &ndev,
+                              IPPROTO_TCP, NULL, MAX_SYN_SIZE,sk->ip_tos,sk->ip_ttl,&newsk->ip_route_cache);
+
+       /*
+        *      Something went wrong. 
+        */
+
+       if (tmp < 0) 
+       {
+               /*
+                *      tmp is negative here, while sk->err holds positive
+                *      codes (see ENOMEM above), so flip the sign.
+                */
+               sk->err = -tmp;
+               buff->free = 1;
+               kfree_skb(buff,FREE_WRITE);
+               newsk->dead = 1;
+               newsk->state = TCP_CLOSE;
+               release_sock(newsk);
+               skb->sk = sk;
+               kfree_skb(skb, FREE_READ);
+               tcp_statistics.TcpAttemptFails++;
+               return;
+       }
+
+       t1 =(struct tcphdr *)skb_put(buff,sizeof(struct tcphdr));
+  
+       memcpy(t1, skb->h.th, sizeof(*t1));
+       /* The SYN occupies one sequence number */
+       buff->seq = newsk->write_seq++;
+       buff->end_seq = newsk->write_seq;
+       /*
+        *      Swap the send and the receive. 
+        */
+       t1->dest = skb->h.th->source;
+       t1->source = newsk->dummy_th.source;
+       /* Host to network order: htonl()/htons(), not the ntoh*() forms. */
+       t1->seq = htonl(buff->seq);
+       t1->ack = 1;
+       newsk->sent_seq = newsk->write_seq;
+       t1->window = htons(tcp_select_window(newsk));
+       t1->res1 = 0;
+       t1->res2 = 0;
+       t1->rst = 0;
+       t1->urg = 0;
+       t1->psh = 0;
+       t1->syn = 1;
+       /*
+        *      The flags were copied from the incoming header; only the
+        *      SYN is accounted for in end_seq, so never pass a FIN on.
+        */
+       t1->fin = 0;
+       t1->ack_seq = htonl(newsk->acked_seq);
+       /* Header length in words, plus one word for the option below */
+       t1->doff = sizeof(*t1)/4+1;
+       ptr = skb_put(buff,4);
+       ptr[0] = 2;                             /* option kind */
+       ptr[1] = 4;                             /* option length */
+       ptr[2] = ((newsk->mtu) >> 8) & 0xff;    /* value, high byte first */
+       ptr[3] =(newsk->mtu) & 0xff;
+
+       tcp_send_check(t1, newsk->saddr, newsk->daddr, sizeof(*t1)+4, newsk);
+       newsk->prot->queue_xmit(newsk, ndev, buff, 0);
+       tcp_reset_xmit_timer(newsk, TIME_WRITE , TCP_TIMEOUT_INIT);
+       skb->sk = newsk;
+
+       /*
+        *      Charge the sock_buff to newsk. 
+        */
+        
+       sk->rmem_alloc -= skb->truesize;
+       newsk->rmem_alloc += skb->truesize;
+       
+       skb_queue_tail(&sk->receive_queue,skb);
+       sk->ack_backlog++;
+       release_sock(newsk);
+       tcp_statistics.TcpOutSegs++;
+}
+
+/*
+ *     This routine sends an ack and also updates the window. 
+ *
+ *     sequence        sequence number to place in the segment (host order)
+ *     ack             acknowledgement number to send (host order)
+ *     sk              socket the ack is sent for
+ *     th              header used as template; its ports are swapped
+ *     daddr           destination address
+ *
+ *     Nothing is sent on a zapped socket. If no buffer is available the
+ *     ack is recorded in sk->ack_backlog and, for a connected socket
+ *     whose timer is not already TIME_WRITE, a one second TIME_WRITE
+ *     timer is set. Nothing is returned.
+ */
+ 
+void tcp_send_ack(u32 sequence, u32 ack,
+            struct sock *sk,
+            struct tcphdr *th, u32 daddr)
+{
+       struct sk_buff *buff;
+       struct tcphdr *t1;
+       struct device *dev = NULL;
+       int tmp;
+
+       if(sk->zapped)
+               return;         /* We have been reset, we may not send again */
+               
+       /*
+        * We need to grab some memory, and put together an ack,
+        * and then put it into the queue to be sent.
+        */
+
+       buff = sock_wmalloc(sk, MAX_ACK_SIZE, 1, GFP_ATOMIC);
+       if (buff == NULL) 
+       {
+               /* 
+                *      Force it to send an ack. We don't have to do this
+                *      (ACK is unreliable) but it's much better use of 
+                *      bandwidth on slow links to send a spare ack than
+                *      resend packets. 
+                */
+                
+               sk->ack_backlog++;
+               if (sk->ip_xmit_timeout != TIME_WRITE && tcp_connected(sk->state)) 
+               {
+                       tcp_reset_xmit_timer(sk, TIME_WRITE, HZ);
+               }
+               return;
+       }
+
+       /*
+        *      Assemble a suitable TCP frame
+        */
+        
+       buff->sk = sk;
+       buff->localroute = sk->localroute;
+
+       /* 
+        *      Put in the IP header and routing stuff. 
+        */
+        
+       tmp = sk->prot->build_header(buff, sk->saddr, daddr, &dev,
+                               IPPROTO_TCP, sk->opt, MAX_ACK_SIZE,sk->ip_tos,sk->ip_ttl,&sk->ip_route_cache);
+       if (tmp < 0) 
+       {
+               buff->free = 1;
+               sock_wfree(sk, buff);
+               return;
+       }
+       t1 =(struct tcphdr *)skb_put(buff,sizeof(struct tcphdr));
+
+       memcpy(t1, th, sizeof(*t1));
+
+       /*
+        *      Swap the send and the receive. 
+        */
+        
+       t1->dest = th->source;
+       t1->source = th->dest;
+       /* Host to network order: htonl()/htons(), not the ntoh*() forms. */
+       t1->seq = htonl(sequence);
+       t1->ack = 1;
+       sk->window = tcp_select_window(sk);
+       t1->window = htons(sk->window);
+       t1->res1 = 0;
+       t1->res2 = 0;
+       t1->rst = 0;
+       t1->urg = 0;
+       t1->syn = 0;
+       t1->psh = 0;
+       t1->fin = 0;
+       
+       /*
+        *      If we have nothing queued for transmit and the transmit timer
+        *      is on we are just doing an ACK timeout and need to switch
+        *      to a keepalive.
+        */
+        
+       if (ack == sk->acked_seq) 
+       {
+               sk->ack_backlog = 0;
+               sk->bytes_rcv = 0;
+               sk->ack_timed = 0;
+
+               if (sk->send_head == NULL && skb_peek(&sk->write_queue) == NULL
+                   && sk->ip_xmit_timeout == TIME_WRITE)
+               {
+                       /* Braces make explicit that the else pairs with keepopen */
+                       if(sk->keepopen) 
+                               tcp_reset_xmit_timer(sk,TIME_KEEPOPEN,TCP_TIMEOUT_LEN);
+                       else 
+                               delete_timer(sk);
+               }
+       }
+
+       /*
+        *      Fill in the packet and send it
+        */
+        
+       t1->ack_seq = htonl(ack);
+       t1->doff = sizeof(*t1)/4;
+       tcp_send_check(t1, sk->saddr, daddr, sizeof(*t1), sk);
+       if (sk->debug)
+                printk("\rtcp_ack: seq %x ack %x\n", sequence, ack);
+       sk->prot->queue_xmit(sk, dev, buff, 1);
+       tcp_statistics.TcpOutSegs++;
+}
+
+/*
+ *     This routine sends a packet with an out of date sequence
+ *     number. It assumes the other end will try to ack it.
+ */
+
+void tcp_write_wakeup(struct sock *sk)
+{
+       struct sk_buff *buff,*skb;
+       struct tcphdr *t1;
+       struct device *dev=NULL;
+       int tmp;
+
+       if (sk->zapped)
+               return; /* After a valid reset we can send no more */
+
+       /*
+        *      Write data can still be transmitted/retransmitted in the
+        *      following states.  If any other state is encountered, return.
+        *      [listen/close will never occur here anyway]
+        */
+
+       if (sk->state != TCP_ESTABLISHED && 
+           sk->state != TCP_CLOSE_WAIT &&
+           sk->state != TCP_FIN_WAIT1 && 
+           sk->state != TCP_LAST_ACK &&
+           sk->state != TCP_CLOSING
+       ) 
+       {
+               return;
+       }
+       if ( before(sk->sent_seq, sk->window_seq) && 
+           (skb=skb_peek(&sk->write_queue)))
+       {
+               /*
+                * We are probing the opening of a window
+                * but the window size is != 0
+                * must have been a result SWS advoidance ( sender )
+                */
+           
+               struct iphdr *iph;
+               struct tcphdr *th;
+               struct tcphdr *nth;
+               unsigned long win_size;
+#if 0
+               unsigned long ow_size;
+#endif
+               void * tcp_data_start;
+       
+               /*
+                *      How many bytes can we send ?
+                */
+                
+               win_size = sk->window_seq - sk->sent_seq;
+
+               /*
+                *      Recover the buffer pointers
+                */
+                
+               iph = (struct iphdr *)skb->ip_hdr;
+               th = (struct tcphdr *)(((char *)iph) +(iph->ihl << 2));
+
+               /*
+                *      Grab the data for a temporary frame
+                */
+                
+               buff = sock_wmalloc(sk, win_size + th->doff * 4 + 
+                                    (iph->ihl << 2) +
+                                    sk->prot->max_header + 15, 
+                                    1, GFP_ATOMIC);
+               if ( buff == NULL )
+                       return;
+
+               /* 
+                *      If we strip the packet on the write queue we must
+                *      be ready to retransmit this one 
+                */
+           
+               buff->free = /*0*/1;
+
+               buff->sk = sk;
+               buff->localroute = sk->localroute;
+               
+               /*
+                *      Put headers on the new packet
+                */
+
+               tmp = sk->prot->build_header(buff, sk->saddr, sk->daddr, &dev,
+                                        IPPROTO_TCP, sk->opt, buff->truesize,
+                                        sk->ip_tos,sk->ip_ttl,&sk->ip_route_cache);
+               if (tmp < 0) 
+               {
+                       sock_wfree(sk, buff);
+                       return;
+               }
+               
+               /*
+                *      Move the TCP header over
+                */
+
+               buff->dev = dev;
+
+               nth = (struct tcphdr *) skb_put(buff,th->doff*4);
+
+               memcpy(nth, th, th->doff * 4);
+               
+               /*
+                *      Correct the new header
+                */
+                
+               nth->ack = 1; 
+               nth->ack_seq = htonl(sk->acked_seq);
+               nth->window = htons(tcp_select_window(sk));
+               nth->check = 0;
+
+               /*
+                *      Find the first data byte.
+                */
+                
+               tcp_data_start = (char *) th + (th->doff << 2);
+
+               /*
+                *      Add it to our new buffer
+                */
+                
+               memcpy(skb_put(buff,win_size), tcp_data_start, win_size);
+               
+               /*
+                *      Remember our right edge sequence number.
+                */
+                
+               buff->end_seq = sk->sent_seq + win_size;
+               sk->sent_seq = buff->end_seq;           /* Hack */
+               if(th->urg && ntohs(th->urg_ptr) < win_size)
+                       nth->urg = 0;
+
+               /*
+                *      Checksum the split buffer
+                */
+                
+               tcp_send_check(nth, sk->saddr, sk->daddr, 
+                          nth->doff * 4 + win_size , sk);
+       }
+       else
+       {       
+               buff = sock_wmalloc(sk,MAX_ACK_SIZE,1, GFP_ATOMIC);
+               if (buff == NULL) 
+                       return;
+
+               buff->free = 1;
+               buff->sk = sk;
+               buff->localroute = sk->localroute;
+
+               /*
+                *      Put in the IP header and routing stuff. 
+                */
+                
+               tmp = sk->prot->build_header(buff, sk->saddr, sk->daddr, &dev,
+                               IPPROTO_TCP, sk->opt, MAX_ACK_SIZE,sk->ip_tos,sk->ip_ttl,&sk->ip_route_cache);
+               if (tmp < 0) 
+               {
+                       sock_wfree(sk, buff);
+                       return;
+               }
+
+               t1 = (struct tcphdr *)skb_put(buff,sizeof(struct tcphdr));
+               memcpy(t1,(void *) &sk->dummy_th, sizeof(*t1));
+
+               /*
+                *      Use a previous sequence.
+                *      This should cause the other end to send an ack.
+                */
+        
+               t1->seq = htonl(sk->sent_seq-1);
+               t1->ack = 1; 
+               t1->res1= 0;
+               t1->res2= 0;
+               t1->rst = 0;
+               t1->urg = 0;
+               t1->psh = 0;
+               t1->fin = 0;    /* We are sending a 'previous' sequence, and 0 bytes of data - thus no FIN bit */
+               t1->syn = 0;
+               t1->ack_seq = htonl(sk->acked_seq);
+               t1->window = htons(tcp_select_window(sk));
+               t1->doff = sizeof(*t1)/4;
+               tcp_send_check(t1, sk->saddr, sk->daddr, sizeof(*t1), sk);
+
+       }               
+
+       /*
+        *      Send it.
+        */
+       
+       sk->prot->queue_xmit(sk, dev, buff, 1);
+       tcp_statistics.TcpOutSegs++;
+}
+
+/*
+ *     A window probe timeout has occurred: send a probe, back off, re-arm the timer.
+ */
+
+void tcp_send_probe0(struct sock *sk)
+{
+       if (!sk->zapped)        /* After a valid reset we can send no more */
+       {
+               tcp_write_wakeup(sk);
+
+               sk->backoff += 1;
+               sk->rto = min(sk->rto << 1, 120*HZ);    /* double rto, clamp at 120*HZ */
+               sk->retransmits += 1;
+               sk->prot->retransmits += 1;
+               tcp_reset_xmit_timer(sk, TIME_PROBE0, sk->rto);
+       }
+}
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c

new file mode 100644 (file)

index 0000000..8f5bdeb
--- /dev/null
+++ b/net/ipv4/tcp_timer.c
@@ -0,0 +1,287 @@
+/*
+ * INET                An implementation of the TCP/IP protocol suite for the LINUX
+ *             operating system.  INET is implemented using the  BSD Socket
+ *             interface as the means of communication with the user level.
+ *
+ *             Implementation of the Transmission Control Protocol(TCP).
+ *
+ * Version:    @(#)tcp.c       1.0.16  05/25/93
+ *
+ * Authors:    Ross Biro, <bir7@leland.Stanford.Edu>
+ *             Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
+ *             Mark Evans, <evansmp@uhura.aston.ac.uk>
+ *             Corey Minyard <wf-rch!minyard@relay.EU.net>
+ *             Florian La Roche, <flla@stud.uni-sb.de>
+ *             Charles Hedrick, <hedrick@klinzhai.rutgers.edu>
+ *             Linus Torvalds, <torvalds@cs.helsinki.fi>
+ *             Alan Cox, <gw4pts@gw4pts.ampr.org>
+ *             Matthew Dillon, <dillon@apollo.west.oic.com>
+ *             Arnt Gulbrandsen, <agulbra@nvg.unit.no>
+ *             Jorge Cwik, <jorge@laser.satlink.net>
+ */
+
+#include <net/tcp.h>
+
+/*
+ *     (Re)arm the retransmit timer: fire 'when' jiffies from now, for reason 'why'
+ */
+ 
+void tcp_reset_xmit_timer(struct sock *sk, int why, unsigned long when)
+{
+       del_timer(&sk->retransmit_timer);       /* cancel whatever was pending */
+       sk->ip_xmit_timeout = why;              /* read back by the timer handler */
+       if ((long)when < 0) {
+               /* A 'negative' interval is reported and replaced by 3 jiffies */
+               printk("Error: Negative timer in xmit_timer\n");
+               when = 3;
+       }
+       sk->retransmit_timer.expires = jiffies + when;
+       add_timer(&sk->retransmit_timer);
+}
+
+/*
+ *     This is the normal code called for timeouts.  It does the retransmission
+ *     and then does backoff.  tcp_do_retransmit is separated out because
+ *     tcp_ack needs to send stuff from the retransmit queue without
+ *     initiating a backoff.  'all' is handed straight to tcp_do_retransmit.
+ */
+
+
+static void tcp_retransmit_time(struct sock *sk, int all)
+{
+       tcp_do_retransmit(sk, all);
+
+       /*
+        * Increase the timeout each time we retransmit.  Note that
+        * we do not increase the rtt estimate.  rto is initialized
+        * from rtt, but increases here.  Jacobson (SIGCOMM 88) suggests
+        * that doubling rto each time is the least we can get away with.
+        * In KA9Q, Karn uses this for the first few times, and then
+        * goes to quadratic.  NetBSD doubles, but only goes up to *64,
+        * and clamps at 1 to 64 sec afterwards.  Note that 120 sec is
+        * defined in the protocol as the maximum possible RTT.  I guess
+        * we'll have to use something other than TCP to talk to the
+        * University of Mars.
+        *
+        * PAWS allows us longer timeouts and large windows, so once
+        * implemented ftp to Mars will work nicely. We will have to fix
+        * the 120 second clamps though!
+        */
+
+       sk->retransmits++;              /* per-socket counter */
+       sk->prot->retransmits++;        /* counter kept on sk->prot */
+       sk->backoff++;
+       sk->rto = min(sk->rto << 1, 120*HZ);    /* double rto, clamp at 120*HZ */
+       tcp_reset_xmit_timer(sk, TIME_WRITE, sk->rto);  /* re-arm with the new rto */
+}
+
+/*
+ *     A timer event has triggered a tcp retransmit timeout.  Unless
+ *     'all' is set, first record half the congestion window in
+ *     ssthresh and restart the window at 1; then hand off to
+ *     tcp_retransmit_time() for the resend and the timer backoff.
+ */
+
+void tcp_retransmit(struct sock *sk, int all)
+{
+       if (!all)
+       {
+               /*
+                *      Remember the window where we lost.  In theory
+                *      sk->ssthresh can be zero; that is believed to be OK.
+                */
+               sk->ssthresh = sk->cong_window >> 1;
+               sk->cong_count = 0;
+               sk->cong_window = 1;
+       }
+
+       /* Do the actual retransmit. */
+       tcp_retransmit_time(sk, all);
+}
+
+/*
+ *     A write timeout has occurred. Process the after effects.
+ *     Returns 0 if the socket was forced to TCP_CLOSE, else 1; the caller releases sk.
+ */
+
+static int tcp_write_timeout(struct sock *sk)
+{
+       /*
+        *      Look for a 'soft' timeout.
+        */
+       if ((sk->state == TCP_ESTABLISHED && sk->retransmits && !(sk->retransmits & 7))
+               || (sk->state != TCP_ESTABLISHED && sk->retransmits > TCP_RETR1)) 
+       {
+               /*
+                *      Attempt to recover if arp has changed (unlikely!) or
+                *      a route has shifted (not supported prior to 1.3).
+                */
+               ip_rt_advice(&sk->ip_route_cache, 0);
+       }
+       
+       /*
+        *      Have we tried to SYN too many times (repent repent 8))
+        */
+        
+       if(sk->retransmits > TCP_SYN_RETRIES && sk->state==TCP_SYN_SENT)
+       {
+               if(sk->err_soft)
+                       sk->err=sk->err_soft;
+               else
+                       sk->err=ETIMEDOUT;
+               sk->error_report(sk);
+               del_timer(&sk->retransmit_timer);
+               tcp_statistics.TcpAttemptFails++;       /* Is this right ??? - FIXME - */
+               tcp_set_state(sk,TCP_CLOSE);
+               /* Don't FIN, we got nothing back */
+               /* No release_sock() here: the timer handler that called us does it */
+               return 0;
+       }
+       /*
+        *      Has it gone just too far ?
+        */
+       if (sk->retransmits > TCP_RETR2) 
+       {
+               if(sk->err_soft)
+                       sk->err = sk->err_soft;
+               else
+                       sk->err = ETIMEDOUT;
+               sk->error_report(sk);
+               del_timer(&sk->retransmit_timer);
+               /*
+                *      Time wait the socket 
+                */
+               if (sk->state == TCP_FIN_WAIT1 || sk->state == TCP_FIN_WAIT2 || sk->state == TCP_CLOSING ) 
+               {
+                       tcp_set_state(sk,TCP_TIME_WAIT);
+                       tcp_reset_msl_timer (sk, TIME_CLOSE, TCP_TIMEWAIT_LEN);
+               }
+               else
+               {
+                       /*
+                        *      Clean up time.  sk is left for the caller to release.
+                        */
+                       tcp_set_state(sk, TCP_CLOSE);
+                       return 0;
+               }
+       }
+       return 1;
+}
+
+/*
+ *     The TCP retransmit timer ('data' is the struct sock * as an unsigned long). This lacks a few small details.
+ *
+ *     1.      An initial rtt timeout on the probe0 should cause what we can
+ *             of the first write queue buffer to be split and sent.
+ *     2.      On a 'major timeout' as defined by RFC1122 we shouldn't report
+ *             ETIMEDOUT if we know an additional 'soft' error caused this.
+ *             tcp_err should save a 'soft error' for us.
+ */
+
+void tcp_retransmit_timer(unsigned long data)
+{
+       struct sock *sk = (struct sock*)data;
+       int why = sk->ip_xmit_timeout; /* reason recorded by tcp_reset_xmit_timer() */
+
+       /*
+        *      We are reset. We will send no more retransmits.
+        */
+        
+       if(sk->zapped)
+               return;
+               
+       /* 
+        *      Only process if socket is not in use (and we are not in a bottom half)
+        */
+
+       cli();
+       if (sk->inuse || in_bh) 
+       {
+               /* Try again in 1 second (HZ jiffies from now) */
+               sk->retransmit_timer.expires = jiffies+HZ;
+               add_timer(&sk->retransmit_timer);
+               sti();
+               return;
+       }
+
+       sk->inuse = 1; /* mark the socket busy for the rest of this handler */
+       sti();
+
+
+       if (sk->ack_backlog && !sk->dead) 
+               sk->data_ready(sk,0);
+
+       /* Now we need to figure out why the socket was on the timer. */
+
+       switch (why) 
+       {
+               /* Window probing */
+               case TIME_PROBE0:
+                       tcp_send_probe0(sk);
+                       tcp_write_timeout(sk); /* return value is ignored */
+                       break;
+               /* Retransmitting */
+               case TIME_WRITE:
+                       /* It could be we got here because we needed to send an ack.
+                        * So we need to check for that.
+                        */
+               {
+                       struct sk_buff *skb;
+                       unsigned long flags;
+
+                       save_flags(flags);
+                       cli();
+                       skb = sk->send_head;
+                       if (!skb) 
+                       {
+                               if (sk->ack_backlog)
+                                       tcp_read_wakeup(sk);
+                               restore_flags(flags);
+                       } 
+                       else 
+                       {
+                               /*
+                                *      Kicked by a delayed ack: rto has not yet elapsed since skb->when, so reset
+                                *      the timer for the remainder. NOTE(review): plain '<' on jiffies, presumably not wrap-safe - confirm
+                                */
+                               if (jiffies < skb->when + sk->rto) 
+                               {
+                                       if (sk->ack_backlog)
+                                               tcp_read_wakeup(sk);
+                                       tcp_reset_xmit_timer (sk, TIME_WRITE, skb->when + sk->rto - jiffies);
+                                       restore_flags(flags);
+                                       break;
+                               }
+                               restore_flags(flags);
+                               /*
+                                *      Retransmission (through the protocol's retransmit hook, with all == 0)
+                                */
+                               sk->retransmits++;
+                               sk->prot->retransmits++;
+                               sk->prot->retransmit (sk, 0);
+                               tcp_write_timeout(sk); /* return value is ignored */
+                       }
+                       break;
+               }
+               /* Sending Keepalives */
+               case TIME_KEEPOPEN:
+                       /* 
+                        * this reset_timer() call is a hack, this is not
+                        * how KEEPOPEN is supposed to work.
+                        */
+                       tcp_reset_xmit_timer (sk, TIME_KEEPOPEN, TCP_TIMEOUT_LEN);
+
+                       /* Send something to keep the connection open (only if the protocol has the hook). */
+                       if (sk->prot->write_wakeup)
+                                 sk->prot->write_wakeup (sk);
+                       sk->retransmits++;
+                       sk->prot->retransmits++;
+                       tcp_write_timeout(sk); /* return value is ignored */
+                       break;
+               default:
+                       printk ("rexmit_timer: timer expired - reason unknown\n");
+                       break;
+       }
+       release_sock(sk);
+}
diff --git a/net/ipv4/timer.c b/net/ipv4/timer.c

index e62cf1486f3f7affe34674e06645e4ca3b9d1812..2c3f6fa9a48b0e3a711095291b8ec8a1cd713058 100644 (file)
--- a/net/ipv4/timer.c
+++ b/net/ipv4/timer.c
@@ -141,7 +141,8 @@ void net_timer (unsigned long data)
                         }
                         if(sk->wmem_alloc==0 && sk->rmem_alloc==0)
                                 destroy_sock(sk);       /* Socket gone, DON'T update sk->inuse! */
-                               break;
+                       break;
+
                 case TIME_CLOSE:
                         /* We've waited long enough, close the socket. */
                         sk->state = TCP_CLOSE;
@@ -152,6 +153,7 @@ void net_timer (unsigned long data)
                         reset_timer (sk, TIME_DESTROY, TCP_DONE_TIME);
                         release_sock (sk);
                         break;
+
                 default:
                         printk ("net_timer: timer expired - reason %d is unknown\n", why);
                         release_sock (sk);
author	Linus Torvalds <torvalds@linuxfoundation.org>
	Fri, 23 Nov 2007 20:10:33 +0000 (15:10 -0500)
committer	Linus Torvalds <torvalds@linuxfoundation.org>
	Fri, 23 Nov 2007 20:10:33 +0000 (15:10 -0500)
Makefile		patch \| blob \| history
arch/alpha/defconfig		patch \| blob \| history
arch/alpha/kernel/entry.S		patch \| blob \| history
drivers/block/ide-cd.c		patch \| blob \| history
drivers/char/ChangeLog		patch \| blob \| history
drivers/char/apm_bios.c		patch \| blob \| history
drivers/char/console.c		patch \| blob \| history
drivers/char/serial.c		patch \| blob \| history
drivers/char/vt.c		patch \| blob \| history
drivers/char/vt_kern.h		patch \| blob \| history
drivers/net/3c59x.c		patch \| blob \| history
drivers/net/ibmtr.c		patch \| blob \| history
drivers/net/ibmtr.h		patch \| blob \| history
drivers/scsi/st.c		patch \| blob \| history
fs/fat/dir.c		patch \| blob \| history
fs/fat/inode.c		patch \| blob \| history
fs/proc/array.c		patch \| blob \| history
include/linux/msdos_fs.h		patch \| blob \| history
include/linux/msdos_fs_i.h		patch \| blob \| history
include/linux/random.h		patch \| blob \| history
include/linux/sched.h		patch \| blob \| history
include/linux/tcp.h		patch \| blob \| history
include/linux/time.h		patch \| blob \| history
include/net/icmp.h		patch \| blob \| history
include/net/protocol.h		patch \| blob \| history
include/net/tcp.h		patch \| blob \| history
init/main.c		patch \| blob \| history
kernel/sched.c		patch \| blob \| history
net/core/sock.c		patch \| blob \| history
net/ipv4/Makefile		patch \| blob \| history
net/ipv4/af_inet.c		patch \| blob \| history
net/ipv4/tcp.c		patch \| blob \| history
net/ipv4/tcp_input.c	[new file with mode: 0644]	patch \| blob
net/ipv4/tcp_output.c	[new file with mode: 0644]	patch \| blob
net/ipv4/tcp_timer.c	[new file with mode: 0644]	patch \| blob
net/ipv4/timer.c		patch \| blob \| history