Import 2.3.10pre2

author Linus Torvalds <torvalds@linuxfoundation.org>

Fri, 23 Nov 2007 20:25:59 +0000 (15:25 -0500)

committer Linus Torvalds <torvalds@linuxfoundation.org>

Fri, 23 Nov 2007 20:25:59 +0000 (15:25 -0500)
author Linus Torvalds <torvalds@linuxfoundation.org>
Fri, 23 Nov 2007 20:25:59 +0000 (15:25 -0500)
committer Linus Torvalds <torvalds@linuxfoundation.org>
Fri, 23 Nov 2007 20:25:59 +0000 (15:25 -0500)
diff --git a/arch/i386/kernel/ptrace.c b/arch/i386/kernel/ptrace.c

index 28aea16a8bc26d9a2e80b0f98c81140de9c756cf..09664ba815c087c323b9a6aa5f749cd43ff298da 100644 (file)
--- a/arch/i386/kernel/ptrace.c
+++ b/arch/i386/kernel/ptrace.c
@@ -73,7 +73,8 @@ static inline int put_stack_long(struct task_struct *task, int offset,
   * and that it is in the task area before calling this: this routine does
   * no checking.
   */
-static unsigned long get_long(struct mm_struct * mm, struct vm_area_struct * vma, unsigned long addr)
+static unsigned long get_long(struct task_struct * tsk, 
+       struct vm_area_struct * vma, unsigned long addr)
  {
         pgd_t * pgdir;
         pmd_t * pgmiddle;
@@ -83,7 +84,7 @@ static unsigned long get_long(struct mm_struct * mm, struct vm_area_struct * vma
  repeat:
         pgdir = pgd_offset(vma->vm_mm, addr);
         if (pgd_none(*pgdir)) {
-               handle_mm_fault(mm, vma, addr, 0);
+               handle_mm_fault(tsk, vma, addr, 0);
                 goto repeat;
         }
         if (pgd_bad(*pgdir)) {
@@ -93,7 +94,7 @@ repeat:
         }
         pgmiddle = pmd_offset(pgdir, addr);
         if (pmd_none(*pgmiddle)) {
-               handle_mm_fault(mm, vma, addr, 0);
+               handle_mm_fault(tsk, vma, addr, 0);
                 goto repeat;
         }
         if (pmd_bad(*pgmiddle)) {
@@ -103,7 +104,7 @@ repeat:
         }
         pgtable = pte_offset(pgmiddle, addr);
         if (!pte_present(*pgtable)) {
-               handle_mm_fault(mm, vma, addr, 0);
+               handle_mm_fault(tsk, vma, addr, 0);
                 goto repeat;
         }
         page = pte_page(*pgtable);
@@ -123,7 +124,7 @@ repeat:
   * Now keeps R/W state of page so that a text page stays readonly
   * even if a debugger scribbles breakpoints into it.  -M.U-
   */
-static void put_long(struct mm_struct * mm, struct vm_area_struct * vma, unsigned long addr,
+static void put_long(struct task_struct * tsk, struct vm_area_struct * vma, unsigned long addr,
         unsigned long data)
  {
         pgd_t *pgdir;
@@ -134,7 +135,7 @@ static void put_long(struct mm_struct * mm, struct vm_area_struct * vma, unsigne
  repeat:
         pgdir = pgd_offset(vma->vm_mm, addr);
         if (!pgd_present(*pgdir)) {
-               handle_mm_fault(mm, vma, addr, 1);
+               handle_mm_fault(tsk, vma, addr, 1);
                 goto repeat;
         }
         if (pgd_bad(*pgdir)) {
@@ -144,7 +145,7 @@ repeat:
         }
         pgmiddle = pmd_offset(pgdir, addr);
         if (pmd_none(*pgmiddle)) {
-               handle_mm_fault(mm, vma, addr, 1);
+               handle_mm_fault(tsk, vma, addr, 1);
                 goto repeat;
         }
         if (pmd_bad(*pgmiddle)) {
@@ -154,12 +155,12 @@ repeat:
         }
         pgtable = pte_offset(pgmiddle, addr);
         if (!pte_present(*pgtable)) {
-               handle_mm_fault(mm, vma, addr, 1);
+               handle_mm_fault(tsk, vma, addr, 1);
                 goto repeat;
         }
         page = pte_page(*pgtable);
         if (!pte_write(*pgtable)) {
-               handle_mm_fault(mm, vma, addr, 1);
+               handle_mm_fault(tsk, vma, addr, 1);
                 goto repeat;
         }
  /* this is a hack for non-kernel-mapped video buffers and similar */
@@ -175,10 +176,10 @@ repeat:
   * This routine checks the page boundaries, and that the offset is
   * within the task area. It then calls get_long() to read a long.
   */
-static int read_long(struct mm_struct * mm, unsigned long addr,
+static int read_long(struct task_struct * tsk, unsigned long addr,
         unsigned long * result)
  {
-       struct vm_area_struct * vma = find_extend_vma(mm, addr);
+       struct vm_area_struct * vma = find_extend_vma(tsk, addr);
  
         if (!vma)
                 return -EIO;
@@ -191,8 +192,8 @@ static int read_long(struct mm_struct * mm, unsigned long addr,
                         if (!vma_high || vma_high->vm_start != vma->vm_end)
                                 return -EIO;
                 }
-               low = get_long(mm, vma, addr & ~(sizeof(long)-1));
-               high = get_long(mm, vma_high, (addr+sizeof(long)) & ~(sizeof(long)-1));
+               low = get_long(tsk, vma, addr & ~(sizeof(long)-1));
+               high = get_long(tsk, vma_high, (addr+sizeof(long)) & ~(sizeof(long)-1));
                 switch (addr & (sizeof(long)-1)) {
                         case 1:
                                 low >>= 8;
@@ -209,7 +210,7 @@ static int read_long(struct mm_struct * mm, unsigned long addr,
                 }
                 *result = low;
         } else
-               *result = get_long(mm, vma, addr);
+               *result = get_long(tsk, vma, addr);
         return 0;
  }
  
@@ -217,10 +218,10 @@ static int read_long(struct mm_struct * mm, unsigned long addr,
   * This routine checks the page boundaries, and that the offset is
   * within the task area. It then calls put_long() to write a long.
   */
-static int write_long(struct mm_struct * mm, unsigned long addr,
+static int write_long(struct task_struct * tsk, unsigned long addr,
         unsigned long data)
  {
-       struct vm_area_struct * vma = find_extend_vma(mm, addr);
+       struct vm_area_struct * vma = find_extend_vma(tsk, addr);
  
         if (!vma)
                 return -EIO;
@@ -233,8 +234,8 @@ static int write_long(struct mm_struct * mm, unsigned long addr,
                         if (!vma_high || vma_high->vm_start != vma->vm_end)
                                 return -EIO;
                 }
-               low = get_long(mm, vma, addr & ~(sizeof(long)-1));
-               high = get_long(mm, vma_high, (addr+sizeof(long)) & ~(sizeof(long)-1));
+               low = get_long(tsk, vma, addr & ~(sizeof(long)-1));
+               high = get_long(tsk, vma_high, (addr+sizeof(long)) & ~(sizeof(long)-1));
                 switch (addr & (sizeof(long)-1)) {
                         case 0: /* shouldn't happen, but safety first */
                                 low = data;
@@ -258,10 +259,10 @@ static int write_long(struct mm_struct * mm, unsigned long addr,
                                 high |= data >> 8;
                                 break;
                 }
-               put_long(mm, vma, addr & ~(sizeof(long)-1),low);
-               put_long(mm, vma_high, (addr+sizeof(long)) & ~(sizeof(long)-1),high);
+               put_long(tsk, vma, addr & ~(sizeof(long)-1),low);
+               put_long(tsk, vma_high, (addr+sizeof(long)) & ~(sizeof(long)-1),high);
         } else
-               put_long(mm, vma, addr, data);
+               put_long(tsk, vma, addr, data);
         return 0;
  }
  
@@ -403,7 +404,7 @@ asmlinkage int sys_ptrace(long request, long pid, long addr, long data)
                         unsigned long tmp;
  
                         down(&child->mm->mmap_sem);
-                       ret = read_long(child->mm, addr, &tmp);
+                       ret = read_long(child, addr, &tmp);
                         up(&child->mm->mmap_sem);
                         if (ret >= 0)
                                 ret = put_user(tmp,(unsigned long *) data);
@@ -436,7 +437,7 @@ asmlinkage int sys_ptrace(long request, long pid, long addr, long data)
                 case PTRACE_POKETEXT: /* write the word at location addr. */
                 case PTRACE_POKEDATA:
                         down(&child->mm->mmap_sem);
-                       ret = write_long(child->mm,addr,data);
+                       ret = write_long(child,addr,data);
                         up(&child->mm->mmap_sem);
                         goto out;
  
diff --git a/arch/i386/mm/fault.c b/arch/i386/mm/fault.c

index bb808c300ebb6e850d9190a89fb7f30c85a6b9b0..c3e423b216ef48caf4eb1b9d43afa460be108fe5 100644 (file)
--- a/arch/i386/mm/fault.c
+++ b/arch/i386/mm/fault.c
@@ -50,7 +50,8 @@ good_area:
         start &= PAGE_MASK;
  
         for (;;) {
-               handle_mm_fault(current->mm, vma, start, 1);
+               if (handle_mm_fault(current, vma, start, 1) <= 0)
+                       goto bad_area;
                 if (!size)
                         break;
                 size--;
@@ -162,8 +163,13 @@ good_area:
          * make sure we exit gracefully rather than endlessly redo
          * the fault.
          */
-       if (!handle_mm_fault(mm, vma, address, write))
-               goto do_sigbus;
+       {
+               int fault = handle_mm_fault(tsk, vma, address, write);
+               if (fault < 0)
+                       goto out_of_memory;
+               if (!fault)
+                       goto do_sigbus;
+       }
  
         /*
          * Did it hit the DOS screen memory VA from vm86 mode?
@@ -255,6 +261,13 @@ no_context:
   * We ran out of memory, or some other thing happened to us that made
   * us unable to handle the page fault gracefully.
   */
+out_of_memory:
+       up(&mm->mmap_sem);
+       printk("VM: killing process %s\n", tsk->comm);
+       if (error_code & 4)
+               do_exit(SIGKILL);
+       goto no_context;
+
  do_sigbus:
         up(&mm->mmap_sem);
  
diff --git a/drivers/block/hpt34x.c b/drivers/block/hpt34x.c

index e853ee43893505eef0699a405a69034bc23cfd0c..01494ca1e019ad7869ffdc2a58a2195f9d72a59c 100644 (file)
--- a/drivers/block/hpt34x.c
+++ b/drivers/block/hpt34x.c
@@ -20,6 +20,7 @@
   *     = ((hwif->channel ? 2 : 0) + (drive->select.b.unit & 0x01));
   */
  
+#include <linux/config.h>
  #include <linux/types.h>
  #include <linux/kernel.h>
  #include <linux/delay.h>
diff --git a/drivers/block/loop.c b/drivers/block/loop.c

index 26ffb26cac3da1a72a13d98d4b8e704d645de509..29d78129f32ebb939dd2c193bf64c36dad109d8a 100644 (file)
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -391,6 +391,7 @@ static int loop_set_fd(struct loop_device *lo, kdev_t dev, unsigned int arg)
                         lo->lo_backing_file->f_dentry = file->f_dentry;
                         lo->lo_backing_file->f_op = file->f_op;
                         lo->lo_backing_file->private_data = file->private_data;
+                       file_moveto(lo->lo_backing_file, file);
  
                         error = get_write_access(inode);
                         if (error) {
diff --git a/drivers/block/piix.c b/drivers/block/piix.c

index 79a0e873f6daef27a2bd6b31b6e228c06f2c6b42..384712603e359eea75b3b72e4554d95b1c5b02fa 100644 (file)
--- a/drivers/block/piix.c
+++ b/drivers/block/piix.c
@@ -52,6 +52,7 @@
   * #endif
   */
  
+#include <linux/config.h>
  #include <linux/types.h>
  #include <linux/kernel.h>
  #include <linux/ioport.h>
diff --git a/drivers/char/sysrq.c b/drivers/char/sysrq.c

index 574f1b1e9ef4fe836bbaadb3db292de694e237cf..e6f50ebf1050082d803688533bcdd6ea1a394544 100644 (file)
--- a/drivers/char/sysrq.c
+++ b/drivers/char/sysrq.c
@@ -150,15 +150,6 @@ void handle_sysrq(int key, struct pt_regs *pt_regs,
  
  /* Aux routines for the syncer */
  
-static void all_files_read_only(void)      /* Kill write permissions of all files */
-{
-       struct file *file;
-
-       for (file = inuse_filps; file; file = file->f_next)
-               if (file->f_dentry && atomic_read(&file->f_count) && S_ISREG(file->f_dentry->d_inode->i_mode))
-                       file->f_mode &= ~2;
-}
-
  static int is_local_disk(kdev_t dev)       /* Guess if the device is a local hard drive */
  {
         unsigned int major = MAJOR(dev);
@@ -192,6 +183,7 @@ static void go_sync(kdev_t dev, int remount_flag)
                 struct super_block *sb = get_super(dev);
                 struct vfsmount *vfsmnt;
                 int ret, flags;
+               struct list_head *p;
  
                 if (!sb) {
                         printk("Superblock not found\n");
@@ -201,6 +193,15 @@ static void go_sync(kdev_t dev, int remount_flag)
                         printk("R/O\n");
                         return;
                 }
+
+               file_list_lock();
+               for (p = sb->s_files.next; p != &sb->s_files; p = p->next) {
+                       struct file *file = list_entry(p, struct file, f_list);
+                       if (file->f_dentry && file_count(file)
+                               && S_ISREG(file->f_dentry->d_inode->i_mode))
+                               file->f_mode &= ~2;
+               }
+               file_list_unlock();
                 DQUOT_OFF(dev);
                 fsync_dev(dev);
                 flags = MS_RDONLY;
@@ -240,9 +241,6 @@ void do_emergency_sync(void)
         remount_flag = (emergency_sync_scheduled == EMERG_REMOUNT);
         emergency_sync_scheduled = 0;
  
-       if (remount_flag)
-               all_files_read_only();
-
         for (mnt = vfsmntlist; mnt; mnt = mnt->mnt_next)
                 if (is_local_disk(mnt->mnt_dev))
                         go_sync(mnt->mnt_dev, remount_flag);
diff --git a/drivers/char/tty_io.c b/drivers/char/tty_io.c

index 3951b542274eec8c0821f61bb4323c7b4c57d5fd..d0ceb78fe15b4f203d73b3b43eaf3e8911e740cf 100644 (file)
--- a/drivers/char/tty_io.c
+++ b/drivers/char/tty_io.c
@@ -173,13 +173,15 @@ inline int tty_paranoia_check(struct tty_struct *tty, kdev_t device,
  static int check_tty_count(struct tty_struct *tty, const char *routine)
  {
  #ifdef CHECK_TTY_COUNT
-       struct file *f;
+       struct list_head *p;
         int count = 0;
         
-       for(f = inuse_filps; f; f = f->f_next) {
-               if(f->private_data == tty)
+       file_list_lock();
+       for(p = tty->tty_files.next; p != &tty->tty_files; p = p->next) {
+               if(list_entry(p, struct file, f_list)->private_data == tty)
                         count++;
         }
+       file_list_unlock();
         if (tty->driver.type == TTY_DRIVER_TYPE_PTY &&
             tty->driver.subtype == PTY_TYPE_SLAVE &&
             tty->link && tty->link->count)
@@ -383,9 +385,9 @@ static struct file_operations hung_up_tty_fops = {
  void do_tty_hangup(void *data)
  {
         struct tty_struct *tty = (struct tty_struct *) data;
-       struct file * filp;
         struct file * cons_filp = NULL;
         struct task_struct *p;
+       struct list_head *l;
         int    closecount = 0, n;
  
         if (!tty)
@@ -395,13 +397,11 @@ void do_tty_hangup(void *data)
         lock_kernel();
         
         check_tty_count(tty, "do_tty_hangup");
-       for (filp = inuse_filps; filp; filp = filp->f_next) {
-               if (filp->private_data != tty)
-                       continue;
+       file_list_lock();
+       for (l = tty->tty_files.next; l != &tty->tty_files; l = l->next) {
+               struct file * filp = list_entry(l, struct file, f_list);
                 if (!filp->f_dentry)
                         continue;
-               if (!filp->f_dentry->d_inode)
-                       continue;
                 if (filp->f_dentry->d_inode->i_rdev == CONSOLE_DEV ||
                     filp->f_dentry->d_inode->i_rdev == SYSCONS_DEV) {
                         cons_filp = filp;
@@ -410,9 +410,10 @@ void do_tty_hangup(void *data)
                 if (filp->f_op != &tty_fops)
                         continue;
                 closecount++;
-               tty_fasync(-1, filp, 0);
+               tty_fasync(-1, filp, 0);        /* can't block */
                 filp->f_op = &hung_up_tty_fops;
         }
+       file_list_unlock();
         
         /* FIXME! What are the locking issues here? This may me overdoing things.. */
         {
@@ -1307,6 +1308,7 @@ retry_open:
  init_dev_done:
  #endif
         filp->private_data = tty;
+       file_move(filp, &tty->tty_files);
         check_tty_count(tty, "tty_open");
         if (tty->driver.type == TTY_DRIVER_TYPE_PTY &&
             tty->driver.subtype == PTY_TYPE_MASTER)
@@ -1937,6 +1939,7 @@ static void initialize_tty_struct(struct tty_struct *tty)
         tty->tq_hangup.routine = do_tty_hangup;
         tty->tq_hangup.data = tty;
         sema_init(&tty->atomic_read, 1);
+       INIT_LIST_HEAD(&tty->tty_files);
  }
  
  /*
diff --git a/drivers/misc/parport_daisy.c b/drivers/misc/parport_daisy.c

index 78b63cea26cbcb31442fd35ca0bda3f992a387bd..830d9d400d14454c0ffa29f95e1390df40603af2 100644 (file)
--- a/drivers/misc/parport_daisy.c
+++ b/drivers/misc/parport_daisy.c
@@ -15,7 +15,6 @@
   *
   */
  
-#include <linux/config.h>
  #include <linux/parport.h>
  #include <linux/delay.h>
  #include <asm/uaccess.h>
diff --git a/drivers/scsi/st.c b/drivers/scsi/st.c

index dc702848d902a9009a89a415584fd5f3e20fc8bf..d43121e680915dd83fed5f1c5aa1028802d16117 100644 (file)
--- a/drivers/scsi/st.c
+++ b/drivers/scsi/st.c
@@ -890,7 +890,7 @@ scsi_tape_flush(struct file * filp)
      kdev_t devt = inode->i_rdev;
      int dev;
  
-    if (atomic_read(&filp->f_count) > 1)
+    if (file_count(filp) > 1)
         return 0;
  
      dev = TAPE_NR(devt);
diff --git a/drivers/sgi/char/usema.c b/drivers/sgi/char/usema.c

index e91a944568efff7d5e89445fba5d99355d5c5bca..40db75465589cf610650b823cd49f4a80a0fbc5d 100644 (file)
--- a/drivers/sgi/char/usema.c
+++ b/drivers/sgi/char/usema.c
@@ -53,8 +53,8 @@ sgi_usema_attach (usattach_t * attach, struct irix_usema *usema)
         if (newfd < 0)
                 return newfd;
         
-       current->files->fd [newfd] = usema->filp;
-       atomic_inc(&usema->filp->f_count);
+       get_file(usema->filp);          /* take the reference on the struct file, not on the usema wrapper */
+       fd_install(newfd, usema->filp); /* then publish that file under the new descriptor */
         /* Is that it? */
         printk("UIOCATTACHSEMA: new usema fd is %d", newfd);
         return newfd;
diff --git a/fs/buffer.c b/fs/buffer.c

index 0ddd121ab7463bda70d2f3c2f199fd96205234b7..108b385eaa4e4409badefe4a5ba9e769839bd719 100644 (file)
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -1597,7 +1597,7 @@ int brw_page(int rw, struct page *page, kdev_t dev, int b[], int size, int bmap)
                 bh = bh->b_this_page;
         } while (bh != head);
         if (rw == READ)
-               ++current->mm->maj_flt;
+               ++current->maj_flt;
         if ((rw == READ) && nr) {
                 if (Page_Uptodate(page))
                         BUG();
@@ -1663,7 +1663,7 @@ int block_read_full_page(struct file * file, struct page * page)
                 nr++;
         } while (iblock++, (bh = bh->b_this_page) != head);
  
-       ++current->mm->maj_flt;
+       ++current->maj_flt;
         if (nr) {
                 if (Page_Uptodate(page))
                         BUG();
diff --git a/fs/dquot.c b/fs/dquot.c

index dfef0a63ab415375c1bf371bb9042336c481d98c..9dfbac082a679409ed49d833afc2a09215336cd5 100644 (file)
--- a/fs/dquot.c
+++ b/fs/dquot.c
@@ -577,32 +577,35 @@ we_slept:
  static void add_dquot_ref(kdev_t dev, short type)
  {
         struct super_block *sb = get_super(dev);
-       struct file *filp;
+       struct list_head *p;            /* cursor over sb->s_files */
         struct inode *inode;
  
         if (!sb || !sb->dq_op)
                 return; /* nothing to do */
  
-       for (filp = inuse_filps; filp; filp = filp->f_next) {
+       file_list_lock();               /* the per-superblock file list is walked under the file-list lock */
+       for (p = sb->s_files.next; p != &sb->s_files; p = p->next) {
+               struct file *filp = list_entry(p, struct file, f_list);
                 if (!filp->f_dentry)
                         continue;
-               if (filp->f_dentry->d_sb != sb)
-                       continue;
                 inode = filp->f_dentry->d_inode;
                 if (!inode)
                         continue;
                 /* N.B. race problem -- filp could become unused */
                 if (filp->f_mode & FMODE_WRITE) {
+                       file_list_unlock();     /* lock is dropped around initialize() -- presumably because it may sleep; confirm */
                         sb->dq_op->initialize(inode, type);
                         inode->i_flags |= S_QUOTA;
+                       file_list_lock();       /* NOTE(review): p is reused after the lock was released; the list may have changed meanwhile */
                 }
         }
+       file_list_unlock();
  }
  
  static void reset_dquot_ptrs(kdev_t dev, short type)
  {
         struct super_block *sb = get_super(dev);
-       struct file *filp;
+       struct list_head *p;
         struct inode *inode;
         struct dquot *dquot;
         int cnt;
@@ -614,11 +617,11 @@ restart:
         /* free any quota for unused dentries */
         shrink_dcache_sb(sb);
  
-       for (filp = inuse_filps; filp; filp = filp->f_next) {
+       file_list_lock();
+       for (p = sb->s_files.next; p != &sb->s_files; p = p->next) {
+               struct file *filp = list_entry(p, struct file, f_list);
                 if (!filp->f_dentry)
                         continue;
-               if (filp->f_dentry->d_sb != sb)
-                       continue;
                 inode = filp->f_dentry->d_inode;
                 if (!inode)
                         continue;
@@ -637,12 +640,14 @@ restart:
                         inode->i_flags &= ~S_QUOTA;
                 put_it:
                         if (dquot != NODQUOT) {
+                               file_list_unlock();
                                 dqput(dquot);
                                 /* we may have blocked ... */
                                 goto restart;
                         }
                 }
         }
+       file_list_unlock();
  }
  
  static inline void dquot_incr_inodes(struct dquot *dquot, unsigned long number)
diff --git a/fs/exec.c b/fs/exec.c

index 83b1834de5db67523ee726bd6a581a37030d6cb8..7215f69f15ff3aef265feb716f9c0c7ae67199e0 100644 (file)
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -119,8 +119,12 @@ int open_dentry(struct dentry * dentry, int mode)
  {
         struct inode * inode = dentry->d_inode;
         struct file * f;
+       struct list_head * l = NULL;
         int fd, error;
  
+       if (inode->i_sb)
+               l = &inode->i_sb->s_files;
+
         error = -EINVAL;
         if (!inode->i_op || !inode->i_op->default_file_ops)
                 goto out;
@@ -141,6 +145,7 @@ int open_dentry(struct dentry * dentry, int mode)
                         if (error)
                                 goto out_filp;
                 }
+               file_move(f, l);
                 fd_install(fd, f);
                 dget(dentry);
         }
diff --git a/fs/file_table.c b/fs/file_table.c

index 80c3a08bab79891c9bafab74ef332df16e4fc404..cb9ef16e95a81970ce729448a4f6a899dfda1d72 100644 (file)
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -9,6 +9,7 @@
  #include <linux/slab.h>
  #include <linux/file.h>
  #include <linux/init.h>
+#include <linux/smp_lock.h>
  
  /* SLAB cache for filp's. */
  static kmem_cache_t *filp_cache;
@@ -18,37 +19,12 @@ int nr_files = 0;   /* read only */
  int nr_free_files = 0; /* read only */
  int max_files = NR_FILE;/* tunable */
  
-/* Free list management, if you are here you must have f_count == 0 */
-static struct file * free_filps = NULL;
-
-static void insert_file_free(struct file *file)
-{
-       if((file->f_next = free_filps) != NULL)
-               free_filps->f_pprev = &file->f_next;
-       free_filps = file;
-       file->f_pprev = &free_filps;
-       nr_free_files++;
-}
-
-/* The list of in-use filp's must be exported (ugh...) */
-struct file *inuse_filps = NULL;
-
-static inline void put_inuse(struct file *file)
-{
-       if((file->f_next = inuse_filps) != NULL)
-               inuse_filps->f_pprev = &file->f_next;
-       inuse_filps = file;
-       file->f_pprev = &inuse_filps;
-}
-
-/* It does not matter which list it is on. */
-static inline void remove_filp(struct file *file)
-{
-       if(file->f_next)
-               file->f_next->f_pprev = file->f_pprev;
-       *file->f_pprev = file->f_next;
-}
-
+/* Here the new files go */
+static LIST_HEAD(anon_list);
+/* And here the free ones sit */
+static LIST_HEAD(free_list);
+/* public *and* exported. Not pretty! */
+spinlock_t files_lock = SPIN_LOCK_UNLOCKED;
  
  void __init file_table_init(void)
  {
@@ -67,24 +43,30 @@ void __init file_table_init(void)
  /* Find an unused file structure and return a pointer to it.
   * Returns NULL, if there are no more free file structures or
   * we run out of memory.
+ *
+ * SMP-safe.
   */
  struct file * get_empty_filp(void)
  {
         static int old_max = 0;
         struct file * f;
  
+       file_list_lock();
         if (nr_free_files > NR_RESERVED_FILES) {
         used_one:
-               f = free_filps;
-               remove_filp(f);
+               f = list_entry(free_list.next, struct file, f_list);
+               list_del(&f->f_list);
                 nr_free_files--;
         new_one:
+               file_list_unlock();
                 memset(f, 0, sizeof(*f));
-               atomic_set(&f->f_count, 1);
+               atomic_set(&f->f_count,1);
                 f->f_version = ++event;
                 f->f_uid = current->fsuid;
                 f->f_gid = current->fsgid;
-               put_inuse(f);
+               file_list_lock();
+               list_add(&f->f_list, &anon_list);
+               file_list_unlock();
                 return f;
         }
         /*
@@ -96,7 +78,9 @@ struct file * get_empty_filp(void)
          * Allocate a new one if we're below the limit.
          */
         if (nr_files < max_files) {
+               file_list_unlock();
                 f = kmem_cache_alloc(filp_cache, SLAB_KERNEL);
+               file_list_lock();
                 if (f) {
                         nr_files++;
                         goto new_one;
@@ -108,6 +92,7 @@ struct file * get_empty_filp(void)
                 printk("VFS: file-max limit %d reached\n", max_files);
                 old_max = max_files;
         }
+       file_list_unlock();
         return NULL;
  }
  
@@ -131,20 +116,77 @@ int init_private_file(struct file *filp, struct dentry *dentry, int mode)
                 return 0;
  }
  
-void fput(struct file *file)
+void _fput(struct file *file)   /* release @file and hand the struct back to free_list */
  {
-       if (atomic_dec_and_test(&file->f_count)) {
-               locks_remove_flock(file);
-               __fput(file);
-               remove_filp(file);
-               insert_file_free(file);
-       }
+       atomic_inc(&file->f_count);     /* temporary reference for the teardown; forced back to 0 below */
+
+       lock_kernel();
+       locks_remove_flock(file);       /* Still need the */
+       __fput(file);                   /* big lock here. */
+       unlock_kernel();
+
+       atomic_set(&file->f_count, 0);  /* no users left: the struct is free from here on */
+       file_list_lock();
+       list_del(&file->f_list);        /* off whichever list it was on ... */
+       list_add(&file->f_list, &free_list);    /* ... and onto free_list, where get_empty_filp() looks */
+       nr_free_files++;                /* keep the counter in step with free_list */
+       file_list_unlock();
  }
  
+/* Here. put_filp() is SMP-safe now. */
+
  void put_filp(struct file *file)
  {
-       if (atomic_dec_and_test(&file->f_count)) {
-               remove_filp(file);
-               insert_file_free(file);
+       if(atomic_dec_and_test(&file->f_count)) {       /* only the drop of the last reference recycles the struct */
+               file_list_lock();
+               list_del(&file->f_list);        /* off whichever list it was on ... */
+               list_add(&file->f_list, &free_list);    /* ... and onto free_list for reuse */
+               nr_free_files++;        /* keep the counter in step with free_list */
+               file_list_unlock();
+       }
+}
+
+void file_move(struct file *file, struct list_head *list)      /* re-home @file onto @list */
+{
+       if (!list)                      /* no destination given: leave @file on its current list */
+               return;
+       file_list_lock();               /* all f_list surgery happens under the file-list lock */
+       list_del(&file->f_list);        /* unlink from the list it is on now ... */
+       list_add(&file->f_list, list);  /* ... and insert it on @list */
+       file_list_unlock();
+}
+
+void file_moveto(struct file *new, struct file *old)   /* put @new on the same list as @old */
+{
+       file_list_lock();               /* all f_list surgery happens under the file-list lock */
+       list_del(&new->f_list);         /* unlink @new from the list it is on now ... */
+       list_add(&new->f_list, &old->f_list);   /* ... and link it in using @old's node as the anchor */
+       file_list_unlock();
+}
+
+int fs_may_remount_ro(struct super_block *sb)
+{
+       struct list_head *p;
+
+       /* Scan sb's open files under the list lock; entries with no dentry or inode are skipped. */
+       file_list_lock();
+       for (p = sb->s_files.next; p != &sb->s_files; p = p->next) {
+               struct file *file = list_entry(p, struct file, f_list);
+               struct inode *inode = file->f_dentry ? file->f_dentry->d_inode : NULL;
+               if (!inode)
+                       continue;
+
+               /* File with pending delete? */
+               if (inode->i_nlink == 0)
+                       goto too_bad;
+
+               /* Writable file?  Leave through too_bad so the list lock is released. */
+               if (S_ISREG(inode->i_mode) && (file->f_mode & FMODE_WRITE))
+                       goto too_bad;
         }
+       file_list_unlock();
+       return 1; /* nothing unlinked and no regular file open for writing */
+too_bad:
+       file_list_unlock();
+       return 0; /* an unlinked or writable file is still open on sb */
  }
diff --git a/fs/inode.c b/fs/inode.c

index 01fc64d235ee8dc9581644a07eba8510a5e386cd..bcf36de99805b84c3e5d5b32e424deb832ad4b47 100644 (file)
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -818,31 +818,6 @@ void __init inode_init(void)
         max_inodes = max;
  }
  
-/* This belongs in file_table.c, not here... */
-int fs_may_remount_ro(struct super_block *sb)
-{
-       struct file *file;
-
-       /* Check that no files are currently opened for writing. */
-       for (file = inuse_filps; file; file = file->f_next) {
-               struct inode *inode;
-               if (!file->f_dentry)
-                       continue;
-               inode = file->f_dentry->d_inode;
-               if (!inode || inode->i_sb != sb)
-                       continue;
-
-               /* File with pending delete? */
-               if (inode->i_nlink == 0)
-                       return 0;
-
-               /* Writable file? */
-               if (S_ISREG(inode->i_mode) && (file->f_mode & FMODE_WRITE))
-                       return 0;
-       }
-       return 1; /* Tis' cool bro. */
-}
-
  void update_atime (struct inode *inode)
  {
      if ( IS_NOATIME (inode) ) return;
diff --git a/fs/nfs/write.c b/fs/nfs/write.c

index 8b63cbf662c3f20084f2da62281be244c8dd1e97..77c1db0915b6997dcb05450e7cd81946030074fd 100644 (file)
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -305,6 +305,7 @@ create_write_request(struct file * file, struct page *page, unsigned int offset,
                 goto out_req;
  
         /* Put the task on inode's writeback request list. */
+       get_file(file);
         wreq->wb_file = file;
         wreq->wb_pid    = current->pid;
         wreq->wb_page   = page;
@@ -467,7 +468,6 @@ nfs_updatepage(struct file *file, struct page *page, unsigned long offset, unsig
          * The IO completion will then free the page and the dentry.
          */
         get_page(page);
-       atomic_inc(&file->f_count);
  
         /* Schedule request */
         synchronous = schedule_write_request(req, synchronous);
diff --git a/fs/open.c b/fs/open.c

index deb898f768dd07c803cbcf77b75dc8a6537729d8..06ac9610cd04205b6aea5521644549b425f32a4f 100644 (file)
--- a/fs/open.c
+++ b/fs/open.c
@@ -663,6 +663,8 @@ struct file *filp_open(const char * filename, int flags, int mode)
         f->f_op = NULL;
         if (inode->i_op)
                 f->f_op = inode->i_op->default_file_ops;
+       if (inode->i_sb)
+               file_move(f, &inode->i_sb->s_files);
         if (f->f_op && f->f_op->open) {
                 error = f->f_op->open(inode,f);
                 if (error)
diff --git a/fs/proc/array.c b/fs/proc/array.c

index 49d0d005877609a8635284d312485534ceb617dc..66108f9a79f015473c47698112aeacf6222a1c1b 100644 (file)
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -906,10 +906,10 @@ static int get_stat(int pid, char * buffer)
                 tsk->tty ? kdev_t_to_nr(tsk->tty->device) : 0,
                 tty_pgrp,
                 tsk->flags,
-               tsk->mm ? tsk->mm->min_flt : 0,
-               tsk->mm ? tsk->mm->cmin_flt : 0,
-               tsk->mm ? tsk->mm->maj_flt : 0,
-               tsk->mm ? tsk->mm->cmaj_flt : 0,
+               tsk->min_flt,
+               tsk->cmin_flt,
+               tsk->maj_flt,
+               tsk->cmaj_flt,
                 tsk->times.tms_utime,
                 tsk->times.tms_stime,
                 tsk->times.tms_cutime,
@@ -936,8 +936,8 @@ static int get_stat(int pid, char * buffer)
                 sigign      .sig[0] & 0x7fffffffUL,
                 sigcatch    .sig[0] & 0x7fffffffUL,
                 wchan,
-               tsk->mm ? tsk->mm->nswap : 0,
-               tsk->mm ? tsk->mm->cnswap : 0,
+               tsk->nswap,
+               tsk->cnswap,
                 tsk->exit_signal,
                 tsk->processor);
  }
diff --git a/fs/proc/inode.c b/fs/proc/inode.c

index 970e63a96b3a8672b2ed7e7cf9f3ace5ccca613e..4d329d090bd9bc77ab233bca1e461159188072f8 100644 (file)
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -87,13 +87,26 @@ static void proc_delete_inode(struct inode *inode)
         }
  }
  
+struct super_block *proc_super_blocks = NULL;
+
+static void proc_put_super(struct super_block *sb)
+{
+       struct super_block **p = &proc_super_blocks;
+       while (*p != sb) {
+               if (!*p)        /* should never happen */
+                       return;
+               p = (struct super_block **)&(*p)->u.generic_sbp;
+       }
+       *p = (struct super_block *)(*p)->u.generic_sbp;
+}
+
  static struct super_operations proc_sops = { 
         proc_read_inode,
         proc_write_inode,
         proc_put_inode,
         proc_delete_inode,      /* delete_inode(struct inode *) */
         NULL,
-       NULL,
+       proc_put_super,
         NULL,
         proc_statfs,
         NULL
@@ -323,6 +336,8 @@ struct super_block *proc_read_super(struct super_block *s,void *data,
         if (!s->s_root)
                 goto out_no_root;
         parse_options(data, &root_inode->i_uid, &root_inode->i_gid);
+       s->u.generic_sbp = (void*) proc_super_blocks;
+       proc_super_blocks = s;
         unlock_super(s);
         return s;
  
diff --git a/fs/proc/mem.c b/fs/proc/mem.c

index b095df35361937de43278f0d5aa4ac1141f7fafb..4d599c77b50d4448461c89154d84bb115f6d8636 100644 (file)
--- a/fs/proc/mem.c
+++ b/fs/proc/mem.c
@@ -289,10 +289,10 @@ int mem_mmap(struct file * file, struct vm_area_struct * vma)
                         return -ENOMEM;
  
                 if (!pte_present(*src_table))
-                       handle_mm_fault(tsk->mm, src_vma, stmp, 1);
+                       handle_mm_fault(tsk, src_vma, stmp, 1);
  
                 if ((vma->vm_flags & VM_WRITE) && !pte_write(*src_table))
-                       handle_mm_fault(tsk->mm, src_vma, stmp, 1);
+                       handle_mm_fault(tsk, src_vma, stmp, 1);
  
                 set_pte(src_table, pte_mkdirty(*src_table));
                 set_pte(dest_table, *src_table);
diff --git a/fs/proc/root.c b/fs/proc/root.c

index 31b89ca824ba01e51c848e510dc90281bfd6924b..a6c05e91e95506e49a8733c8670c9054da18b537 100644 (file)
--- a/fs/proc/root.c
+++ b/fs/proc/root.c
@@ -365,24 +365,36 @@ int proc_register(struct proc_dir_entry * dir, struct proc_dir_entry * dp)
   */
  static void proc_kill_inodes(int ino)
  {
-       struct file *filp;
-
-       /* inuse_filps is protected by the single kernel lock */
-       for (filp = inuse_filps; filp; filp = filp->f_next) {
-               struct dentry * dentry;
-               struct inode * inode;
-
-               dentry = filp->f_dentry;
-               if (!dentry)
-                       continue;
-               if (dentry->d_op != &proc_dentry_operations)
-                       continue;
-               inode = dentry->d_inode;
-               if (!inode)
-                       continue;
-               if (inode->i_ino != ino)
-                       continue;
-               filp->f_op = NULL;
+       struct list_head *p;
+       struct super_block *sb;
+
+       /*
+        * Actually it's a partial revoke(). We have to go through all
+        * copies of procfs. proc_super_blocks is protected by the big
+        * lock for the time being.
+        */
+       for (sb = proc_super_blocks;
+            sb;
+            sb = (struct super_block*)sb->u.generic_sbp) {
+               file_list_lock();
+               for (p = sb->s_files.next; p != &sb->s_files; p = p->next) {
+                       struct file * filp = list_entry(p, struct file, f_list);
+                       struct dentry * dentry;
+                       struct inode * inode;
+
+                       dentry = filp->f_dentry;
+                       if (!dentry)
+                               continue;
+                       if (dentry->d_op != &proc_dentry_operations)
+                               continue;
+                       inode = dentry->d_inode;
+                       if (!inode)
+                               continue;
+                       if (inode->i_ino != ino)
+                               continue;
+                       filp->f_op = NULL;
+               }
+               file_list_unlock();
         }
  }
  
diff --git a/fs/super.c b/fs/super.c

index 5cf5189597d05a77f776eb66f3f1d66ac223ff42..f708150946f5a0fa745a4872a8050a814825d1f6 100644 (file)
--- a/fs/super.c
+++ b/fs/super.c
@@ -531,6 +531,7 @@ static struct super_block *get_empty_super(void)
                 INIT_LIST_HEAD(&s->s_dirty);
                 list_add (&s->s_list, super_blocks.prev);
                 init_waitqueue_head(&s->s_wait);
+               INIT_LIST_HEAD(&s->s_files);
         }
         return s;
  }
diff --git a/include/linux/file.h b/include/linux/file.h

index 5efa992964112311ccba234278f0b8f1a5b6ac2d..699098733ce85866a1d319355c9d844e930e890c 100644 (file)
--- a/include/linux/file.h
+++ b/include/linux/file.h
@@ -5,7 +5,8 @@
  #ifndef __LINUX_FILE_H
  #define __LINUX_FILE_H
  
-extern void __fput(struct file *);
+extern void __fput(struct file *);     /* goner? */
+extern void _fput(struct file *);
  
  /*
   * Check whether the specified task has the fd open. Since the task
@@ -78,7 +79,11 @@ extern inline void fd_install(unsigned int fd, struct file * file)
   * I suspect there are many other similar "optimizations" across the
   * kernel...
   */
-extern void fput(struct file *); 
+extern inline void fput(struct file * file)
+{
+       if (atomic_dec_and_test(&file->f_count))
+               _fput(file);
+}
  extern void put_filp(struct file *);
  
  #endif /* __LINUX_FILE_H */
diff --git a/include/linux/fs.h b/include/linux/fs.h

index 7d60fee3170d0d26964dfc75148d3870ce1c5f42..14d71253df55e43bc03874cbb3fbae2975b47c53 100644 (file)
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -400,7 +400,7 @@ struct fown_struct {
  };
  
  struct file {
-       struct file             *f_next, **f_pprev;
+       struct list_head        f_list;
         struct dentry           *f_dentry;
         struct file_operations  *f_op;
         atomic_t                f_count;
@@ -417,6 +417,9 @@ struct file {
         /* needed for tty driver, and maybe others */
         void                    *private_data;
  };
+extern spinlock_t files_lock;
+#define file_list_lock() spin_lock(&files_lock);
+#define file_list_unlock() spin_unlock(&files_lock);
  
  #define get_file(x)    atomic_inc(&(x)->f_count)
  #define file_count(x)  atomic_read(&(x)->f_count)
@@ -527,6 +530,7 @@ struct super_block {
         short int               s_ibasket_count;
         short int               s_ibasket_max;
         struct list_head        s_dirty;        /* dirty inodes */
+       struct list_head        s_files;
  
         union {
                 struct minix_sb_info    minix_sb;
@@ -745,8 +749,6 @@ extern struct file_system_type *get_fs_type(const char *);
  extern int fs_may_remount_ro(struct super_block *);
  extern int fs_may_mount(kdev_t);
  
-extern struct file *inuse_filps;
-
  extern int try_to_free_buffers(struct page *);
  extern void refile_buffer(struct buffer_head * buf);
  
@@ -855,6 +857,8 @@ extern struct inode * get_empty_inode(void);
  extern void insert_inode_hash(struct inode *);
  extern void remove_inode_hash(struct inode *);
  extern struct file * get_empty_filp(void);
+extern void file_move(struct file *f, struct list_head *list);
+extern void file_moveto(struct file *new, struct file *old);
  extern struct buffer_head * get_hash_table(kdev_t, int, int);
  extern struct buffer_head * getblk(kdev_t, int, int);
  extern void ll_rw_block(int, int, struct buffer_head * bh[]);
diff --git a/include/linux/mm.h b/include/linux/mm.h

index 5429bb3f9409757dc44038501f9f4cdb85b660e5..cb5a5809ee3ad6feccee69408f82685a89c072e2 100644 (file)
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -314,8 +314,8 @@ extern int remap_page_range(unsigned long from, unsigned long to, unsigned long
  extern int zeromap_page_range(unsigned long from, unsigned long size, pgprot_t prot);
  
  extern void vmtruncate(struct inode * inode, unsigned long offset);
-extern int handle_mm_fault(struct mm_struct *mm,struct vm_area_struct *vma, unsigned long address, int write_access);
-extern void make_pages_present(unsigned long addr, unsigned long end);
+extern int handle_mm_fault(struct task_struct *tsk,struct vm_area_struct *vma, unsigned long address, int write_access);
+extern int make_pages_present(unsigned long addr, unsigned long end);
  
  extern int pgt_cache_water[2];
  extern int check_pgt_cache(void);
@@ -406,7 +406,7 @@ static inline struct vm_area_struct * find_vma_intersection(struct mm_struct * m
         return vma;
  }
  
-extern struct vm_area_struct *find_extend_vma(struct mm_struct *mm, unsigned long addr);
+extern struct vm_area_struct *find_extend_vma(struct task_struct *tsk, unsigned long addr);
  
  #define buffer_under_min()     ((atomic_read(&buffermem) >> PAGE_SHIFT) * 100 < \
                                 buffer_mem.min_percent * num_physpages)
diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h

index be5c2c66609f410d811f56f49b76bc7a54ba7fed..a349c3a7c92ef283e0346231df013522cde587cf 100644 (file)
--- a/include/linux/proc_fs.h
+++ b/include/linux/proc_fs.h
@@ -375,6 +375,7 @@ static inline int proc_scsi_unregister(struct proc_dir_entry *driver, int x)
      }
  }
  
+extern struct super_block *proc_super_blocks;
  extern struct dentry_operations proc_dentry_operations;
  extern struct super_block *proc_read_super(struct super_block *,void *,int);
  extern int init_proc_fs(void);
diff --git a/include/linux/sched.h b/include/linux/sched.h

index 251aa8f9d205a795ed0082ca23615dff65933356..df3753dbed66e3d19c343a8ce68dd2b0ba4a07da 100644 (file)
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -172,10 +172,8 @@ struct mm_struct {
         atomic_t count;
         int map_count;                          /* number of VMAs */
         struct semaphore mmap_sem;
-       rwlock_t page_table_lock;
+       spinlock_t page_table_lock;
         unsigned long context;
-       unsigned long min_flt, maj_flt, nswap, cmin_flt, cmaj_flt, cnswap;
-       int swappable:1;
         unsigned long start_code, end_code, start_data, end_data;
         unsigned long start_brk, brk, start_stack;
         unsigned long arg_start, arg_end, env_start, env_end;
@@ -196,9 +194,7 @@ struct mm_struct {
                 swapper_pg_dir,                         \
                 ATOMIC_INIT(1), 1,                      \
                 __MUTEX_INITIALIZER(name.mmap_sem),     \
-               RW_LOCK_UNLOCKED,                       \
-               0,                                      \
-               0, 0, 0, 0, 0, 0,                       \
+               SPIN_LOCK_UNLOCKED,                     \
                 0,                                      \
                 0, 0, 0, 0,                             \
                 0, 0, 0,                                \
@@ -286,6 +282,9 @@ struct task_struct {
         struct tms times;
         unsigned long start_time;
         long per_cpu_utime[NR_CPUS], per_cpu_stime[NR_CPUS];
+/* mm fault and swap info: this can arguably be seen as either mm-specific or thread-specific */
+       unsigned long min_flt, maj_flt, nswap, cmin_flt, cmaj_flt, cnswap;
+       int swappable:1;
  /* process credentials */
         uid_t uid,euid,suid,fsuid;
         gid_t gid,egid,sgid,fsgid;
@@ -368,6 +367,8 @@ struct task_struct {
  /* timer */    { NULL, NULL, 0, 0, it_real_fn }, \
  /* utime */    {0,0,0,0},0, \
  /* per CPU times */ {0, }, {0, }, \
+/* flt */      0,0,0,0,0,0, \
+/* swp */      0, \
  /* process credentials */                                      \
  /* uid etc */  0,0,0,0,0,0,0,0,                                \
  /* suppl grps*/ 0, {0,},                                       \
diff --git a/include/linux/tty.h b/include/linux/tty.h

index dd2a63daeb1c333ea93c4ac0a8cb05ee4f4f6c23..edf1ff28f44fd4c888c3a8bd179e2f7207b6c6ec 100644 (file)
--- a/include/linux/tty.h
+++ b/include/linux/tty.h
@@ -277,6 +277,7 @@ struct tty_struct {
         struct tq_struct tq_hangup;
         void *disc_data;
         void *driver_data;
+       struct list_head tty_files;
  
  #define N_TTY_BUF_SIZE 4096
         
diff --git a/include/net/sock.h b/include/net/sock.h

index 73ef988a72d82ee6f0b78f8ec1e01bf5cdb637f6..af6bf7679df0ae22c24af02fade4fbd3e72af290 100644 (file)
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -371,10 +371,6 @@ do {       spin_lock_init(&((__sk)->lock.slock)); \
  } while(0);
  
  struct sock {
-       /* This must be first. */
-       struct sock             *sklist_next;
-       struct sock             *sklist_prev;
-
         /* Local port binding hash linkage. */
         struct sock             *bind_next;
         struct sock             **bind_pprev;
@@ -579,10 +575,6 @@ do {       if((__sk)->backlog.tail == NULL) {              \
   * transport -> network interface is defined by struct inet_proto
   */
  struct proto {
-       /* These must be first. */
-       struct sock             *sklist_next;
-       struct sock             *sklist_prev;
-
         void                    (*close)(struct sock *sk, 
                                         long timeout);
         int                     (*connect)(struct sock *sk,
@@ -621,9 +613,7 @@ struct proto {
         /* Keeping track of sk's, looking them up, and port selection methods. */
         void                    (*hash)(struct sock *sk);
         void                    (*unhash)(struct sock *sk);
-       void                    (*rehash)(struct sock *sk);
-       unsigned short          (*good_socknum)(void);
-       int                     (*verify_bind)(struct sock *sk, unsigned short snum);
+       int                     (*get_port)(struct sock *sk, unsigned short snum);
  
         unsigned short          max_header;
         unsigned long           retransmits;
@@ -667,40 +657,6 @@ extern rwlock_t sockhash_lock;
  #define SOCKHASH_LOCK_WRITE_BH()       write_lock(&sockhash_lock)
  #define SOCKHASH_UNLOCK_WRITE_BH()     write_unlock(&sockhash_lock)
  
-/* Some things in the kernel just want to get at a protocols
- * entire socket list commensurate, thus...
- */
-static __inline__ void add_to_prot_sklist(struct sock *sk)
-{
-       SOCKHASH_LOCK_WRITE();
-       if(!sk->sklist_next) {
-               struct proto *p = sk->prot;
-
-               sk->sklist_prev = (struct sock *) p;
-               sk->sklist_next = p->sklist_next;
-               p->sklist_next->sklist_prev = sk;
-               p->sklist_next = sk;
-
-               /* Charge the protocol. */
-               sk->prot->inuse += 1;
-               if(sk->prot->highestinuse < sk->prot->inuse)
-                       sk->prot->highestinuse = sk->prot->inuse;
-       }
-       SOCKHASH_UNLOCK_WRITE();
-}
-
-static __inline__ void del_from_prot_sklist(struct sock *sk)
-{
-       SOCKHASH_LOCK_WRITE();
-       if(sk->sklist_next) {
-               sk->sklist_next->sklist_prev = sk->sklist_prev;
-               sk->sklist_prev->sklist_next = sk->sklist_next;
-               sk->sklist_next = NULL;
-               sk->prot->inuse--;
-       }
-       SOCKHASH_UNLOCK_WRITE();
-}
-
  /* Used by processes to "lock" a socket state, so that
   * interrupts and bottom half handlers won't change it
   * from under us. It essentially blocks any incoming
diff --git a/include/net/tcp.h b/include/net/tcp.h

index 28fc52125dfe658e394e0febab5ad37a5c694ee5..9127795c266b6368d48cb0a43faf74f4e82a886f 100644 (file)
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -72,11 +72,7 @@ extern struct sock *tcp_listening_hash[TCP_LHTABLE_SIZE];
   */
  struct tcp_bind_bucket {
         unsigned short          port;
-       unsigned short          flags;
-#define TCPB_FLAG_LOCKED       0x0001
-#define TCPB_FLAG_FASTREUSE    0x0002
-#define TCPB_FLAG_GOODSOCKNUM  0x0004
-
+       unsigned short          fastreuse;
         struct tcp_bind_bucket  *next;
         struct sock             *owners;
         struct tcp_bind_bucket  **pprev;
@@ -115,32 +111,6 @@ static __inline__ int tcp_bhashfn(__u16 lport)
         return (lport & (tcp_bhash_size - 1));
  }
  
-static __inline__ void tcp_sk_bindify(struct sock *sk)
-{
-       struct tcp_bind_bucket *tb;
-       unsigned short snum = sk->num;
-
-       for(tb = tcp_bhash[tcp_bhashfn(snum)]; tb->port != snum; tb = tb->next)
-               ;
-       /* Update bucket flags. */
-       if(tb->owners == NULL) {
-               /* We're the first. */
-               if(sk->reuse && sk->state != TCP_LISTEN)
-                       tb->flags = TCPB_FLAG_FASTREUSE;
-               else
-                       tb->flags = 0;
-       } else {
-               if((tb->flags & TCPB_FLAG_FASTREUSE) &&
-                  ((sk->reuse == 0) || (sk->state == TCP_LISTEN)))
-                       tb->flags &= ~TCPB_FLAG_FASTREUSE;
-       }
-       if((sk->bind_next = tb->owners) != NULL)
-               tb->owners->bind_pprev = &sk->bind_next;
-       tb->owners = sk;
-       sk->bind_pprev = &tb->owners;
-       sk->prev = (struct sock *) tb;
-}
-
  /* This is a TIME_WAIT bucket.  It works around the memory consumption
   * problems of sockets in such a state on heavily loaded servers, but
   * without violating the protocol specification.
@@ -150,8 +120,6 @@ struct tcp_tw_bucket {
          * XXX Yes I know this is gross, but I'd have to edit every single
          * XXX networking file if I created a "struct sock_header". -DaveM
          */
-       struct sock             *sklist_next;
-       struct sock             *sklist_prev;
         struct sock             *bind_next;
         struct sock             **bind_pprev;
         __u32                   daddr;
@@ -477,7 +445,9 @@ extern __inline int between(__u32 seq1, __u32 seq2, __u32 seq3)
  extern struct proto tcp_prot;
  extern struct tcp_mib tcp_statistics;
  
-extern unsigned short          tcp_good_socknum(void);
+extern void                    tcp_put_port(struct sock *sk);
+extern void                    __tcp_put_port(struct sock *sk);
+extern void                    tcp_inherit_port(struct sock *sk, struct sock *child);
  
  extern void                    tcp_v4_err(struct sk_buff *skb,
                                            unsigned char *, int);
@@ -630,8 +600,7 @@ struct tcp_sl_timer {
  #define TCP_SLT_SYNACK         0
  #define TCP_SLT_KEEPALIVE      1
  #define TCP_SLT_TWKILL         2
-#define TCP_SLT_BUCKETGC       3
-#define TCP_SLT_MAX            4
+#define TCP_SLT_MAX            3
  
  extern struct tcp_sl_timer tcp_slt_array[TCP_SLT_MAX];
   
@@ -1069,17 +1038,6 @@ extern __inline__ void tcp_dec_slow_timer(int timer)
         atomic_dec(&slt->count);
  }
  
-/* This needs to use a slow timer, so it is here. */
-static __inline__ void tcp_sk_unbindify(struct sock *sk)
-{
-       struct tcp_bind_bucket *tb = (struct tcp_bind_bucket *) sk->prev;
-       if(sk->bind_next)
-               sk->bind_next->bind_pprev = sk->bind_pprev;
-       *sk->bind_pprev = sk->bind_next;
-       if(tb->owners == NULL)
-               tcp_inc_slow_timer(TCP_SLT_BUCKETGC);
-}
-
  extern const char timer_bug_msg[];
  
  static inline void tcp_clear_xmit_timer(struct sock *sk, int what)
diff --git a/include/net/udp.h b/include/net/udp.h

index 30a59b3f09132ad84632da4bad8cafbf76a8a63b..dcc2fd09328414a7ce700db72f0b84cae6ff9ff6 100644 (file)
--- a/include/net/udp.h
+++ b/include/net/udp.h
@@ -23,6 +23,7 @@
  #define _UDP_H
  
  #include <linux/udp.h>
+#include <net/sock.h>
  
  #define UDP_HTABLE_SIZE                128
  
@@ -32,7 +33,18 @@
   */
  extern struct sock *udp_hash[UDP_HTABLE_SIZE];
  
-extern unsigned short udp_good_socknum(void);
+extern int udp_port_rover;
+
+static inline int udp_lport_inuse(u16 num)
+{
+       struct sock *sk = udp_hash[num & (UDP_HTABLE_SIZE - 1)];
+
+       for(; sk != NULL; sk = sk->next) {
+               if(sk->num == num)
+                       return 1;
+       }
+       return 0;
+}
  
  /* Note: this must match 'valbool' in sock_setsockopt */
  #define UDP_CSUM_NOXMIT                1
diff --git a/ipc/shm.c b/ipc/shm.c

index 395104aaf89de6e2e53fbf3db8a6e42c7c0ea8d0..a02bc8ad124efc5d68e9c58ebd1dd8f91ad3ecf2 100644 (file)
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -675,10 +675,10 @@ static unsigned long shm_nopage(struct vm_area_struct * shmd, unsigned long addr
                 pte = pte_mkdirty(mk_pte(page, PAGE_SHARED));
                 shp->shm_pages[idx] = pte_val(pte);
         } else
-               --current->mm->maj_flt;  /* was incremented in do_no_page */
+               --current->maj_flt;  /* was incremented in do_no_page */
  
  done:  /* pte_val(pte) == shp->shm_pages[idx] */
-       current->mm->min_flt++;
+       current->min_flt++;
         get_page(mem_map + MAP_NR(pte_page(pte)));
         return pte_page(pte);
  }
diff --git a/kernel/acct.c b/kernel/acct.c

index 63ee87150ee24f0c83c55636b154ad751647bcc2..c6142afc71baddb05cb40040478df5ab38b9967e 100644 (file)
--- a/kernel/acct.c
+++ b/kernel/acct.c
@@ -276,7 +276,7 @@ static int do_acct_process(long exitcode, struct file *file)
          */
         if (!file)
                 return 0;
-       atomic_inc(&file->f_count);
+       get_file(file);
         if (!check_free_space(file)) {
                 fput(file);
                 return 0;
diff --git a/kernel/exit.c b/kernel/exit.c

index a4ac8ae66eb8d8da5c896afe10148a44d43adf44..57d8441da5e1e7056f83e05ba3ccf31d115f57a5 100644 (file)
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -52,11 +52,9 @@ static void release(struct task_struct * p)
                 write_unlock_irq(&tasklist_lock);
  
                 release_thread(p);
-#if 0 /* FIXME! How do we do this right for threads? */
                 current->cmin_flt += p->min_flt + p->cmin_flt;
                 current->cmaj_flt += p->maj_flt + p->cmaj_flt;
                 current->cnswap += p->nswap + p->cnswap;
-#endif
                 free_task_struct(p);
         } else {
                 printk("task releasing itself\n");
@@ -258,6 +256,7 @@ static inline void __exit_mm(struct task_struct * tsk)
                 flush_tlb_mm(mm);
                 destroy_context(mm);
                 tsk->mm = &init_mm;
+               tsk->swappable = 0;
                 SET_PAGE_DIR(tsk, swapper_pg_dir);
                 mm_release();
                 mmput(mm);
diff --git a/kernel/fork.c b/kernel/fork.c

index bb4bec1ea5eb9a2993f211e23a4ba90f8426977f..c940938838def29b67f0cfe725fec88f86d73786 100644 (file)
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -304,7 +304,7 @@ struct mm_struct * mm_alloc(void)
                 mm->map_count = 0;
                 mm->def_flags = 0;
                 init_MUTEX_LOCKED(&mm->mmap_sem);
-               mm->page_table_lock = RW_LOCK_UNLOCKED;
+               mm->page_table_lock = SPIN_LOCK_UNLOCKED;
                 /*
                  * Leave mm->pgd set to the parent's pgd
                  * so that pgd_offset() is always valid.
@@ -315,7 +315,6 @@ struct mm_struct * mm_alloc(void)
                  * cache or tlb.
                  */
                 mm->cpu_vm_mask = 0;
-               mm->swappable = 0;
         }
         return mm;
  }
@@ -378,6 +377,9 @@ static inline int copy_mm(int nr, unsigned long clone_flags, struct task_struct
                 goto fail_nomem;
  
         tsk->mm = mm;
+       tsk->min_flt = tsk->maj_flt = 0;
+       tsk->cmin_flt = tsk->cmaj_flt = 0;
+       tsk->nswap = tsk->cnswap = 0;
         copy_segments(nr, tsk, mm);
         retval = new_page_tables(tsk);
         if (retval)
@@ -575,6 +577,7 @@ int do_fork(unsigned long clone_flags, unsigned long usp, struct pt_regs *regs)
                 __MOD_INC_USE_COUNT(p->binfmt->module);
  
         p->did_exec = 0;
+       p->swappable = 0;
         p->state = TASK_UNINTERRUPTIBLE;
  
         copy_flags(clone_flags, p);
@@ -639,7 +642,7 @@ int do_fork(unsigned long clone_flags, unsigned long usp, struct pt_regs *regs)
         p->semundo = NULL;
  
         /* ok, now we should be set up.. */
-       p->mm->swappable = 1;
+       p->swappable = 1;
         p->exit_signal = clone_flags & CSIGNAL;
         p->pdeath_signal = 0;
  
diff --git a/kernel/ksyms.c b/kernel/ksyms.c

index 04fd2517aa383e70aee8242d0edcc3501be29584..8a30af4dbda998101de365a469744fe207537ff3 100644 (file)
--- a/kernel/ksyms.c
+++ b/kernel/ksyms.c
@@ -116,7 +116,8 @@ EXPORT_SYMBOL(update_atime);
  EXPORT_SYMBOL(get_super);
  EXPORT_SYMBOL(get_fs_type);
  EXPORT_SYMBOL(getname);
-EXPORT_SYMBOL(__fput);
+EXPORT_SYMBOL(__fput); /* goner? */
+EXPORT_SYMBOL(_fput);
  EXPORT_SYMBOL(igrab);
  EXPORT_SYMBOL(iunique);
  EXPORT_SYMBOL(iget);
@@ -141,8 +142,8 @@ EXPORT_SYMBOL(get_empty_filp);
  EXPORT_SYMBOL(init_private_file);
  EXPORT_SYMBOL(filp_open);
  EXPORT_SYMBOL(filp_close);
-EXPORT_SYMBOL(fput);
  EXPORT_SYMBOL(put_filp);
+EXPORT_SYMBOL(files_lock);
  EXPORT_SYMBOL(check_disk_change);
  EXPORT_SYMBOL(invalidate_buffers);
  EXPORT_SYMBOL(invalidate_inodes);
diff --git a/kernel/sys.c b/kernel/sys.c

index 0179dc7ea29deb9bb982164a270933d1bcc2de2b..9ab1b2f4ce009393afd386540147001ccf2c74ec 100644 (file)
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -944,27 +944,27 @@ int getrusage(struct task_struct *p, int who, struct rusage *ru)
                         r.ru_utime.tv_usec = CT_TO_USECS(p->times.tms_utime);
                         r.ru_stime.tv_sec = CT_TO_SECS(p->times.tms_stime);
                         r.ru_stime.tv_usec = CT_TO_USECS(p->times.tms_stime);
-                       r.ru_minflt = 0;
-                       r.ru_majflt = 0;
-                       r.ru_nswap = 0;
+                       r.ru_minflt = p->min_flt;
+                       r.ru_majflt = p->maj_flt;
+                       r.ru_nswap = p->nswap;
                         break;
                 case RUSAGE_CHILDREN:
                         r.ru_utime.tv_sec = CT_TO_SECS(p->times.tms_cutime);
                         r.ru_utime.tv_usec = CT_TO_USECS(p->times.tms_cutime);
                         r.ru_stime.tv_sec = CT_TO_SECS(p->times.tms_cstime);
                         r.ru_stime.tv_usec = CT_TO_USECS(p->times.tms_cstime);
-                       r.ru_minflt = 0;
-                       r.ru_majflt = 0;
-                       r.ru_nswap = 0;
+                       r.ru_minflt = p->cmin_flt;
+                       r.ru_majflt = p->cmaj_flt;
+                       r.ru_nswap = p->cnswap;
                         break;
                 default:
                         r.ru_utime.tv_sec = CT_TO_SECS(p->times.tms_utime + p->times.tms_cutime);
                         r.ru_utime.tv_usec = CT_TO_USECS(p->times.tms_utime + p->times.tms_cutime);
                         r.ru_stime.tv_sec = CT_TO_SECS(p->times.tms_stime + p->times.tms_cstime);
                         r.ru_stime.tv_usec = CT_TO_USECS(p->times.tms_stime + p->times.tms_cstime);
-                       r.ru_minflt = 0;
-                       r.ru_majflt = 0;
-                       r.ru_nswap = 0;
+                       r.ru_minflt = p->min_flt + p->cmin_flt;
+                       r.ru_majflt = p->maj_flt + p->cmaj_flt;
+                       r.ru_nswap = p->nswap + p->cnswap;
                         break;
         }
         return copy_to_user(ru, &r, sizeof(r)) ? -EFAULT : 0;
diff --git a/mm/memory.c b/mm/memory.c

index c6cf211aa268d1d4064af7fccac5a6422b07bcdd..04d0f534d0c286dcf4108f30e8b7fe171bd536eb 100644 (file)
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -605,7 +605,7 @@ unsigned long put_dirty_page(struct task_struct * tsk, unsigned long page, unsig
   * We enter with the page table read-lock held, and need to exit without
   * it.
   */
-static int do_wp_page(struct mm_struct * mm, struct vm_area_struct * vma,
+static int do_wp_page(struct task_struct * tsk, struct vm_area_struct * vma,
         unsigned long address, pte_t *page_table, pte_t pte)
  {
         unsigned long old_page, new_page;
@@ -614,7 +614,7 @@ static int do_wp_page(struct mm_struct * mm, struct vm_area_struct * vma,
         old_page = pte_page(pte);
         if (MAP_NR(old_page) >= max_mapnr)
                 goto bad_wp_page;
-       mm->min_flt++;
+       tsk->min_flt++;
         page = mem_map + MAP_NR(old_page);
         
         /*
@@ -637,18 +637,18 @@ static int do_wp_page(struct mm_struct * mm, struct vm_area_struct * vma,
                 flush_cache_page(vma, address);
                 set_pte(page_table, pte_mkyoung(pte_mkdirty(pte_mkwrite(pte))));
                 flush_tlb_page(vma, address);
-               read_unlock(&mm->page_table_lock);
+               spin_unlock(&tsk->mm->page_table_lock);
                 return 1;
         }
  
         /*
          * Ok, we need to copy. Oh, well..
          */
-       read_unlock(&mm->page_table_lock);
+       spin_unlock(&tsk->mm->page_table_lock);
         new_page = __get_free_page(GFP_USER);
         if (!new_page)
-               return 0;
-       read_lock(&mm->page_table_lock);
+               return -1;
+       spin_lock(&tsk->mm->page_table_lock);
  
         /*
          * Re-check the pte - we dropped the lock
@@ -666,13 +666,13 @@ static int do_wp_page(struct mm_struct * mm, struct vm_area_struct * vma,
                 /* Free the old page.. */
                 new_page = old_page;
         }
-       read_unlock(&mm->page_table_lock);
+       spin_unlock(&tsk->mm->page_table_lock);
         free_page(new_page);
         return 1;
  
  bad_wp_page:
         printk("do_wp_page: bogus page at address %08lx (%08lx)\n",address,old_page);
-       return 0;
+       return -1;
  }
  
  /*
@@ -800,7 +800,7 @@ static void swapin_readahead(unsigned long entry)
         return;
  }
  
-static int do_swap_page(struct mm_struct * mm, 
+static int do_swap_page(struct task_struct * tsk,
         struct vm_area_struct * vma, unsigned long address,
         pte_t * page_table, unsigned long entry, int write_access)
  {
@@ -813,13 +813,13 @@ static int do_swap_page(struct mm_struct * mm,
                 page = read_swap_cache(entry);
                 unlock_kernel();
                 if (!page)
-                       return 0;
+                       return -1;
  
                 flush_page_to_ram(page_address(page));
         }
  
         vma->vm_mm->rss++;
-       mm->min_flt++;
+       tsk->min_flt++;
         swap_free(entry);
  
         pte = mk_pte(page_address(page), vma->vm_page_prot);
@@ -829,27 +829,30 @@ static int do_swap_page(struct mm_struct * mm,
                 pte = pte_mkwrite(pte_mkdirty(pte));
         }
         set_pte(page_table, pte);
-               
+       /* No need to invalidate - it was non-present before */
+       update_mmu_cache(vma, address, pte);
         return 1;
  }
  
  /*
   * This only needs the MM semaphore
   */
-static int do_anonymous_page(struct mm_struct * mm, struct vm_area_struct * vma, pte_t *page_table, int write_access, unsigned long addr)
+static int do_anonymous_page(struct task_struct * tsk, struct vm_area_struct * vma, pte_t *page_table, int write_access, unsigned long addr)
  {
         pte_t entry = pte_wrprotect(mk_pte(ZERO_PAGE(addr), vma->vm_page_prot));
         if (write_access) {
                 unsigned long page = __get_free_page(GFP_USER);
                 if (!page)
-                       return 0;
+                       return -1;
                 clear_page(page);
                 entry = pte_mkwrite(pte_mkdirty(mk_pte(page, vma->vm_page_prot)));
                 vma->vm_mm->rss++;
-               mm->min_flt++;
+               tsk->min_flt++;
                 flush_page_to_ram(page);
         }
         set_pte(page_table, entry);
+       /* No need to invalidate - it was non-present before */
+       update_mmu_cache(vma, addr, entry);
         return 1;
  }
  
@@ -865,14 +868,14 @@ static int do_anonymous_page(struct mm_struct * mm, struct vm_area_struct * vma,
   * This is called with the MM semaphore and the kernel lock held.
   * We need to release the kernel lock as soon as possible..
   */
-static int do_no_page(struct mm_struct * mm, struct vm_area_struct * vma,
+static int do_no_page(struct task_struct * tsk, struct vm_area_struct * vma,
         unsigned long address, int write_access, pte_t *page_table)
  {
         unsigned long page;
         pte_t entry;
  
         if (!vma->vm_ops || !vma->vm_ops->nopage)
-               return do_anonymous_page(mm, vma, page_table, write_access, address);
+               return do_anonymous_page(tsk, vma, page_table, write_access, address);
  
         /*
          * The third argument is "no_share", which tells the low-level code
@@ -881,9 +884,9 @@ static int do_no_page(struct mm_struct * mm, struct vm_area_struct * vma,
          */
         page = vma->vm_ops->nopage(vma, address & PAGE_MASK, (vma->vm_flags & VM_SHARED)?0:write_access);
         if (!page)
-               return 0;
+               return 0;       /* SIGBUS - but we _really_ should know whether it is OOM or SIGBUS */
  
-       ++mm->maj_flt;
+       ++tsk->maj_flt;
         ++vma->vm_mm->rss;
         /*
          * This silly early PAGE_DIRTY setting removes a race
@@ -904,6 +907,7 @@ static int do_no_page(struct mm_struct * mm, struct vm_area_struct * vma,
                 entry = pte_wrprotect(entry);
         set_pte(page_table, entry);
         /* no need to invalidate: a not-present page shouldn't be cached */
+       update_mmu_cache(vma, address, entry);
         return 1;
  }
  
@@ -925,7 +929,7 @@ static int do_no_page(struct mm_struct * mm, struct vm_area_struct * vma,
   * so we don't need to worry about a page being suddenly been added into
   * our VM.
   */
-static inline int handle_pte_fault(struct mm_struct *mm,
+static inline int handle_pte_fault(struct task_struct *tsk,
         struct vm_area_struct * vma, unsigned long address,
         int write_access, pte_t * pte)
  {
@@ -934,8 +938,8 @@ static inline int handle_pte_fault(struct mm_struct *mm,
         entry = *pte;
         if (!pte_present(entry)) {
                 if (pte_none(entry))
-                       return do_no_page(mm, vma, address, write_access, pte);
-               return do_swap_page(mm, vma, address, pte, pte_val(entry), write_access);
+                       return do_no_page(tsk, vma, address, write_access, pte);
+               return do_swap_page(tsk, vma, address, pte, pte_val(entry), write_access);
         }
  
         /*
@@ -943,25 +947,27 @@ static inline int handle_pte_fault(struct mm_struct *mm,
          * lock to synchronize with kswapd, and verify that the entry
          * didn't change from under us..
          */
-       read_lock(&mm->page_table_lock);
+       spin_lock(&tsk->mm->page_table_lock);
         if (pte_val(entry) == pte_val(*pte)) {
                 if (write_access) {
                         if (!pte_write(entry))
-                               return do_wp_page(mm, vma, address, pte, entry);
+                               return do_wp_page(tsk, vma, address, pte, entry);
  
                         entry = pte_mkdirty(entry);
                 }
-               set_pte(pte, pte_mkyoung(entry));
+               entry = pte_mkyoung(entry);
+               set_pte(pte, entry);
                 flush_tlb_page(vma, address);
+               update_mmu_cache(vma, address, entry);
         }
-       read_unlock(&mm->page_table_lock);
+       spin_unlock(&tsk->mm->page_table_lock);
         return 1;
  }
  
  /*
   * By the time we get here, we already hold the mm semaphore
   */
-int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct * vma,
+int handle_mm_fault(struct task_struct *tsk, struct vm_area_struct * vma,
         unsigned long address, int write_access)
  {
         pgd_t *pgd;
@@ -971,29 +977,27 @@ int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct * vma,
         pmd = pmd_alloc(pgd, address);
         if (pmd) {
                 pte_t * pte = pte_alloc(pmd, address);
-               if (pte) {
-                       if (handle_pte_fault(mm, vma, address, write_access, pte)) {
-                               update_mmu_cache(vma, address, *pte);
-                               return 1;
-                       }
-               }
+               if (pte)
+                       return handle_pte_fault(tsk, vma, address, write_access, pte);
         }
-       return 0;
+       return -1;
  }
  
  /*
   * Simplistic page force-in..
   */
-void make_pages_present(unsigned long addr, unsigned long end)
+int make_pages_present(unsigned long addr, unsigned long end)
  {
         int write;
-       struct mm_struct *mm = current->mm;
+       struct task_struct *tsk = current;
         struct vm_area_struct * vma;
  
-       vma = find_vma(mm, addr);
+       vma = find_vma(tsk->mm, addr);
         write = (vma->vm_flags & VM_WRITE) != 0;
         while (addr < end) {
-               handle_mm_fault(mm, vma, addr, write);
+               if (handle_mm_fault(tsk, vma, addr, write) < 0)
+                       return -1;
                 addr += PAGE_SIZE;
         }
+       return 0;
  }
diff --git a/mm/mmap.c b/mm/mmap.c

index 8ed2979d9680faebffad7b6fed87342cc5959e9f..c9d07a2916794b70611c4b5d0c29b03891b651d8 100644 (file)
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -460,13 +460,13 @@ struct vm_area_struct * find_vma_prev(struct mm_struct * mm, unsigned long addr,
         return NULL;
  }
  
-struct vm_area_struct * find_extend_vma(struct mm_struct * mm, unsigned long addr)
+struct vm_area_struct * find_extend_vma(struct task_struct * tsk, unsigned long addr)
  {
         struct vm_area_struct * vma;
         unsigned long start;
  
         addr &= PAGE_MASK;
-       vma = find_vma(mm,addr);
+       vma = find_vma(tsk->mm,addr);
         if (!vma)
                 return NULL;
         if (vma->vm_start <= addr)
diff --git a/mm/vmscan.c b/mm/vmscan.c

index 3567098a15a38951a9345ad4c93df8562c58eafb..1ae052b94a7ea00d561b7c1b7e92d94cfa5dfbaa 100644 (file)
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -47,7 +47,7 @@ static int try_to_swap_out(struct task_struct * tsk, struct vm_area_struct* vma,
                 goto out_failed;
  
         page = mem_map + MAP_NR(page_addr);
-       write_lock(&tsk->mm->page_table_lock);
+       spin_lock(&tsk->mm->page_table_lock);
         if (pte_val(pte) != pte_val(*page_table))
                 goto out_failed_unlock;
  
@@ -138,7 +138,7 @@ drop_pte:
         if (vma->vm_ops && vma->vm_ops->swapout) {
                 pid_t pid = tsk->pid;
                 pte_clear(page_table);
-               write_unlock(&tsk->mm->page_table_lock);
+               spin_unlock(&tsk->mm->page_table_lock);
                 flush_tlb_page(vma, address);
                 vma->vm_mm->rss--;
                 
@@ -158,9 +158,9 @@ drop_pte:
                 goto out_failed; /* No swap space left */
                 
         vma->vm_mm->rss--;
-       tsk->mm->nswap++;
+       tsk->nswap++;
         set_pte(page_table, __pte(entry));
-       write_unlock(&tsk->mm->page_table_lock);
+       spin_unlock(&tsk->mm->page_table_lock);
  
         flush_tlb_page(vma, address);
         swap_duplicate(entry);  /* One for the process, one for the swap cache */
@@ -175,7 +175,7 @@ out_free_success:
         __free_page(page);
         return 1;
  out_failed_unlock:
-       write_unlock(&tsk->mm->page_table_lock);
+       spin_unlock(&tsk->mm->page_table_lock);
  out_failed:
         return 0;
  }
@@ -352,7 +352,7 @@ static int swap_out(unsigned int priority, int gfp_mask)
                 read_lock(&tasklist_lock);
                 p = init_task.next_task;
                 for (; p != &init_task; p = p->next_task) {
-                       if (!p->mm->swappable)
+                       if (!p->swappable)
                                 continue;
                         if (p->mm->rss <= 0)
                                 continue;
diff --git a/net/core/scm.c b/net/core/scm.c

index 7e9f466cad966765cc29d7788ce91dc0148fd482..e2073166f35f244401292f1a263ea3569407cca0 100644 (file)
--- a/net/core/scm.c
+++ b/net/core/scm.c
@@ -232,8 +232,8 @@ void scm_detach_fds(struct msghdr *msg, struct scm_cookie *scm)
                         break;
                 }
                 /* Bump the usage count and install the file. */
-               atomic_inc(&fp[i]->f_count);
-               current->files->fd[new_fd] = fp[i];
+               get_file(fp[i]);
+               fd_install(new_fd, fp[i]);
         }
  
         if (i > 0)
@@ -271,10 +271,9 @@ struct scm_fp_list *scm_fp_dup(struct scm_fp_list *fpl)
  
         new_fpl = kmalloc(sizeof(*fpl), GFP_KERNEL);
         if (new_fpl) {
-               memcpy(new_fpl, fpl, sizeof(*fpl));
-
                 for (i=fpl->count-1; i>=0; i--)
-                       atomic_inc(&fpl->fp[i]->f_count);
+                       get_file(fpl->fp[i]);
+               memcpy(new_fpl, fpl, sizeof(*fpl));
         }
         return new_fpl;
  }
diff --git a/net/econet/econet.c b/net/econet/econet.c

index 8930109b37a02300026362774aa8bccc6cda000f..d790ae536bcc4acf95691a2b4033848c87f0f19a 100644 (file)
--- a/net/econet/econet.c
+++ b/net/econet/econet.c
@@ -759,7 +759,8 @@ static struct sock *ec_listening_socket(unsigned char port, unsigned char
                     (opt->station == station || opt->station == 0) &&
                     (opt->net == net || opt->net == 0))
                         return sk;
-               sk = sk->sklist_next;
+
+               sk = sk->next;
         }
  
         return NULL;
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c

index ca0f27d0cb3109b8d1df41e0b2124ece6e01c02c..15b26fa1c688a100a5d981aad0421f8d17613a9a 100644 (file)
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -5,7 +5,7 @@
   *
   *             PF_INET protocol family socket handler.
   *
- * Version:    $Id: af_inet.c,v 1.91 1999/06/09 08:28:55 davem Exp $
+ * Version:    $Id: af_inet.c,v 1.93 1999/07/02 11:26:24 davem Exp $
   *
   * Authors:    Ross Biro, <bir7@leland.Stanford.Edu>
   *             Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
@@ -162,9 +162,6 @@ static __inline__ void kill_sk_queues(struct sock *sk)
  
  static __inline__ void kill_sk_now(struct sock *sk)
  {
-       /* No longer exists. */
-       del_from_prot_sklist(sk);
-
         /* Remove from protocol hash chains. */
         sk->prot->unhash(sk);
  
@@ -239,7 +236,7 @@ int inet_setsockopt(struct socket *sock, int level, int optname,
  {
         struct sock *sk=sock->sk;
         if (sk->prot->setsockopt==NULL)
-               return(-EOPNOTSUPP);
+               return -EOPNOTSUPP;
         return sk->prot->setsockopt(sk,level,optname,optval,optlen);
  }
  
@@ -256,7 +253,7 @@ int inet_getsockopt(struct socket *sock, int level, int optname,
  {
         struct sock *sk=sock->sk;
         if (sk->prot->getsockopt==NULL)
-               return(-EOPNOTSUPP);
+               return -EOPNOTSUPP;
         return sk->prot->getsockopt(sk,level,optname,optval,optlen);
  }
  
@@ -268,12 +265,10 @@ static int inet_autobind(struct sock *sk)
  {
         /* We may need to bind the socket. */
         if (sk->num == 0) {
-               sk->num = sk->prot->good_socknum();
-               if (sk->num == 0) 
-                       return(-EAGAIN);
+               if (sk->prot->get_port(sk, 0) != 0)
+                       return -EAGAIN;
                 sk->sport = htons(sk->num);
                 sk->prot->hash(sk);
-               add_to_prot_sklist(sk);
         }
         return 0;
  }
@@ -293,29 +288,38 @@ static void inet_listen_write_space(struct sock *sk)
  int inet_listen(struct socket *sock, int backlog)
  {
         struct sock *sk = sock->sk;
+       unsigned char old_state;
  
         if (sock->state != SS_UNCONNECTED || sock->type != SOCK_STREAM)
-               return(-EINVAL);
-
-       if (inet_autobind(sk) != 0)
-               return -EAGAIN;
+               return -EINVAL;
  
-       /* We might as well re use these. */ 
         if ((unsigned) backlog == 0)    /* BSDism */
                 backlog = 1;
         if ((unsigned) backlog > SOMAXCONN)
                 backlog = SOMAXCONN;
         sk->max_ack_backlog = backlog;
-       if (sk->state != TCP_LISTEN) {
-               sk->ack_backlog = 0;
+
+       /* Really, if the socket is already in listen state
+        * we can only allow the backlog to be adjusted.
+        */
+       old_state = sk->state;
+       if (old_state != TCP_LISTEN) {
                 sk->state = TCP_LISTEN;
+               sk->ack_backlog = 0;
+               if (sk->num == 0) {
+                       if (sk->prot->get_port(sk, 0) != 0) {
+                               sk->state = old_state;
+                               return -EAGAIN;
+                       }
+                       sk->sport = htons(sk->num);
+               }
+
                 dst_release(xchg(&sk->dst_cache, NULL));
-               sk->prot->rehash(sk);
-               add_to_prot_sklist(sk);
+               sk->prot->hash(sk);
+               sk->socket->flags |= SO_ACCEPTCON;
                 sk->write_space = inet_listen_write_space;
         }
-       sk->socket->flags |= SO_ACCEPTCON;
-       return(0);
+       return 0;
  }
  
  /*
@@ -427,7 +431,6 @@ static int inet_create(struct socket *sock, int protocol)
  
                 /* Add to protocol hash chains. */
                 sk->prot->hash(sk);
-               add_to_prot_sklist(sk);
         }
  
         if (sk->prot->init) {
@@ -486,11 +489,9 @@ int inet_release(struct socket *sock, struct socket *peersock)
                  */
                 timeout = 0;
                 if (sk->linger && !(current->flags & PF_EXITING)) {
-                       timeout = MAX_SCHEDULE_TIMEOUT;
-
-                       /* XXX This makes no sense whatsoever... -DaveM */
-                       if (!sk->lingertime)
-                               timeout = HZ*sk->lingertime;
+                       timeout = HZ * sk->lingertime;
+                       if (!timeout)
+                               timeout = MAX_SCHEDULE_TIMEOUT;
                 }
                 sock->sk = NULL;
                 sk->socket = NULL;
@@ -543,21 +544,17 @@ static int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
         if((snum >= PORT_MASQ_BEGIN) && (snum <= PORT_MASQ_END))
                 return -EADDRINUSE;
  #endif          
-       if (snum == 0) 
-               snum = sk->prot->good_socknum();
-       if (snum < PROT_SOCK && !capable(CAP_NET_BIND_SERVICE))
-               return(-EACCES);
+       if (snum && snum < PROT_SOCK && !capable(CAP_NET_BIND_SERVICE))
+               return -EACCES;
         
         /* Make sure we are allowed to bind here. */
-       if(sk->prot->verify_bind(sk, snum))
+       if (sk->prot->get_port(sk, snum) != 0)
                 return -EADDRINUSE;
  
-       sk->num = snum;
-       sk->sport = htons(snum);
+       sk->sport = htons(sk->num);
         sk->daddr = 0;
         sk->dport = 0;
-       sk->prot->rehash(sk);
-       add_to_prot_sklist(sk);
+       sk->prot->hash(sk);
         dst_release(sk->dst_cache);
         sk->dst_cache=NULL;
         return(0);
@@ -570,12 +567,12 @@ int inet_dgram_connect(struct socket *sock, struct sockaddr * uaddr,
         int err;
  
         if (inet_autobind(sk) != 0)
-               return(-EAGAIN);
+               return -EAGAIN;
         if (sk->prot->connect == NULL) 
-               return(-EOPNOTSUPP);
+               return -EOPNOTSUPP;
         err = sk->prot->connect(sk, (struct sockaddr *)uaddr, addr_len);
         if (err < 0) 
-               return(err);
+               return err;
         return(0);
  }
  
@@ -626,18 +623,20 @@ int inet_stream_connect(struct socket *sock, struct sockaddr * uaddr,
                 if (flags & O_NONBLOCK)
                         return -EALREADY;
         } else {
+               if (sk->prot->connect == NULL) 
+                       return -EOPNOTSUPP;
+
                 /* We may need to bind the socket. */
                 if (inet_autobind(sk) != 0)
-                       return(-EAGAIN);
-               if (sk->prot->connect == NULL) 
-                       return(-EOPNOTSUPP);
+                       return -EAGAIN;
+
                 err = sk->prot->connect(sk, uaddr, addr_len);
                 /* Note: there is a theoretical race here when an wake up
                    occurred before inet_wait_for_connect is entered. In 2.3
                    the wait queue setup should be moved before the low level
                    connect call. -AK*/
                 if (err < 0)
-                       return(err);
+                       return err;
                 sock->state = SS_CONNECTING;
         }
         
@@ -645,7 +644,7 @@ int inet_stream_connect(struct socket *sock, struct sockaddr * uaddr,
                 goto sock_error;
  
         if (sk->state != TCP_ESTABLISHED && (flags & O_NONBLOCK)) 
-               return (-EINPROGRESS);
+               return -EINPROGRESS;
  
         if (sk->state == TCP_SYN_SENT || sk->state == TCP_SYN_RECV) {
                 inet_wait_for_connect(sk);
@@ -656,7 +655,7 @@ int inet_stream_connect(struct socket *sock, struct sockaddr * uaddr,
         sock->state = SS_CONNECTED;
         if ((sk->state != TCP_ESTABLISHED) && sk->err)
                 goto sock_error; 
-       return(0);
+       return 0;
  
  sock_error:    
         /* This is ugly but needed to fix a race in the ICMP error handler */
@@ -750,7 +749,7 @@ static int inet_getname(struct socket *sock, struct sockaddr *uaddr,
         sin->sin_family = AF_INET;
         if (peer) {
                 if (!tcp_connected(sk->state)) 
-                       return(-ENOTCONN);
+                       return -ENOTCONN;
                 sin->sin_port = sk->dport;
                 sin->sin_addr.s_addr = sk->daddr;
         } else {
@@ -774,12 +773,12 @@ int inet_recvmsg(struct socket *sock, struct msghdr *msg, int size,
         int err;
         
         if (sock->flags & SO_ACCEPTCON)
-               return(-EINVAL);
+               return -EINVAL;
         if (sk->prot->recvmsg == NULL) 
-               return(-EOPNOTSUPP);
+               return -EOPNOTSUPP;
         /* We may need to bind the socket. */
         if (inet_autobind(sk) != 0)
-               return(-EAGAIN);
+               return -EAGAIN;
         err = sk->prot->recvmsg(sk, msg, size, flags&MSG_DONTWAIT,
                                 flags&~MSG_DONTWAIT, &addr_len);
         if (err >= 0)
@@ -796,15 +795,15 @@ int inet_sendmsg(struct socket *sock, struct msghdr *msg, int size,
         if (sk->shutdown & SEND_SHUTDOWN) {
                 if (!(msg->msg_flags&MSG_NOSIGNAL))
                         send_sig(SIGPIPE, current, 1);
-               return(-EPIPE);
+               return -EPIPE;
         }
         if (sk->prot->sendmsg == NULL) 
-               return(-EOPNOTSUPP);
+               return -EOPNOTSUPP;
         if(sk->err)
                 return sock_error(sk);
  
         /* We may need to bind the socket. */
-       if(inet_autobind(sk) != 0)
+       if (inet_autobind(sk) != 0)
                 return -EAGAIN;
  
         return sk->prot->sendmsg(sk, msg, size);
@@ -822,11 +821,13 @@ int inet_shutdown(struct socket *sock, int how)
                        1->2 bit 2 snds.
                        2->3 */
         if ((how & ~SHUTDOWN_MASK) || how==0)   /* MAXINT->0 */
-               return(-EINVAL);
+               return -EINVAL;
+       if (!sk)
+               return -ENOTCONN;
         if (sock->state == SS_CONNECTING && sk->state == TCP_ESTABLISHED)
                 sock->state = SS_CONNECTED;
-       if (!sk || !tcp_connected(sk->state)) 
-               return(-ENOTCONN);
+       if (!tcp_connected(sk->state)) 
+               return -ENOTCONN;
         sk->shutdown |= how;
         if (sk->prot->shutdown)
                 sk->prot->shutdown(sk, how);
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c

index 52c5ee5a40b52bbe1364d81dd7b56e6a9575c66a..7057c343aef19d198fdf69be3115eef03435c8d0 100644 (file)
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -7,7 +7,7 @@
   *             PROC file system.  It is mainly used for debugging and
   *             statistics.
   *
- * Version:    $Id: proc.c,v 1.35 1999/05/27 00:37:38 davem Exp $
+ * Version:    $Id: proc.c,v 1.36 1999/07/02 11:26:34 davem Exp $
   *
   * Authors:    Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
   *             Gerald J. Heim, <heim@peanuts.informatik.uni-tuebingen.de>
@@ -50,189 +50,6 @@
  #include <net/sock.h>
  #include <net/raw.h>
  
-/* Format a single open_request into tmpbuf. */
-static inline void get__openreq(struct sock *sk, struct open_request *req, 
-                               char *tmpbuf, 
-                               int i)
-{
-       sprintf(tmpbuf, "%4d: %08lX:%04X %08lX:%04X"
-               " %02X %08X:%08X %02X:%08lX %08X %5d %8d %u",
-               i,
-               (long unsigned int)req->af.v4_req.loc_addr,
-               ntohs(sk->sport),
-               (long unsigned int)req->af.v4_req.rmt_addr,
-               ntohs(req->rmt_port),
-               TCP_SYN_RECV,
-               0,0, /* could print option size, but that is af dependent. */
-               1,   /* timers active (only the expire timer) */  
-               (unsigned long)(req->expires - jiffies), 
-               req->retrans,
-               sk->socket ? sk->socket->inode->i_uid : 0,
-               0,  /* non standard timer */  
-               0 /* open_requests have no inode */
-               ); 
-}
-
-/* Format a single socket into tmpbuf. */
-static inline void get__sock(struct sock *sp, char *tmpbuf, int i, int format)
-{
-       unsigned long  dest, src;
-       unsigned short destp, srcp;
-       int timer_active, timer_active1, timer_active2;
-       int tw_bucket = 0;
-       unsigned long timer_expires;
-       struct tcp_opt *tp = &sp->tp_pinfo.af_tcp;
-
-       dest  = sp->daddr;
-       src   = sp->rcv_saddr;
-       destp = sp->dport;
-       srcp  = sp->sport;
-       
-       /* FIXME: The fact that retransmit_timer occurs as a field
-        * in two different parts of the socket structure is,
-        * to say the least, confusing. This code now uses the
-        * right retransmit_timer variable, but I'm not sure
-        * the rest of the timer stuff is still correct.
-        * In particular I'm not sure what the timeout value
-        * is suppose to reflect (as opposed to tm->when). -- erics
-        */
-       
-       destp = ntohs(destp);
-       srcp  = ntohs(srcp);
-       if((format == 0) && (sp->state == TCP_TIME_WAIT)) {
-               extern int tcp_tw_death_row_slot;
-               struct tcp_tw_bucket *tw = (struct tcp_tw_bucket *)sp;
-               int slot_dist;
-
-               tw_bucket       = 1;
-               timer_active1   = timer_active2 = 0;
-               timer_active    = 3;
-               slot_dist       = tw->death_slot;
-               if(slot_dist > tcp_tw_death_row_slot)
-                       slot_dist = (TCP_TWKILL_SLOTS - slot_dist) + tcp_tw_death_row_slot;
-               else
-                       slot_dist = tcp_tw_death_row_slot - slot_dist;
-               timer_expires   = jiffies + (slot_dist * TCP_TWKILL_PERIOD);
-       } else {
-               timer_active1 = tp->retransmit_timer.prev != NULL;
-               timer_active2 = sp->timer.prev != NULL;
-               timer_active    = 0;
-               timer_expires   = (unsigned) -1;
-       }
-       if (timer_active1 && tp->retransmit_timer.expires < timer_expires) {
-               timer_active    = 1;
-               timer_expires   = tp->retransmit_timer.expires;
-       }
-       if (timer_active2 && sp->timer.expires < timer_expires) {
-               timer_active    = 2;
-               timer_expires   = sp->timer.expires;
-       }
-       if(timer_active == 0)
-               timer_expires = jiffies;
-       sprintf(tmpbuf, "%4d: %08lX:%04X %08lX:%04X"
-               " %02X %08X:%08X %02X:%08lX %08X %5d %8d %ld",
-               i, src, srcp, dest, destp, sp->state, 
-               (tw_bucket ?
-                0 :
-                (format == 0) ?
-                tp->write_seq-tp->snd_una : atomic_read(&sp->wmem_alloc)),
-               (tw_bucket ?
-                0 :
-                (format == 0) ?
-                tp->rcv_nxt-tp->copied_seq: atomic_read(&sp->rmem_alloc)),
-               timer_active, timer_expires-jiffies,
-               (tw_bucket ? 0 : tp->retransmits),
-               (!tw_bucket && sp->socket) ? sp->socket->inode->i_uid : 0,
-               (!tw_bucket && timer_active) ? sp->timeout : 0,
-               (!tw_bucket && sp->socket) ? sp->socket->inode->i_ino : 0);
-}
-
-/*
- * Get__netinfo returns the length of that string.
- *
- * KNOWN BUGS
- *  As in get_unix_netinfo, the buffer might be too small. If this
- *  happens, get__netinfo returns only part of the available infos.
- *
- *  Assumes that buffer length is a multiply of 128 - if not it will
- *  write past the end.   
- */
-static int
-get__netinfo(struct proto *pro, char *buffer, int format, char **start, off_t offset, int length)
-{
-       struct sock *sp, *next;
-       int len=0, i = 0;
-       off_t pos=0;
-       off_t begin;
-       char tmpbuf[129];
-  
-       if (offset < 128) 
-               len += sprintf(buffer, "%-127s\n",
-                              "  sl  local_address rem_address   st tx_queue "
-                              "rx_queue tr tm->when retrnsmt   uid  timeout inode");
-       pos = 128;
-       SOCKHASH_LOCK_READ();
-       sp = pro->sklist_next;
-       while(sp != (struct sock *)pro) {
-               if (format == 0 && sp->state == TCP_LISTEN) {
-                       struct open_request *req;
-
-                       for (req = sp->tp_pinfo.af_tcp.syn_wait_queue; req;
-                            i++, req = req->dl_next) {
-                               if (req->sk)
-                                       continue;
-                               pos += 128;
-                               if (pos < offset) 
-                                       continue;
-                               get__openreq(sp, req, tmpbuf, i); 
-                               len += sprintf(buffer+len, "%-127s\n", tmpbuf);
-                               if(len >= length) 
-                                       goto out;
-                       }
-               }
-               
-               pos += 128;
-               if (pos < offset)
-                       goto next;
-               
-               get__sock(sp, tmpbuf, i, format);
-               
-               len += sprintf(buffer+len, "%-127s\n", tmpbuf);
-               if(len >= length)
-                       break;
-       next:
-               next = sp->sklist_next;
-               sp = next;
-               i++;
-       }
-out: 
-       SOCKHASH_UNLOCK_READ();
-       
-       begin = len - (pos - offset);
-       *start = buffer + begin;
-       len -= begin;
-       if(len>length)
-               len = length;
-       if (len<0)
-               len = 0; 
-       return len;
-} 
-
-int tcp_get_info(char *buffer, char **start, off_t offset, int length, int dummy)
-{
-       return get__netinfo(&tcp_prot, buffer,0, start, offset, length);
-}
-
-int udp_get_info(char *buffer, char **start, off_t offset, int length, int dummy)
-{
-       return get__netinfo(&udp_prot, buffer,1, start, offset, length);
-}
-
-int raw_get_info(char *buffer, char **start, off_t offset, int length, int dummy)
-{
-       return get__netinfo(&raw_prot, buffer,1, start, offset, length);
-}
-
  /*
   *     Report socket allocation statistics [mea@utu.fi]
   */
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c

index dd2e7555e64a39bfbc179bbb4a03e7d7ee271bef..584fe81fc7abf13e4ce9c95740b32809eb65bea1 100644 (file)
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -5,7 +5,7 @@
   *
   *             RAW - implementation of IP "raw" sockets.
   *
- * Version:    $Id: raw.c,v 1.41 1999/05/30 01:16:19 davem Exp $
+ * Version:    $Id: raw.c,v 1.42 1999/07/02 11:26:26 davem Exp $
   *
   * Authors:    Ross Biro, <bir7@leland.Stanford.Edu>
   *             Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
@@ -70,60 +70,32 @@ struct sock *raw_v4_htable[RAWV4_HTABLE_SIZE];
  
  static void raw_v4_hash(struct sock *sk)
  {
-       struct sock **skp;
-       int num = sk->num;
+       struct sock **skp = &raw_v4_htable[sk->num & (RAWV4_HTABLE_SIZE - 1)];
  
-       num &= (RAWV4_HTABLE_SIZE - 1);
-       skp = &raw_v4_htable[num];
         SOCKHASH_LOCK_WRITE();
-       sk->next = *skp;
+       if ((sk->next = *skp) != NULL)
+               (*skp)->pprev = &sk->next;
         *skp = sk;
-       sk->hashent = num;
+       sk->pprev = skp;
+       sk->prot->inuse++;
+       if(sk->prot->highestinuse < sk->prot->inuse)
+               sk->prot->highestinuse = sk->prot->inuse;
         SOCKHASH_UNLOCK_WRITE();
  }
  
  static void raw_v4_unhash(struct sock *sk)
  {
-       struct sock **skp;
-       int num = sk->num;
-
-       num &= (RAWV4_HTABLE_SIZE - 1);
-       skp = &raw_v4_htable[num];
-
         SOCKHASH_LOCK_WRITE();
-       while(*skp != NULL) {
-               if(*skp == sk) {
-                       *skp = sk->next;
-                       break;
-               }
-               skp = &((*skp)->next);
+       if (sk->pprev) {
+               if (sk->next)
+                       sk->next->pprev = sk->pprev;
+               *sk->pprev = sk->next;
+               sk->pprev = NULL;
+               sk->prot->inuse--;
         }
         SOCKHASH_UNLOCK_WRITE();
  }
  
-static void raw_v4_rehash(struct sock *sk)
-{
-       struct sock **skp;
-       int num = sk->num;
-       int oldnum = sk->hashent;
-
-       num &= (RAWV4_HTABLE_SIZE - 1);
-       skp = &raw_v4_htable[oldnum];
-
-       SOCKHASH_LOCK_WRITE();
-       while(*skp != NULL) {
-               if(*skp == sk) {
-                       *skp = sk->next;
-                       break;
-               }
-               skp = &((*skp)->next);
-       }
-       sk->next = raw_v4_htable[num];
-       raw_v4_htable[num] = sk;
-       sk->hashent = num;
-       SOCKHASH_UNLOCK_WRITE();
-}
-
  static __inline__ struct sock *__raw_v4_lookup(struct sock *sk, unsigned short num,
                                                unsigned long raddr, unsigned long laddr,
                                                int dif)
@@ -640,9 +612,69 @@ static int raw_getsockopt(struct sock *sk, int level, int optname,
         return -ENOPROTOOPT;
  }
  
+static void get_raw_sock(struct sock *sp, char *tmpbuf, int i)
+{
+       unsigned int dest, src;
+       __u16 destp, srcp;
+       int timer_active;
+       unsigned long timer_expires;
+
+       dest  = sp->daddr;
+       src   = sp->rcv_saddr;
+       destp = ntohs(sp->dport);
+       srcp  = ntohs(sp->sport);
+       timer_active = (sp->timer.prev != NULL) ? 2 : 0;
+       timer_expires = (timer_active == 2 ? sp->timer.expires : jiffies);
+       sprintf(tmpbuf, "%4d: %08X:%04X %08X:%04X"
+               " %02X %08X:%08X %02X:%08lX %08X %5d %8d %ld",
+               i, src, srcp, dest, destp, sp->state, 
+               atomic_read(&sp->wmem_alloc), atomic_read(&sp->rmem_alloc),
+               timer_active, timer_expires-jiffies, 0,
+               sp->socket ? sp->socket->inode->i_uid : 0, timer_active ? sp->timeout : 0,
+               sp->socket ? sp->socket->inode->i_ino : 0);
+}
+
+int raw_get_info(char *buffer, char **start, off_t offset, int length, int dummy)
+{
+       int len = 0, num = 0, i;
+       off_t pos = 0;
+       off_t begin;
+       char tmpbuf[129];
+
+       if (offset < 128) 
+               len += sprintf(buffer, "%-127s\n",
+                              "  sl  local_address rem_address   st tx_queue "
+                              "rx_queue tr tm->when retrnsmt   uid  timeout inode");
+       pos = 128;
+       SOCKHASH_LOCK_READ();
+       for (i = 0; i < RAWV4_HTABLE_SIZE; i++) {
+               struct sock *sk;
+
+               for (sk = raw_v4_htable[i]; sk; sk = sk->next, num++) {
+                       if (sk->family != PF_INET)
+                               continue;
+                       pos += 128;
+                       if (pos < offset)
+                               continue;
+                       get_raw_sock(sk, tmpbuf, i);
+                       len += sprintf(buffer+len, "%-127s\n", tmpbuf);
+                       if(len >= length)
+                               goto out;
+               }
+       }
+out:
+       SOCKHASH_UNLOCK_READ();
+       begin = len - (pos - offset);
+       *start = buffer + begin;
+       len -= begin;
+       if(len > length)
+               len = length;
+       if (len < 0)
+               len = 0; 
+       return len;
+}
+
  struct proto raw_prot = {
-       (struct sock *)&raw_prot,       /* sklist_next */
-       (struct sock *)&raw_prot,       /* sklist_prev */
         raw_close,                      /* close */
         udp_connect,                    /* connect */
         NULL,                           /* accept */
@@ -666,9 +698,7 @@ struct proto raw_prot = {
         raw_rcv_skb,                    /* backlog_rcv */
         raw_v4_hash,                    /* hash */
         raw_v4_unhash,                  /* unhash */
-       raw_v4_rehash,                  /* rehash */
-       NULL,                           /* good_socknum */
-       NULL,                           /* verify_bind */
+       NULL,                           /* get_port */
         128,                            /* max_header */
         0,                              /* retransmits */
         "RAW",                          /* name */
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c

index af4165fce4c580e24a8829cc741e7d4f30f4f1dc..3080bc201aa24dceb7631ae3c66de8bff34b5df1 100644 (file)
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -5,7 +5,7 @@
   *
   *             Implementation of the Transmission Control Protocol(TCP).
   *
- * Version:    $Id: tcp_input.c,v 1.169 1999/06/09 08:29:13 davem Exp $
+ * Version:    $Id: tcp_input.c,v 1.170 1999/07/02 11:26:28 davem Exp $
   *
   * Authors:    Ross Biro, <bir7@leland.Stanford.Edu>
   *             Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
@@ -917,25 +917,26 @@ extern void tcp_tw_deschedule(struct tcp_tw_bucket *tw);
  /* Must be called only from BH context. */
  void tcp_timewait_kill(struct tcp_tw_bucket *tw)
  {
+       struct tcp_bind_bucket *tb = tw->tb;
+
         SOCKHASH_LOCK_WRITE_BH();
  
-       /* Unlink from various places. */
+       /* Disassociate with bind bucket. */
         if(tw->bind_next)
                 tw->bind_next->bind_pprev = tw->bind_pprev;
         *(tw->bind_pprev) = tw->bind_next;
-       if(tw->tb->owners == NULL)
-               tcp_inc_slow_timer(TCP_SLT_BUCKETGC);
+       if (tb->owners == NULL) {
+               if (tb->next)
+                       tb->next->pprev = tb->pprev;
+               *(tb->pprev) = tb->next;
+               kmem_cache_free(tcp_bucket_cachep, tb);
+       }
  
+       /* Unlink from established hashes. */
         if(tw->next)
                 tw->next->pprev = tw->pprev;
         *tw->pprev = tw->next;
  
-       /* We decremented the prot->inuse count when we entered TIME_WAIT
-        * and the sock from which this came was destroyed.
-        */
-       tw->sklist_next->sklist_prev = tw->sklist_prev;
-       tw->sklist_prev->sklist_next = tw->sklist_next;
-
         SOCKHASH_UNLOCK_WRITE_BH();
  
         /* Ok, now free it up. */
@@ -1040,11 +1041,9 @@ static __inline__ void tcp_tw_hashdance(struct sock *sk, struct tcp_tw_bucket *t
                 sk->bind_next->bind_pprev = &tw->bind_next;
         tw->bind_pprev = sk->bind_pprev;
         *sk->bind_pprev = (struct sock *)tw;
+       sk->prev = NULL;
  
-       /* Step 3: Same for the protocol sklist. */
-       (tw->sklist_next = sk->sklist_next)->sklist_prev = (struct sock *)tw;
-       (tw->sklist_prev = sk->sklist_prev)->sklist_next = (struct sock *)tw;
-       sk->sklist_next = NULL;
+       /* Step 3: Un-charge protocol socket in-use count. */
         sk->prot->inuse--;
  
         /* Step 4: Hash TW into TIMEWAIT half of established hash table. */
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c

index 564e859f259c0c5411b5088388a343f51ab90ef4..957ea9d38af530eeadec5c86681f3fd76372f2c5 100644 (file)
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -5,7 +5,7 @@
   *
   *             Implementation of the Transmission Control Protocol(TCP).
   *
- * Version:    $Id: tcp_ipv4.c,v 1.180 1999/06/09 08:29:19 davem Exp $
+ * Version:    $Id: tcp_ipv4.c,v 1.181 1999/07/02 11:26:31 davem Exp $
   *
   *             IPv4 specific functions
   *
@@ -132,28 +132,9 @@ static __inline__ int tcp_sk_hashfn(struct sock *sk)
         return tcp_hashfn(laddr, lport, faddr, fport);
  }
  
-/* Invariant, sk->num is non-zero. */
-void tcp_bucket_unlock(struct sock *sk)
-{
-       struct tcp_bind_bucket *tb;
-       unsigned short snum = sk->num;
-
-       SOCKHASH_LOCK_WRITE();
-       for(tb = tcp_bhash[tcp_bhashfn(snum)]; tb; tb = tb->next) {
-               if(tb->port == snum) {
-                       if(tb->owners == NULL &&
-                          (tb->flags & TCPB_FLAG_LOCKED)) {
-                               tb->flags &= ~(TCPB_FLAG_LOCKED |
-                                              TCPB_FLAG_FASTREUSE);
-                               tcp_inc_slow_timer(TCP_SLT_BUCKETGC);
-                       }
-                       break;
-               }
-       }
-       SOCKHASH_UNLOCK_WRITE();
-}
-
-/* The sockhash lock must be held as a writer here. */
+/* Allocate and initialize a new TCP local port bind bucket.
+ * The sockhash lock must be held as a writer here.
+ */
  struct tcp_bind_bucket *tcp_bucket_create(unsigned short snum)
  {
         struct tcp_bind_bucket *tb;
@@ -163,7 +144,7 @@ struct tcp_bind_bucket *tcp_bucket_create(unsigned short snum)
                 struct tcp_bind_bucket **head =
                         &tcp_bhash[tcp_bhashfn(snum)];
                 tb->port = snum;
-               tb->flags = TCPB_FLAG_LOCKED;
+               tb->fastreuse = 0;
                 tb->owners = NULL;
                 if((tb->next = *head) != NULL)
                         tb->next->pprev = &tb->next;
@@ -186,133 +167,176 @@ static __inline__ int tcp_bucket_check(unsigned short snum)
         tb = tcp_bhash[tcp_bhashfn(snum)];
         for( ; (tb && (tb->port != snum)); tb = tb->next)
                 ;
-       if(tb == NULL && tcp_bucket_create(snum) == NULL)
-               ret = 1;
+       ret = 0;
+       if (tb == NULL) {
+               if ((tb = tcp_bucket_create(snum)) == NULL)
+                       ret = 1;
+       }
         SOCKHASH_UNLOCK_WRITE();
  
         return ret;
  }
  #endif
  
-static int tcp_v4_verify_bind(struct sock *sk, unsigned short snum)
+static __inline__ void __tcp_inherit_port(struct sock *sk, struct sock *child)
+{
+       struct tcp_bind_bucket *tb = (struct tcp_bind_bucket *)sk->prev;
+
+       if ((child->bind_next = tb->owners) != NULL)
+               tb->owners->bind_pprev = &child->bind_next;
+       tb->owners = child;
+       child->bind_pprev = &tb->owners;
+       child->prev = (struct sock *) tb;
+}
+
+__inline__ void tcp_inherit_port(struct sock *sk, struct sock *child)
+{
+       SOCKHASH_LOCK_WRITE();
+       __tcp_inherit_port(sk, child);
+       SOCKHASH_UNLOCK_WRITE();
+}
+
+/* Obtain a reference to a local port for the given sock,
+ * if snum is zero it means select any available local port.
+ */
+static int tcp_v4_get_port(struct sock *sk, unsigned short snum)
  {
         struct tcp_bind_bucket *tb;
-       int result = 0;
  
         SOCKHASH_LOCK_WRITE();
-       for(tb = tcp_bhash[tcp_bhashfn(snum)];
-           (tb && (tb->port != snum));
-           tb = tb->next)
-               ;
-       if(tb && tb->owners) {
-               /* Fast path for reuse ports, see include/net/tcp.h for a very
-                * detailed description of why this works, and why it is worth
-                * the effort at all. -DaveM
-                */
-               if((tb->flags & TCPB_FLAG_FASTREUSE)    &&
-                  (sk->reuse != 0)) {
-                       goto go_like_smoke;
+       if (snum == 0) {
+               int rover = tcp_port_rover;
+               int low = sysctl_local_port_range[0];
+               int high = sysctl_local_port_range[1];
+               int remaining = (high - low) + 1;
+
+               do {    rover++;
+                       if ((rover < low) || (rover > high))
+                               rover = low;
+                       tb = tcp_bhash[tcp_bhashfn(rover)];
+                       for ( ; tb; tb = tb->next)
+                               if (tb->port == rover)
+                                       goto next;
+                       break;
+               next:
+               } while (--remaining > 0);
+               tcp_port_rover = rover;
+
+               /* Exhausted local port range during search? */
+               if (remaining <= 0)
+                       goto fail;
+
+               /* OK, here is the one we will use. */
+               snum = rover;
+               tb = NULL;
+       } else {
+               for (tb = tcp_bhash[tcp_bhashfn(snum)];
+                    tb != NULL;
+                    tb = tb->next)
+                       if (tb->port == snum)
+                               break;
+       }
+       if (tb != NULL && tb->owners != NULL) {
+               if (tb->fastreuse != 0 && sk->reuse != 0) {
+                       goto success;
                 } else {
-                       struct sock *sk2;
+                       struct sock *sk2 = tb->owners;
                         int sk_reuse = sk->reuse;
  
-                       /* We must walk the whole port owner list in this case. -DaveM */
-                       for(sk2 = tb->owners; sk2; sk2 = sk2->bind_next) {
+                       for( ; sk2 != NULL; sk2 = sk2->bind_next) {
                                 if (sk->bound_dev_if == sk2->bound_dev_if) {
-                                       if(!sk_reuse || !sk2->reuse || sk2->state == TCP_LISTEN) {
-                                               if(!sk2->rcv_saddr              ||
-                                                  !sk->rcv_saddr               ||
-                                                  (sk2->rcv_saddr == sk->rcv_saddr))
+                                       if (!sk_reuse   ||
+                                           !sk2->reuse ||
+                                           sk2->state == TCP_LISTEN) {
+                                               if (!sk2->rcv_saddr     ||
+                                                   !sk->rcv_saddr      ||
+                                                   (sk2->rcv_saddr == sk->rcv_saddr))
                                                         break;
                                         }
                                 }
                         }
-                       if(sk2 != NULL)
-                               result = 1;
+                       /* If we found a conflict, fail. */
+                       if (sk2 != NULL)
+                               goto fail;
                 }
         }
-       if(result == 0) {
-               if(tb == NULL) {
-                       if((tb = tcp_bucket_create(snum)) == NULL)
-                               result = 1;
-                       else if (sk->reuse && sk->state != TCP_LISTEN)
-                               tb->flags |= TCPB_FLAG_FASTREUSE;
-               } else {
-                       /* It could be pending garbage collection, this
-                        * kills the race and prevents it from disappearing
-                        * out from under us by the time we use it.  -DaveM
-                        */
-                       if(tb->owners == NULL) {
-                               if (!(tb->flags & TCPB_FLAG_LOCKED)) {
-                                       tb->flags = (TCPB_FLAG_LOCKED |
-                                                    ((sk->reuse &&
-                                                      sk->state != TCP_LISTEN) ?
-                                                     TCPB_FLAG_FASTREUSE : 0));
-                                       tcp_dec_slow_timer(TCP_SLT_BUCKETGC);
-                               } else if (!(tb->flags & TCPB_FLAG_GOODSOCKNUM)) {
-                                       /* Someone is in between the bind
-                                        * and the actual connect or listen.
-                                        * See if it was a legitimate reuse
-                                        * and we are as well, else punt.
-                                        */
-                                       if (sk->reuse == 0 ||
-                                           !(tb->flags & TCPB_FLAG_FASTREUSE))
-                                               result = 1;
-                               } else
-                                       tb->flags &= ~TCPB_FLAG_GOODSOCKNUM;
-                       }
-               }
-       }
-go_like_smoke:
+       if (tb == NULL &&
+           (tb = tcp_bucket_create(snum)) == NULL)
+                       goto fail;
+       if (tb->owners == NULL) {
+               if (sk->reuse && sk->state != TCP_LISTEN)
+                       tb->fastreuse = 1;
+               else
+                       tb->fastreuse = 0;
+       } else if (tb->fastreuse &&
+                  ((sk->reuse == 0) || (sk->state == TCP_LISTEN)))
+               tb->fastreuse = 0;
+success:
+       sk->num = snum;
+       if ((sk->bind_next = tb->owners) != NULL)
+               tb->owners->bind_pprev = &sk->bind_next;
+       tb->owners = sk;
+       sk->bind_pprev = &tb->owners;
+       sk->prev = (struct sock *) tb;
+
         SOCKHASH_UNLOCK_WRITE();
-       return result;
+       return 0;
+
+fail:
+       SOCKHASH_UNLOCK_WRITE();
+       return 1;
  }
  
-unsigned short tcp_good_socknum(void)
+/* Get rid of any references to a local port held by the
+ * given sock.
+ */
+__inline__ void __tcp_put_port(struct sock *sk)
  {
         struct tcp_bind_bucket *tb;
-       int low = sysctl_local_port_range[0];
-       int high = sysctl_local_port_range[1];
-       int remaining = (high - low) + 1;
-       int rover;
  
+       tb = (struct tcp_bind_bucket *) sk->prev;
+       if (sk->bind_next)
+               sk->bind_next->bind_pprev = sk->bind_pprev;
+       *(sk->bind_pprev) = sk->bind_next;
+       sk->prev = NULL;
+       if (tb->owners == NULL) {
+               if (tb->next)
+                       tb->next->pprev = tb->pprev;
+               *(tb->pprev) = tb->next;
+               kmem_cache_free(tcp_bucket_cachep, tb);
+       }
+}
+
+void tcp_put_port(struct sock *sk)
+{
         SOCKHASH_LOCK_WRITE();
-       rover = tcp_port_rover;
-       do {
-               rover += 1;
-               if((rover < low) || (rover > high))
-                       rover = low;
-               tb = tcp_bhash[tcp_bhashfn(rover)];
-               for( ; tb; tb = tb->next) {
-                       if(tb->port == rover)
-                               goto next;
-               }
-               break;
-       next:
-       } while(--remaining > 0);
-       tcp_port_rover = rover;
-       tb = NULL;
-       if((remaining <= 0) || ((tb = tcp_bucket_create(rover)) == NULL))
-               rover = 0;
-       if (tb != NULL)
-               tb->flags |= TCPB_FLAG_GOODSOCKNUM;
+       __tcp_put_port(sk);
         SOCKHASH_UNLOCK_WRITE();
+}
+
+static __inline__ void __tcp_v4_hash(struct sock *sk)
+{
+       struct sock **skp;
  
-       return rover;
+       if(sk->state == TCP_LISTEN)
+               skp = &tcp_listening_hash[tcp_sk_listen_hashfn(sk)];
+       else
+               skp = &tcp_ehash[(sk->hashent = tcp_sk_hashfn(sk))];
+
+       if((sk->next = *skp) != NULL)
+               (*skp)->pprev = &sk->next;
+       *skp = sk;
+       sk->pprev = skp;
+       sk->prot->inuse++;
+       if(sk->prot->highestinuse < sk->prot->inuse)
+               sk->prot->highestinuse = sk->prot->inuse;
  }
  
  static void tcp_v4_hash(struct sock *sk)
  {
         if (sk->state != TCP_CLOSE) {
-               struct sock **skp;
-
                 SOCKHASH_LOCK_WRITE();
-               skp = &tcp_ehash[(sk->hashent = tcp_sk_hashfn(sk))];
-               if((sk->next = *skp) != NULL)
-                       (*skp)->pprev = &sk->next;
-               *skp = sk;
-               sk->pprev = skp;
-               tcp_sk_bindify(sk);
+               __tcp_v4_hash(sk);
                 SOCKHASH_UNLOCK_WRITE();
         }
  }
@@ -325,39 +349,9 @@ static void tcp_v4_unhash(struct sock *sk)
                         sk->next->pprev = sk->pprev;
                 *sk->pprev = sk->next;
                 sk->pprev = NULL;
+               sk->prot->inuse--;
                 tcp_reg_zap(sk);
-               tcp_sk_unbindify(sk);
-       }
-       SOCKHASH_UNLOCK_WRITE();
-}
-
-static void tcp_v4_rehash(struct sock *sk)
-{
-       unsigned char state;
-
-       SOCKHASH_LOCK_WRITE();
-       state = sk->state;
-       if(sk->pprev != NULL) {
-               if(sk->next)
-                       sk->next->pprev = sk->pprev;
-               *sk->pprev = sk->next;
-               sk->pprev = NULL;
-               tcp_reg_zap(sk);
-       }
-       if(state != TCP_CLOSE) {
-               struct sock **skp;
-
-               if(state == TCP_LISTEN)
-                       skp = &tcp_listening_hash[tcp_sk_listen_hashfn(sk)];
-               else
-                       skp = &tcp_ehash[(sk->hashent = tcp_sk_hashfn(sk))];
-
-               if((sk->next = *skp) != NULL)
-                       (*skp)->pprev = &sk->next;
-               *skp = sk;
-               sk->pprev = skp;
-               if(state == TCP_LISTEN)
-                       tcp_sk_bindify(sk);
+               __tcp_put_port(sk);
         }
         SOCKHASH_UNLOCK_WRITE();
  }
@@ -1344,7 +1338,6 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct open_request *req,
  #endif
  
                 memcpy(newsk, sk, sizeof(*newsk));
-               newsk->sklist_next = NULL;
                 newsk->state = TCP_SYN_RECV;
  
                 /* Clone the TCP header template */
@@ -1536,8 +1529,11 @@ struct sock * tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
         if (newsk->sndbuf < (3 * newtp->pmtu_cookie))
                 newsk->sndbuf = min ((3 * newtp->pmtu_cookie), sysctl_wmem_max);
   
-       tcp_v4_hash(newsk);
-       add_to_prot_sklist(newsk);
+       SOCKHASH_LOCK_WRITE();
+       __tcp_v4_hash(newsk);
+       __tcp_inherit_port(sk, newsk);
+       SOCKHASH_UNLOCK_WRITE();
+
         sk->data_ready(sk, 0); /* Deliver SIGIO */ 
  
         return newsk;
@@ -1780,6 +1776,25 @@ do_time_wait:
         goto discard_it;
  }
  
+static void __tcp_v4_rehash(struct sock *sk)
+{
+       struct sock **skp = &tcp_ehash[(sk->hashent = tcp_sk_hashfn(sk))];
+
+       SOCKHASH_LOCK_WRITE();
+       if(sk->pprev) {
+               if(sk->next)
+                       sk->next->pprev = sk->pprev;
+               *sk->pprev = sk->next;
+               sk->pprev = NULL;
+               tcp_reg_zap(sk);
+       }
+       if((sk->next = *skp) != NULL)
+               (*skp)->pprev = &sk->next;
+       *skp = sk;
+       sk->pprev = skp;
+       SOCKHASH_UNLOCK_WRITE();
+}
+
  int tcp_v4_rebuild_header(struct sock *sk)
  {
         struct rtable *rt = (struct rtable *)sk->dst_cache;
@@ -1853,7 +1868,12 @@ do_rewrite:
  
                 sk->saddr = new_saddr;
                 sk->rcv_saddr = new_saddr;
-               tcp_v4_rehash(sk);
+
+               /* XXX The only one ugly spot where we need to
+                * XXX really change the sockets identity after
+                * XXX it has entered the hashes. -DaveM
+                */
+               __tcp_v4_rehash(sk);
         } 
          
         return 0;
@@ -1948,20 +1968,192 @@ static int tcp_v4_destroy_sock(struct sock *sk)
         while((skb = __skb_dequeue(&tp->out_of_order_queue)) != NULL)
                 kfree_skb(skb);
  
-       /* Clean up a locked TCP bind bucket, this only happens if a
+       /* Clean up a referenced TCP bind bucket, this only happens if a
          * port is allocated for a socket, but it never fully connects.
-        * In which case we will find num to be non-zero and daddr to
-        * be zero.
          */
-       if(sk->daddr == 0 && sk->num != 0)
-               tcp_bucket_unlock(sk);
+       if(sk->prev != NULL)
+               tcp_put_port(sk);
  
         return 0;
  }
  
+/* Proc filesystem TCP sock list dumping. */
+static void get_openreq(struct sock *sk, struct open_request *req, char *tmpbuf, int i)
+{
+       sprintf(tmpbuf, "%4d: %08lX:%04X %08lX:%04X"
+               " %02X %08X:%08X %02X:%08lX %08X %5d %8d %u",
+               i,
+               (long unsigned int)req->af.v4_req.loc_addr,
+               ntohs(sk->sport),
+               (long unsigned int)req->af.v4_req.rmt_addr,
+               ntohs(req->rmt_port),
+               TCP_SYN_RECV,
+               0,0, /* could print option size, but that is af dependent. */
+               1,   /* timers active (only the expire timer) */  
+               (unsigned long)(req->expires - jiffies), 
+               req->retrans,
+               sk->socket ? sk->socket->inode->i_uid : 0,
+               0,  /* non standard timer */  
+               0 /* open_requests have no inode */
+               ); 
+}
+
+static void get_tcp_sock(struct sock *sp, char *tmpbuf, int i)
+{
+       unsigned int dest, src;
+       __u16 destp, srcp;
+       int timer_active, timer_active1, timer_active2;
+       unsigned long timer_expires;
+       struct tcp_opt *tp = &sp->tp_pinfo.af_tcp;
+
+       dest  = sp->daddr;
+       src   = sp->rcv_saddr;
+       destp = ntohs(sp->dport);
+       srcp  = ntohs(sp->sport);
+       timer_active1 = tp->retransmit_timer.prev != NULL;
+       timer_active2 = sp->timer.prev != NULL;
+       timer_active    = 0;
+       timer_expires   = (unsigned) -1;
+       if (timer_active1 && tp->retransmit_timer.expires < timer_expires) {
+               timer_active    = 1;
+               timer_expires   = tp->retransmit_timer.expires;
+       }
+       if (timer_active2 && sp->timer.expires < timer_expires) {
+               timer_active    = 2;
+               timer_expires   = sp->timer.expires;
+       }
+       if(timer_active == 0)
+               timer_expires = jiffies;
+
+       sprintf(tmpbuf, "%4d: %08X:%04X %08X:%04X"
+               " %02X %08X:%08X %02X:%08lX %08X %5d %8d %ld",
+               i, src, srcp, dest, destp, sp->state, 
+               tp->write_seq-tp->snd_una, tp->rcv_nxt-tp->copied_seq,
+               timer_active, timer_expires-jiffies,
+               tp->retransmits,
+               sp->socket ? sp->socket->inode->i_uid : 0,
+               timer_active ? sp->timeout : 0,
+               sp->socket ? sp->socket->inode->i_ino : 0);
+}
+
+static void get_timewait_sock(struct tcp_tw_bucket *tw, char *tmpbuf, int i)
+{
+       extern int tcp_tw_death_row_slot;
+       unsigned int dest, src;
+       __u16 destp, srcp;
+       int slot_dist;
+
+       dest  = tw->daddr;
+       src   = tw->rcv_saddr;
+       destp = ntohs(tw->dport);
+       srcp  = ntohs(tw->sport);
+
+       slot_dist = tw->death_slot;
+       if(slot_dist > tcp_tw_death_row_slot)
+               slot_dist = (TCP_TWKILL_SLOTS - slot_dist) + tcp_tw_death_row_slot;
+       else
+               slot_dist = tcp_tw_death_row_slot - slot_dist;
+
+       sprintf(tmpbuf, "%4d: %08X:%04X %08X:%04X"
+               " %02X %08X:%08X %02X:%08X %08X %5d %8d %d",
+               i, src, srcp, dest, destp, TCP_TIME_WAIT, 0, 0,
+               3, slot_dist * TCP_TWKILL_PERIOD, 0, 0, 0, 0);
+}
+
+int tcp_get_info(char *buffer, char **start, off_t offset, int length, int dummy)
+{
+       int len = 0, num = 0, i;
+       off_t begin, pos = 0;
+       char tmpbuf[129];
+
+       if (offset < 128)
+               len += sprintf(buffer, "%-127s\n",
+                              "  sl  local_address rem_address   st tx_queue "
+                              "rx_queue tr tm->when retrnsmt   uid  timeout inode");
+
+       pos = 128;
+       SOCKHASH_LOCK_READ();
+
+       /* First, walk listening socket table. */
+       for(i = 0; i < TCP_LHTABLE_SIZE; i++) {
+               struct sock *sk = tcp_listening_hash[i];
+
+               for (sk = tcp_listening_hash[i]; sk; sk = sk->next, num++) {
+                       struct open_request *req;
+                       struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
+
+                       if (sk->family != PF_INET)
+                               continue;
+                       pos += 128;
+                       if (pos >= offset) {
+                               get_tcp_sock(sk, tmpbuf, num);
+                               len += sprintf(buffer+len, "%-127s\n", tmpbuf);
+                               if (len >= length)
+                                       goto out;
+                       }
+                       for (req = tp->syn_wait_queue; req; req = req->dl_next, num++) {
+                               if (req->sk)
+                                       continue;
+                               pos += 128;
+                               if (pos < offset)
+                                       continue;
+                               get_openreq(sk, req, tmpbuf, num);
+                               len += sprintf(buffer+len, "%-127s\n", tmpbuf);
+                               if(len >= length) 
+                                       goto out;
+                       }
+               }
+       }
+
+       /* Next, walk established hash chain. */
+       for (i = 0; i < (tcp_ehash_size >> 1); i++) {
+               struct sock *sk;
+
+               for(sk = tcp_ehash[i]; sk; sk = sk->next, num++) {
+                       if (sk->family != PF_INET)
+                               continue;
+                       pos += 128;
+                       if (pos < offset)
+                               continue;
+                       get_tcp_sock(sk, tmpbuf, num);
+                       len += sprintf(buffer+len, "%-127s\n", tmpbuf);
+                       if(len >= length)
+                               goto out;
+               }
+       }
+
+       /* Finally, walk time wait buckets. */
+       for (i = (tcp_ehash_size>>1); i < tcp_ehash_size; i++) {
+               struct tcp_tw_bucket *tw;
+               for (tw = (struct tcp_tw_bucket *)tcp_ehash[i];
+                    tw != NULL;
+                    tw = (struct tcp_tw_bucket *)tw->next, num++) {
+                       if (tw->family != PF_INET)
+                               continue;
+                       pos += 128;
+                       if (pos < offset)
+                               continue;
+                       get_timewait_sock(tw, tmpbuf, num);
+                       len += sprintf(buffer+len, "%-127s\n", tmpbuf);
+                       if(len >= length)
+                               goto out;
+               }
+       }
+
+out:
+       SOCKHASH_UNLOCK_READ();
+
+       begin = len - (pos - offset);
+       *start = buffer + begin;
+       len -= begin;
+       if(len > length)
+               len = length;
+       if (len < 0)
+               len = 0; 
+       return len;
+}
+
  struct proto tcp_prot = {
-       (struct sock *)&tcp_prot,       /* sklist_next */
-       (struct sock *)&tcp_prot,       /* sklist_prev */
         tcp_close,                      /* close */
         tcp_v4_connect,                 /* connect */
         tcp_accept,                     /* accept */
@@ -1981,9 +2173,7 @@ struct proto tcp_prot = {
         tcp_v4_do_rcv,                  /* backlog_rcv */
         tcp_v4_hash,                    /* hash */
         tcp_v4_unhash,                  /* unhash */
-       tcp_v4_rehash,                  /* rehash */
-       tcp_good_socknum,               /* good_socknum */
-       tcp_v4_verify_bind,             /* verify_bind */
+       tcp_v4_get_port,                /* get_port */
         128,                            /* max_header */
         0,                              /* retransmits */
         "TCP",                          /* name */
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c

index d23eef1431778278b376f22d1386c425b13350a1..05a92f7f772c65c91a23249959076a84eac1fcac 100644 (file)
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -5,7 +5,7 @@
   *
   *             Implementation of the Transmission Control Protocol(TCP).
   *
- * Version:    $Id: tcp_timer.c,v 1.64 1999/05/27 00:37:31 davem Exp $
+ * Version:    $Id: tcp_timer.c,v 1.65 1999/07/02 11:26:35 davem Exp $
   *
   * Authors:    Ross Biro, <bir7@leland.Stanford.Edu>
   *             Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
@@ -31,7 +31,6 @@ int sysctl_tcp_retries2 = TCP_RETR2;
  static void tcp_sltimer_handler(unsigned long);
  static void tcp_syn_recv_timer(unsigned long);
  static void tcp_keepalive(unsigned long data);
-static void tcp_bucketgc(unsigned long);
  static void tcp_twkill(unsigned long);
  
  struct timer_list      tcp_slow_timer = {
@@ -44,8 +43,7 @@ struct timer_list     tcp_slow_timer = {
  struct tcp_sl_timer tcp_slt_array[TCP_SLT_MAX] = {
         {ATOMIC_INIT(0), TCP_SYNACK_PERIOD, 0, tcp_syn_recv_timer},/* SYNACK    */
         {ATOMIC_INIT(0), TCP_KEEPALIVE_PERIOD, 0, tcp_keepalive},  /* KEEPALIVE */
-       {ATOMIC_INIT(0), TCP_TWKILL_PERIOD, 0, tcp_twkill},        /* TWKILL    */
-       {ATOMIC_INIT(0), TCP_BUCKETGC_PERIOD, 0, tcp_bucketgc}     /* BUCKETGC  */
+       {ATOMIC_INIT(0), TCP_TWKILL_PERIOD, 0, tcp_twkill}         /* TWKILL    */
  };
  
  const char timer_bug_msg[] = KERN_DEBUG "tcpbug: unknown timer value\n";
@@ -252,43 +250,6 @@ static __inline__ int tcp_keepopen_proc(struct sock *sk)
         return res;
  }
  
-/* Garbage collect TCP bind buckets. */
-static void tcp_bucketgc(unsigned long data)
-{
-       int i, reaped = 0;;
-
-       SOCKHASH_LOCK_WRITE_BH();
-       for(i = 0; i < tcp_bhash_size; i++) {
-               struct tcp_bind_bucket *tb = tcp_bhash[i];
-
-               while(tb) {
-                       struct tcp_bind_bucket *next = tb->next;
-
-                       if((tb->owners == NULL) &&
-                          !(tb->flags & TCPB_FLAG_LOCKED)) {
-                               reaped++;
-
-                               /* Unlink bucket. */
-                               if(tb->next)
-                                       tb->next->pprev = tb->pprev;
-                               *tb->pprev = tb->next;
-
-                               /* Finally, free it up. */
-                               kmem_cache_free(tcp_bucket_cachep, tb);
-                       }
-                       tb = next;
-               }
-       }
-       SOCKHASH_UNLOCK_WRITE_BH();
-
-       if(reaped != 0) {
-               struct tcp_sl_timer *slt = (struct tcp_sl_timer *)data;
-
-               /* Eat timer references. */
-               atomic_sub(reaped, &slt->count);
-       }
-}
-
  /* Kill off TIME_WAIT sockets once their lifetime has expired. */
  int tcp_tw_death_row_slot = 0;
  static struct tcp_tw_bucket *tcp_tw_death_row[TCP_TWKILL_SLOTS] =
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c

index 516304d4bd8f31c4d11c7e5d89b7440142a81608..2696a05cdde1d665720bb3e1c2650ff3a039fcc2 100644 (file)
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -5,7 +5,7 @@
   *
   *             The User Datagram Protocol (UDP).
   *
- * Version:    $Id: udp.c,v 1.70 1999/06/13 05:55:16 davem Exp $
+ * Version:    $Id: udp.c,v 1.71 1999/07/02 11:26:33 davem Exp $
   *
   * Authors:    Ross Biro, <bir7@leland.Stanford.Edu>
   *             Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
@@ -123,164 +123,102 @@ struct udp_mib          udp_statistics;
  
  struct sock *udp_hash[UDP_HTABLE_SIZE];
  
-static int udp_v4_verify_bind(struct sock *sk, unsigned short snum)
-{
-       struct sock *sk2;
-       int retval = 0, sk_reuse = sk->reuse;
-
-       SOCKHASH_LOCK_READ();
-       for(sk2 = udp_hash[snum & (UDP_HTABLE_SIZE - 1)]; sk2 != NULL; sk2 = sk2->next) {
-               if((sk2->num == snum) && (sk2 != sk)) {
-                       unsigned char state = sk2->state;
-                       int sk2_reuse = sk2->reuse;
-
-                       /* Two sockets can be bound to the same port if they're
-                        * bound to different interfaces.
-                        */
-
-                       if(sk2->bound_dev_if != sk->bound_dev_if)
-                               continue;
+/* Shared by v4/v6 udp. */
+int udp_port_rover = 0;
  
-                       if(!sk2->rcv_saddr || !sk->rcv_saddr) {
-                               if((!sk2_reuse)                 ||
-                                  (!sk_reuse)                  ||
-                                  (state == TCP_LISTEN)) {
-                                       retval = 1;
-                                       break;
-                               }
-                       } else if(sk2->rcv_saddr == sk->rcv_saddr) {
-                               if((!sk_reuse)                  ||
-                                  (!sk2_reuse)                 ||
-                                  (state == TCP_LISTEN)) {
-                                       retval = 1;
-                                       break;
-                               }
+static int udp_v4_get_port(struct sock *sk, unsigned short snum)
+{
+       SOCKHASH_LOCK_WRITE();
+       if (snum == 0) {
+               int best_size_so_far, best, result, i;
+
+               if (udp_port_rover > sysctl_local_port_range[1] ||
+                   udp_port_rover < sysctl_local_port_range[0])
+                       udp_port_rover = sysctl_local_port_range[0];
+               best_size_so_far = 32767;
+               best = result = udp_port_rover;
+               for (i = 0; i < UDP_HTABLE_SIZE; i++, result++) {
+                       struct sock *sk;
+                       int size;
+
+                       sk = udp_hash[result & (UDP_HTABLE_SIZE - 1)];
+                       if (!sk) {
+                               if (result > sysctl_local_port_range[1])
+                                       result = sysctl_local_port_range[0] +
+                                               ((result - sysctl_local_port_range[0]) &
+                                                (UDP_HTABLE_SIZE - 1));
+                               goto gotit;
                         }
+                       size = 0;
+                       do {
+                               if (++size >= best_size_so_far)
+                                       goto next;
+                       } while ((sk = sk->next) != NULL);
+                       best_size_so_far = size;
+                       best = result;
+               next:
+               }
+               result = best;
+               for(;; result += UDP_HTABLE_SIZE) {
+                       if (result > sysctl_local_port_range[1])
+                               result = sysctl_local_port_range[0]
+                                       + ((result - sysctl_local_port_range[0]) &
+                                          (UDP_HTABLE_SIZE - 1));
+                       if (!udp_lport_inuse(result))
+                               break;
+               }
+gotit:
+               udp_port_rover = snum = result;
+       } else {
+               struct sock *sk2;
+
+               for (sk2 = udp_hash[snum & (UDP_HTABLE_SIZE - 1)];
+                    sk2 != NULL;
+                    sk2 = sk2->next) {
+                       if (sk2->num == snum &&
+                           sk2 != sk &&
+                           sk2->bound_dev_if == sk->bound_dev_if &&
+                           (!sk2->rcv_saddr ||
+                            !sk->rcv_saddr ||
+                            sk2->rcv_saddr == sk->rcv_saddr) &&
+                           (!sk2->reuse || !sk->reuse))
+                               goto fail;
                 }
         }
-       SOCKHASH_UNLOCK_READ();
-       return retval;
-}
-
-static inline int udp_lport_inuse(u16 num)
-{
-       struct sock *sk = udp_hash[num & (UDP_HTABLE_SIZE - 1)];
-
-       for(; sk != NULL; sk = sk->next) {
-               if(sk->num == num)
-                       return 1;
-       }
+       sk->num = snum;
+       SOCKHASH_UNLOCK_WRITE();
         return 0;
-}
-
-/* Shared by v4/v6 udp. */
-unsigned short udp_good_socknum(void)
-{
-       int result;
-       static int start = 0;
-       int i, best, best_size_so_far;
-
-       SOCKHASH_LOCK_READ();
-        if (start > sysctl_local_port_range[1] || start < sysctl_local_port_range[0])
-                start = sysctl_local_port_range[0];
-
-       best_size_so_far = 32767;       /* "big" num */
-        best = result = start;
-
-        for(i = 0; i < UDP_HTABLE_SIZE; i++, result++) {
-               struct sock *sk;
-               int size;
-
-               sk = udp_hash[result & (UDP_HTABLE_SIZE - 1)];
-
-                if(!sk) {
-                        if (result > sysctl_local_port_range[1])
-                                result = sysctl_local_port_range[0]
-                                        + ((result - sysctl_local_port_range[0]) & (UDP_HTABLE_SIZE - 1));
-                       goto out;
-                }
-
-               /* Is this one better than our best so far? */
-               size = 0;
-               do {
-                       if(++size >= best_size_so_far)
-                               goto next;
-               } while((sk = sk->next) != NULL);
-               best_size_so_far = size;
-               best = result;
-        next:
-       }
  
-       result = best;
-
-        for(;; result += UDP_HTABLE_SIZE) {
-                /* Get into range (but preserve hash bin)... */
-                if (result > sysctl_local_port_range[1])
-                        result = sysctl_local_port_range[0]
-                                + ((result - sysctl_local_port_range[0]) & (UDP_HTABLE_SIZE - 1));
-                if (!udp_lport_inuse(result))
-                        break;
-        }
-out:
-       start = result;
-       SOCKHASH_UNLOCK_READ();
-       return result;
+fail:
+       SOCKHASH_UNLOCK_WRITE();
+       return 1;
  }
  
  static void udp_v4_hash(struct sock *sk)
  {
-       struct sock **skp;
-       int num = sk->num;
-
-       num &= (UDP_HTABLE_SIZE - 1);
-       skp = &udp_hash[num];
+       struct sock **skp = &udp_hash[sk->num & (UDP_HTABLE_SIZE - 1)];
  
         SOCKHASH_LOCK_WRITE();
-       sk->next = *skp;
+       if ((sk->next = *skp) != NULL)
+               (*skp)->pprev = &sk->next;
         *skp = sk;
-       sk->hashent = num;
+       sk->pprev = skp;
+       sk->prot->inuse++;
+       if(sk->prot->highestinuse < sk->prot->inuse)
+               sk->prot->highestinuse = sk->prot->inuse;
         SOCKHASH_UNLOCK_WRITE();
  }
  
  static void udp_v4_unhash(struct sock *sk)
  {
-       struct sock **skp;
-       int num = sk->num;
-
-       num &= (UDP_HTABLE_SIZE - 1);
-       skp = &udp_hash[num];
-
-       SOCKHASH_LOCK_WRITE();
-       while(*skp != NULL) {
-               if(*skp == sk) {
-                       *skp = sk->next;
-                       break;
-               }
-               skp = &((*skp)->next);
-       }
-       SOCKHASH_UNLOCK_WRITE();
-}
-
-static void udp_v4_rehash(struct sock *sk)
-{
-       struct sock **skp;
-       int num = sk->num;
-       int oldnum = sk->hashent;
-
-       num &= (UDP_HTABLE_SIZE - 1);
-       skp = &udp_hash[oldnum];
-
         SOCKHASH_LOCK_WRITE();
-       while(*skp != NULL) {
-               if(*skp == sk) {
-                       *skp = sk->next;
-                       break;
-               }
-               skp = &((*skp)->next);
+       if (sk->pprev) {
+               if (sk->next)
+                       sk->next->pprev = sk->pprev;
+               *sk->pprev = sk->next;
+               sk->pprev = NULL;
+               sk->prot->inuse--;
         }
-       sk->next = udp_hash[num];
-       udp_hash[num] = sk;
-       sk->hashent = num;
         SOCKHASH_UNLOCK_WRITE();
  }
  
@@ -653,7 +591,7 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, int len)
         if (msg->msg_name) {
                 struct sockaddr_in * usin = (struct sockaddr_in*)msg->msg_name;
                 if (msg->msg_namelen < sizeof(*usin))
-                       return(-EINVAL);
+                       return -EINVAL;
                 if (usin->sin_family != AF_INET)
                         return -EINVAL;
  
@@ -788,7 +726,6 @@ int udp_ioctl(struct sock *sk, int cmd, unsigned long arg)
                 {
                         unsigned long amount;
  
-                       if (sk->state == TCP_LISTEN) return(-EINVAL);
                         amount = sock_wspace(sk);
                         return put_user(amount, (int *)arg);
                 }
@@ -798,8 +735,6 @@ int udp_ioctl(struct sock *sk, int cmd, unsigned long arg)
                         struct sk_buff *skb;
                         unsigned long amount;
  
-                       if (sk->state == TCP_LISTEN)
-                               return(-EINVAL);
                         amount = 0;
                         /* N.B. Is this interrupt safe??
                            -> Yes. Interrupts do not remove skbs. --ANK (980725)
@@ -817,7 +752,7 @@ int udp_ioctl(struct sock *sk, int cmd, unsigned long arg)
                 }
  
                 default:
-                       return(-ENOIOCTLCMD);
+                       return -ENOIOCTLCMD;
         }
         return(0);
  }
@@ -945,7 +880,7 @@ int udp_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
  
         
         if (addr_len < sizeof(*usin)) 
-               return(-EINVAL);
+               return -EINVAL;
  
         /*
          *      1003.1g - break association.
@@ -961,7 +896,7 @@ int udp_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
         }
  
         if (usin->sin_family && usin->sin_family != AF_INET) 
-               return(-EAFNOSUPPORT);
+               return -EAFNOSUPPORT;
  
         dst_release(xchg(&sk->dst_cache, NULL));
  
@@ -1226,9 +1161,69 @@ csum_error:
         return(0);
  }
  
+/*
+ * Format one UDP socket as a single text record into tmpbuf.
+ *
+ * sp     - socket to describe
+ * tmpbuf - destination; written with an unbounded sprintf(), so the
+ *          caller must supply a buffer large enough for the record
+ * i      - value printed in the leading "sl" column
+ *
+ * The retransmit column is always printed as 0.
+ */
+static void get_udp_sock(struct sock *sp, char *tmpbuf, int i)
+{
+       unsigned int dest, src;
+       __u16 destp, srcp;
+       int timer_active;
+       unsigned long timer_expires;
+
+       dest  = sp->daddr;
+       src   = sp->rcv_saddr;
+       destp = ntohs(sp->dport);
+       srcp  = ntohs(sp->sport);
+       /* A timer is reported as pending (code 2) when it is linked in. */
+       timer_active = (sp->timer.prev != NULL) ? 2 : 0;
+       timer_expires = (timer_active == 2 ? sp->timer.expires : jiffies);
+       /*
+        * sp->socket is tested before the inode number is read, so it can
+        * be NULL here; the uid lookup must be guarded the same way
+        * instead of dereferencing sp->socket unconditionally.
+        */
+       sprintf(tmpbuf, "%4d: %08X:%04X %08X:%04X"
+               " %02X %08X:%08X %02X:%08lX %08X %5d %8d %ld",
+               i, src, srcp, dest, destp, sp->state, 
+               atomic_read(&sp->wmem_alloc), atomic_read(&sp->rmem_alloc),
+               timer_active, timer_expires-jiffies, 0,
+               sp->socket ? sp->socket->inode->i_uid : 0,
+               timer_active ? sp->timeout : 0,
+               sp->socket ? sp->socket->inode->i_ino : 0);
+}
+
+/*
+ * Fill `buffer` with the UDP socket table as fixed-width text records.
+ *
+ * Every line, the column header included, is padded to 127 characters
+ * plus a newline (128 bytes), so `pos` can track the position in the
+ * virtual file by adding 128 per record without formatting the records
+ * that lie before `offset`.
+ *
+ * buffer - output area; records are appended with sprintf()
+ * start  - set to the address inside `buffer` that corresponds to `offset`
+ * offset - byte position in the table the caller wants to read from
+ * length - number of bytes the caller asked for
+ * dummy  - not used by this function
+ *
+ * Returns the number of bytes available at *start, clamped to the
+ * range [0, length].
+ */
+int udp_get_info(char *buffer, char **start, off_t offset, int length, int dummy)
+{
+       /* NOTE(review): num is incremented in the loop but never read here. */
+       int len = 0, num = 0, i;
+       off_t pos = 0;
+       off_t begin;
+       char tmpbuf[129];
+
+       /* The header is only emitted when the read starts inside it. */
+       if (offset < 128) 
+               len += sprintf(buffer, "%-127s\n",
+                              "  sl  local_address rem_address   st tx_queue "
+                              "rx_queue tr tm->when retrnsmt   uid  timeout inode");
+       /* pos is the table position just past the last record accounted for. */
+       pos = 128;
+       SOCKHASH_LOCK_READ();
+       for (i = 0; i < UDP_HTABLE_SIZE; i++) {
+               struct sock *sk;
+
+               for (sk = udp_hash[i]; sk; sk = sk->next, num++) {
+                       /* Only PF_INET sockets are listed. */
+                       if (sk->family != PF_INET)
+                               continue;
+                       pos += 128;
+                       /* Record ends before the requested offset: skip it. */
+                       if (pos < offset)
+                               continue;
+                       /* The hash bucket index is what gets printed as "sl". */
+                       get_udp_sock(sk, tmpbuf, i);
+                       len += sprintf(buffer+len, "%-127s\n", tmpbuf);
+                       /*
+                        * Stop once the request is satisfied.  The check runs
+                        * after the write, so the last record may extend past
+                        * `length` -- presumably the caller's buffer has room
+                        * for that; TODO confirm.
+                        */
+                       if(len >= length)
+                               goto out;
+               }
+       }
+out:
+       SOCKHASH_UNLOCK_READ();
+       /* buffer[len] corresponds to position pos, so back up to `offset`. */
+       begin = len - (pos - offset);
+       *start = buffer + begin;
+       len -= begin;
+       if(len > length)
+               len = length;
+       /* offset lies past everything produced: report no data. */
+       if (len < 0)
+               len = 0; 
+       return len;
+}
+
  struct proto udp_prot = {
-       (struct sock *)&udp_prot,       /* sklist_next */
-       (struct sock *)&udp_prot,       /* sklist_prev */
         udp_close,                      /* close */
         udp_connect,                    /* connect */
         NULL,                           /* accept */
@@ -1248,9 +1243,7 @@ struct proto udp_prot = {
         udp_queue_rcv_skb,              /* backlog_rcv */
         udp_v4_hash,                    /* hash */
         udp_v4_unhash,                  /* unhash */
-       udp_v4_rehash,                  /* rehash */
-       udp_good_socknum,               /* good_socknum */
-       udp_v4_verify_bind,             /* verify_bind */
+       udp_v4_get_port,                /* get_port */
         128,                            /* max_header */
         0,                              /* retransmits */
         "UDP",                          /* name */
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c

index f7f50df869ae80c11491016d0eb927622134ff05..f565921d31a3cacab955f16806365e27dcd15193 100644 (file)
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -7,7 +7,7 @@
   *
   *     Adapted from linux/net/ipv4/af_inet.c
   *
- *     $Id: af_inet6.c,v 1.44 1999/06/09 08:29:29 davem Exp $
+ *     $Id: af_inet6.c,v 1.45 1999/07/02 11:26:38 davem Exp $
   *
   *     This program is free software; you can redistribute it and/or
   *      modify it under the terms of the GNU General Public License
@@ -157,7 +157,6 @@ static int inet6_create(struct socket *sock, int protocol)
                  */
                 sk->sport = ntohs(sk->num);
                 sk->prot->hash(sk);
-               add_to_prot_sklist(sk);
         }
  
         if (sk->prot->init) {
@@ -205,13 +204,13 @@ static int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
                 
         addr_type = ipv6_addr_type(&addr->sin6_addr);
         if ((addr_type & IPV6_ADDR_MULTICAST) && sock->type == SOCK_STREAM)
-               return(-EINVAL);
+               return -EINVAL;
  
         /* Check if the address belongs to the host. */
         if (addr_type == IPV6_ADDR_MAPPED) {
                 v4addr = addr->sin6_addr.s6_addr32[3];
                 if (inet_addr_type(v4addr) != RTN_LOCAL)
-                       return(-EADDRNOTAVAIL);
+                       return -EADDRNOTAVAIL;
         } else {
                 if (addr_type != IPV6_ADDR_ANY) {
                         /* ipv4 addr of the socket is invalid.  Only the
@@ -220,7 +219,7 @@ static int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
                         v4addr = LOOPBACK4_IPV6;
                         if (!(addr_type & IPV6_ADDR_MULTICAST)) {
                                 if (ipv6_chk_addr(&addr->sin6_addr, NULL, 0) == NULL)
-                                       return(-EADDRNOTAVAIL);
+                                       return -EADDRNOTAVAIL;
                         }
                 }
         }
@@ -236,21 +235,17 @@ static int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
                        sizeof(struct in6_addr));
  
         snum = ntohs(addr->sin6_port);
-       if (snum == 0) 
-               snum = sk->prot->good_socknum();
-       if (snum < PROT_SOCK && !capable(CAP_NET_BIND_SERVICE))
-               return(-EACCES);
+       if (snum && snum < PROT_SOCK && !capable(CAP_NET_BIND_SERVICE))
+               return -EACCES;
  
         /* Make sure we are allowed to bind here. */
-       if(sk->prot->verify_bind(sk, snum))
+       if(sk->prot->get_port(sk, snum) != 0)
                 return -EADDRINUSE;
  
-       sk->num = snum;
         sk->sport = ntohs(sk->num);
         sk->dport = 0;
         sk->daddr = 0;
-       sk->prot->rehash(sk);
-       add_to_prot_sklist(sk);
+       sk->prot->hash(sk);
  
         return(0);
  }
@@ -318,7 +313,7 @@ static int inet6_getname(struct socket *sock, struct sockaddr *uaddr,
         sk = sock->sk;
         if (peer) {
                 if (!tcp_connected(sk->state))
-                       return(-ENOTCONN);
+                       return -ENOTCONN;
                 sin->sin6_port = sk->dport;
                 memcpy(&sin->sin6_addr, &sk->net_pinfo.af_inet6.daddr,
                        sizeof(struct in6_addr));
diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c

index b83bdc34b02fbc64bd3f67550c26b790fe80e964..09845703b4edf4fb963a588fd6d2c63f7c2cb592 100644 (file)
--- a/net/ipv6/proc.c
+++ b/net/ipv6/proc.c
@@ -7,7 +7,7 @@
   *             PROC file system.  This is very similar to the IPv4 version,
   *             except it reports the sockets in the INET6 address family.
   *
- * Version:    $Id: proc.c,v 1.10 1999/05/27 00:38:14 davem Exp $
+ * Version:    $Id: proc.c,v 1.11 1999/07/02 11:26:45 davem Exp $
   *
   * Authors:    David S. Miller (davem@caip.rutgers.edu)
   *
@@ -26,140 +26,6 @@
  #include <net/transp_v6.h>
  #include <net/ipv6.h>
  
-/* This is the main implementation workhorse of all these routines. */
-static int get__netinfo6(struct proto *pro, char *buffer, int format, char **start,
-                        off_t offset, int length)
-{
-       struct sock *sp;
-       struct tcp_opt *tp;
-       int timer_active, timer_active1, timer_active2;
-       unsigned long timer_expires;
-       struct in6_addr *dest, *src;
-       unsigned short destp, srcp;
-       int len = 0, i = 0;
-       off_t pos = 0;
-       off_t begin;
-       char tmpbuf[150];
-
-       if(offset < 149)
-               len += sprintf(buffer, "%-148s\n",
-                              "  sl  "                                         /* 6 */
-                              "local_address                         "         /* 38 */
-                              "remote_address                        "         /* 38 */
-                              "st tx_queue rx_queue tr tm->when retrnsmt"      /* 41 */
-                              "   uid  timeout inode");                        /* 21 */
-                                                                               /*----*/
-                                                                               /*144 */
-
-       pos = 149;
-       SOCKHASH_LOCK_READ();
-       sp = pro->sklist_next;
-       while(sp != (struct sock *)pro) {
-               struct tcp_tw_bucket *tw = (struct tcp_tw_bucket *)sp;
-               int tw_bucket = 0;
-
-               pos += 149;
-               if(pos < offset)
-                       goto next;
-               tp = &(sp->tp_pinfo.af_tcp);
-               if((format == 0) && (sp->state == TCP_TIME_WAIT)) {
-                       tw_bucket = 1;
-                       dest  = &tw->v6_daddr;
-                       src   = &tw->v6_rcv_saddr;
-               } else {
-                       dest  = &sp->net_pinfo.af_inet6.daddr;
-                       src   = &sp->net_pinfo.af_inet6.rcv_saddr;
-               }
-               destp = ntohs(sp->dport);
-               srcp  = ntohs(sp->sport);
-
-               if((format == 0) && (sp->state == TCP_TIME_WAIT)) {
-                       extern int tcp_tw_death_row_slot;
-                       int slot_dist;
-
-                       timer_active1   = timer_active2 = 0;
-                       timer_active    = 3;
-                       slot_dist       = tw->death_slot;
-                       if(slot_dist > tcp_tw_death_row_slot)
-                               slot_dist = (TCP_TWKILL_SLOTS - slot_dist) + tcp_tw_death_row_slot;
-                       else
-                               slot_dist = tcp_tw_death_row_slot - slot_dist;
-                       timer_expires   = jiffies + (slot_dist * TCP_TWKILL_PERIOD);
-               } else {
-                       timer_active1 = tp->retransmit_timer.prev != NULL;
-                       timer_active2 = sp->timer.prev != NULL;
-                       timer_active = 0;
-                       timer_expires = (unsigned) -1;
-               }
-               if(timer_active1 && tp->retransmit_timer.expires < timer_expires) {
-                       timer_active = timer_active1;
-                       timer_expires = tp->retransmit_timer.expires;
-               }
-               if(timer_active2 && sp->timer.expires < timer_expires) {
-                       timer_active = timer_active2;
-                       timer_expires = sp->timer.expires;
-               }
-               if(timer_active == 0)
-                       timer_expires = jiffies;
-               sprintf(tmpbuf, "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
-                       "%02X %08X:%08X %02X:%08lX %08X %5d %8d %ld",
-                       i,
-                       src->s6_addr32[0], src->s6_addr32[1],
-                       src->s6_addr32[2], src->s6_addr32[3], srcp,
-                       dest->s6_addr32[0], dest->s6_addr32[1],
-                       dest->s6_addr32[2], dest->s6_addr32[3], destp,
-                       sp->state,
-                       (tw_bucket ?
-                        0 :
-                        (format == 0) ?
-                        tp->write_seq-tp->snd_una :
-                        atomic_read(&sp->wmem_alloc)),
-                       (tw_bucket ?
-                        0 :
-                        (format == 0) ?
-                        tp->rcv_nxt-tp->copied_seq :
-                        atomic_read(&sp->rmem_alloc)),
-                       timer_active, timer_expires-jiffies,
-                       (tw_bucket ? 0 : tp->retransmits),
-                       ((!tw_bucket && sp->socket) ?
-                        sp->socket->inode->i_uid : 0),
-                       (!tw_bucket && timer_active) ? sp->timeout : 0,
-                       ((!tw_bucket && sp->socket) ?
-                        sp->socket->inode->i_ino : 0));
-
-               len += sprintf(buffer+len, "%-148s\n", tmpbuf);
-               if(len >= length)
-                       break;
-       next:
-               sp = sp->sklist_next;
-               i++;
-       }
-       SOCKHASH_UNLOCK_READ();
-
-       begin = len - (pos - offset);
-       *start = buffer + begin;
-       len -= begin;
-       if(len > length)
-               len = length;
-       return len;
-}
-
-/* These get exported and registered with procfs in af_inet6.c at init time. */
-int tcp6_get_info(char *buffer, char **start, off_t offset, int length, int dummy)
-{
-       return get__netinfo6(&tcpv6_prot, buffer, 0, start, offset, length);
-}
-
-int udp6_get_info(char *buffer, char **start, off_t offset, int length, int dummy)
-{
-       return get__netinfo6(&udpv6_prot, buffer, 1, start, offset, length);
-}
-
-int raw6_get_info(char *buffer, char **start, off_t offset, int length, int dummy)
-{
-       return get__netinfo6(&rawv6_prot, buffer, 1, start, offset, length);
-}
-
  int afinet6_get_info(char *buffer, char **start, off_t offset, int length, int dummy)
  {
         int len = 0;
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c

index 70394dc039b200b86210f91716eb6380bdfdee77..e0c78772382fe34f121f044ee1abfee7a8f8238f 100644 (file)
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -7,7 +7,7 @@
   *
   *     Adapted from linux/net/ipv4/raw.c
   *
- *     $Id: raw.c,v 1.26 1999/06/09 10:11:18 davem Exp $
+ *     $Id: raw.c,v 1.27 1999/07/02 11:26:40 davem Exp $
   *
   *     This program is free software; you can redistribute it and/or
   *      modify it under the terms of the GNU General Public License
@@ -45,57 +45,29 @@ struct sock *raw_v6_htable[RAWV6_HTABLE_SIZE];
  
  static void raw_v6_hash(struct sock *sk)
  {
-       struct sock **skp;
-       int num = sk->num;
+       struct sock **skp = &raw_v6_htable[sk->num & (RAWV6_HTABLE_SIZE - 1)];
  
-       num &= (RAWV6_HTABLE_SIZE - 1);
-       skp = &raw_v6_htable[num];
         SOCKHASH_LOCK_WRITE();
-       sk->next = *skp;
+       if ((sk->next = *skp) != NULL)
+               (*skp)->pprev = &sk->next;
         *skp = sk;
-       sk->hashent = num;
+       sk->pprev = skp;
+       sk->prot->inuse++;
+       if(sk->prot->highestinuse < sk->prot->inuse)
+               sk->prot->highestinuse = sk->prot->inuse;
         SOCKHASH_UNLOCK_WRITE();
  }
  
  static void raw_v6_unhash(struct sock *sk)
  {
-       struct sock **skp;
-       int num = sk->num;
-
-       num &= (RAWV6_HTABLE_SIZE - 1);
-       skp = &raw_v6_htable[num];
-
-       SOCKHASH_LOCK_WRITE();
-       while(*skp != NULL) {
-               if(*skp == sk) {
-                       *skp = sk->next;
-                       break;
-               }
-               skp = &((*skp)->next);
-       }
-       SOCKHASH_UNLOCK_WRITE();
-}
-
-static void raw_v6_rehash(struct sock *sk)
-{
-       struct sock **skp;
-       int num = sk->num;
-       int oldnum = sk->hashent;
-
-       num &= (RAWV6_HTABLE_SIZE - 1);
-       skp = &raw_v6_htable[oldnum];
-
         SOCKHASH_LOCK_WRITE();
-       while(*skp != NULL) {
-               if(*skp == sk) {
-                       *skp = sk->next;
-                       break;
-               }
-               skp = &((*skp)->next);
+       if (sk->pprev) {
+               if (sk->next)
+                       sk->next->pprev = sk->pprev;
+               *sk->pprev = sk->next;
+               sk->pprev = NULL;
+               sk->prot->inuse--;
         }
-       sk->next = raw_v6_htable[num];
-       raw_v6_htable[num] = sk;
-       sk->hashent = num;
         SOCKHASH_UNLOCK_WRITE();
  }
  
@@ -636,9 +608,80 @@ static int rawv6_init_sk(struct sock *sk)
         return(0);
  }
  
+/*
+ * Format one raw IPv6 socket as a single text record into tmpbuf.
+ *
+ * sp     - socket to describe
+ * tmpbuf - destination; written with an unbounded sprintf(), so the
+ *          caller must supply a buffer large enough for the record
+ * i      - value printed in the leading "sl" column
+ *
+ * The retransmit column is always printed as 0.
+ */
+static void get_raw6_sock(struct sock *sp, char *tmpbuf, int i)
+{
+       struct in6_addr *dest, *src;
+       __u16 destp, srcp;
+       int timer_active;
+       unsigned long timer_expires;
+
+       dest  = &sp->net_pinfo.af_inet6.daddr;
+       src   = &sp->net_pinfo.af_inet6.rcv_saddr;
+       destp = ntohs(sp->dport);
+       srcp  = ntohs(sp->sport);
+       /* A timer is reported as pending (code 2) when it is linked in. */
+       timer_active = (sp->timer.prev != NULL) ? 2 : 0;
+       timer_expires = (timer_active == 2 ? sp->timer.expires : jiffies);
+       /*
+        * sp->socket is tested before the inode number is read, so it can
+        * be NULL here; the uid lookup must be guarded the same way
+        * instead of dereferencing sp->socket unconditionally.
+        */
+       sprintf(tmpbuf,
+               "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
+               "%02X %08X:%08X %02X:%08lX %08X %5d %8d %ld",
+               i,
+               src->s6_addr32[0], src->s6_addr32[1],
+               src->s6_addr32[2], src->s6_addr32[3], srcp,
+               dest->s6_addr32[0], dest->s6_addr32[1],
+               dest->s6_addr32[2], dest->s6_addr32[3], destp,
+               sp->state, 
+               atomic_read(&sp->wmem_alloc), atomic_read(&sp->rmem_alloc),
+               timer_active, timer_expires-jiffies, 0,
+               sp->socket ? sp->socket->inode->i_uid : 0,
+               timer_active ? sp->timeout : 0,
+               sp->socket ? sp->socket->inode->i_ino : 0);
+}
+
+/*
+ * Fill `buffer` with the raw IPv6 socket table as fixed-width text records.
+ *
+ * Every line, the column header included, is padded to 148 characters
+ * plus a newline (149 bytes), so `pos` can track the position in the
+ * virtual file by adding 149 per record without formatting the records
+ * that lie before `offset`.
+ *
+ * buffer - output area; records are appended with sprintf()
+ * start  - set to the address inside `buffer` that corresponds to `offset`
+ * offset - byte position in the table the caller wants to read from
+ * length - number of bytes the caller asked for
+ * dummy  - not used by this function
+ *
+ * Returns the number of bytes available at *start, clamped to the
+ * range [0, length].
+ */
+int raw6_get_info(char *buffer, char **start, off_t offset, int length, int dummy)
+{
+       /* NOTE(review): num is incremented in the loop but never read here. */
+       int len = 0, num = 0, i;
+       off_t pos = 0;
+       off_t begin;
+       char tmpbuf[150];
+
+       /* The header is only emitted when the read starts inside it. */
+       if (offset < 149)
+               len += sprintf(buffer, "%-148s\n",
+                              "  sl  "                                         /* 6 */
+                              "local_address                         "         /* 38 */
+                              "remote_address                        "         /* 38 */
+                              "st tx_queue rx_queue tr tm->when retrnsmt"      /* 41 */
+                              "   uid  timeout inode");                        /* 21 */
+                                                                               /*----*/
+                                                                               /*144 */
+       /* pos is the table position just past the last record accounted for. */
+       pos = 149;
+       SOCKHASH_LOCK_READ();
+       for (i = 0; i < RAWV6_HTABLE_SIZE; i++) {
+               struct sock *sk;
+
+               for (sk = raw_v6_htable[i]; sk; sk = sk->next, num++) {
+                       /* Only PF_INET6 sockets are listed. */
+                       if (sk->family != PF_INET6)
+                               continue;
+                       pos += 149;
+                       /* Record ends before the requested offset: skip it. */
+                       if (pos < offset)
+                               continue;
+                       /* The hash bucket index is what gets printed as "sl". */
+                       get_raw6_sock(sk, tmpbuf, i);
+                       len += sprintf(buffer+len, "%-148s\n", tmpbuf);
+                       /*
+                        * Stop once the request is satisfied.  The check runs
+                        * after the write, so the last record may extend past
+                        * `length` -- presumably the caller's buffer has room
+                        * for that; TODO confirm.
+                        */
+                       if(len >= length)
+                               goto out;
+               }
+       }
+out:
+       SOCKHASH_UNLOCK_READ();
+       /* buffer[len] corresponds to position pos, so back up to `offset`. */
+       begin = len - (pos - offset);
+       *start = buffer + begin;
+       len -= begin;
+       if(len > length)
+               len = length;
+       /* offset lies past everything produced: report no data. */
+       if (len < 0)
+               len = 0; 
+       return len;
+}
+
  struct proto rawv6_prot = {
-       (struct sock *)&rawv6_prot,     /* sklist_next */
-       (struct sock *)&rawv6_prot,     /* sklist_prev */
         rawv6_close,                    /* close */
         udpv6_connect,                  /* connect */
         NULL,                           /* accept */
@@ -658,9 +701,7 @@ struct proto rawv6_prot = {
         rawv6_rcv_skb,                  /* backlog_rcv */
         raw_v6_hash,                    /* hash */
         raw_v6_unhash,                  /* unhash */
-       raw_v6_rehash,                  /* rehash */
-       NULL,                           /* good_socknum */
-       NULL,                           /* verify_bind */
+       NULL,                           /* get_port */
         128,                            /* max_header */
         0,                              /* retransmits */
         "RAW",                          /* name */
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c

index 2164e245e547d8090acd503249812ed57601d81b..4cb6a56e92d13cc200d5212aa0a129dbd25b6058 100644 (file)
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -5,7 +5,7 @@
   *     Authors:
   *     Pedro Roque             <roque@di.fc.ul.pt>     
   *
- *     $Id: tcp_ipv6.c,v 1.108 1999/06/09 08:29:43 davem Exp $
+ *     $Id: tcp_ipv6.c,v 1.109 1999/07/02 11:26:41 davem Exp $
   *
   *     Based on: 
   *     linux/net/ipv4/tcp.c
@@ -84,101 +84,124 @@ static __inline__ int tcp_v6_sk_hashfn(struct sock *sk)
   * But it doesn't matter, the recalculation is in the rarest path
   * this function ever takes.
   */
-static int tcp_v6_verify_bind(struct sock *sk, unsigned short snum)
+static int tcp_v6_get_port(struct sock *sk, unsigned short snum)
  {
         struct tcp_bind_bucket *tb;
-       int result = 0;
  
         SOCKHASH_LOCK_WRITE();
-       for(tb = tcp_bhash[tcp_bhashfn(snum)];
-           (tb && (tb->port != snum));
-           tb = tb->next)
-               ;
-       if(tb && tb->owners) {
-               /* Fast path for reuse ports, see include/net/tcp.h for a very
-                * detailed description of why this works, and why it is worth
-                * the effort at all. -DaveM
-                */
-               if((tb->flags & TCPB_FLAG_FASTREUSE)    &&
-                  (sk->reuse != 0)) {
-                       goto go_like_smoke;
+       if (snum == 0) {                /* no port given: search for one */
+               int rover = tcp_port_rover;
+               int low = sysctl_local_port_range[0];
+               int high = sysctl_local_port_range[1];
+               int remaining = (high - low) + 1;       /* candidates left */
+
+               do {    rover++;
+                       if ((rover < low) || (rover > high))
+                               rover = low;
+                       tb = tcp_bhash[tcp_bhashfn(rover)];
+                       for ( ; tb; tb = tb->next)
+                               if (tb->port == rover)
+                                       goto next;
+                       break;          /* no bucket has this port */
+               next: ;                 /* a label must precede a statement */
+               } while (--remaining > 0);
+               tcp_port_rover = rover;
+
+               /* Exhausted local port range during search? */
+               if (remaining <= 0)
+                       goto fail;
+
+               /* OK, here is the one we will use. */
+               snum = rover;
+               tb = NULL;              /* bucket is created further down */
+       } else {
+               for (tb = tcp_bhash[tcp_bhashfn(snum)];
+                    tb != NULL;
+                    tb = tb->next)
+                       if (tb->port == snum)
+                               break;
+       }
+       if (tb != NULL && tb->owners != NULL) {
+               if (tb->fastreuse != 0 && sk->reuse != 0) {
+                       goto success;
                 } else {
-                       struct sock *sk2;
+                       struct sock *sk2 = tb->owners;
                         int sk_reuse = sk->reuse;
                         int addr_type = ipv6_addr_type(&sk->net_pinfo.af_inet6.rcv_saddr);
  
                         /* We must walk the whole port owner list in this case. -DaveM */
-                       for(sk2 = tb->owners; sk2; sk2 = sk2->bind_next) {
-                               if(sk->bound_dev_if == sk2->bound_dev_if) {
-                                       if(!sk_reuse || !sk2->reuse || sk2->state == TCP_LISTEN) {
-                                               if(addr_type == IPV6_ADDR_ANY   ||
-                                                  !sk2->rcv_saddr              ||
-                                                  !ipv6_addr_cmp(&sk->net_pinfo.af_inet6.rcv_saddr,
-                                                                 &sk2->net_pinfo.af_inet6.rcv_saddr))
+                       for( ; sk2 != NULL; sk2 = sk2->bind_next) {
+                               if (sk->bound_dev_if == sk2->bound_dev_if) {
+                                       if (!sk_reuse   ||
+                                           !sk2->reuse ||
+                                           sk2->state == TCP_LISTEN) {
+                                               if (!sk2->rcv_saddr     ||
+                                                   addr_type == IPV6_ADDR_ANY ||
+                                                   !ipv6_addr_cmp(&sk->net_pinfo.af_inet6.rcv_saddr,
+                                                                  &sk2->net_pinfo.af_inet6.rcv_saddr))
                                                         break;
                                         }
                                 }
                         }
-                       if(sk2 != NULL)
-                               result = 1;
+                       /* If we found a conflict, fail. */
+                       if (sk2 != NULL)
+                               goto fail;
                 }
         }
-       if(result == 0) {
-               if(tb == NULL) {
-                       if((tb = tcp_bucket_create(snum)) == NULL)
-                               result = 1;
-                       else if (sk->reuse && sk->state != TCP_LISTEN)
-                               tb->flags |= TCPB_FLAG_FASTREUSE;
-               } else {
-                       /* It could be pending garbage collection, this
-                        * kills the race and prevents it from disappearing
-                        * out from under us by the time we use it.  -DaveM
-                        */
-                       if(tb->owners == NULL) {
-                               if (!(tb->flags & TCPB_FLAG_LOCKED)) {
-                                       tb->flags = (TCPB_FLAG_LOCKED |
-                                                    ((sk->reuse &&
-                                                      sk->state != TCP_LISTEN) ?
-                                                     TCPB_FLAG_FASTREUSE : 0));
-                                       tcp_dec_slow_timer(TCP_SLT_BUCKETGC);
-                               } else if (!(tb->flags & TCPB_FLAG_GOODSOCKNUM)) {
-                                       /* Someone is in between the bind
-                                        * and the actual connect or listen.
-                                        * See if it was a legitimate reuse
-                                        * and we are as well, else punt.
-                                        */
-                                       if (sk->reuse == 0 ||
-                                           !(tb->flags & TCPB_FLAG_FASTREUSE))
-                                               result = 1;
-                               } else
-                                       tb->flags &= ~TCPB_FLAG_GOODSOCKNUM;
-                       }
-               }
-       }
-go_like_smoke:
+       if (tb == NULL &&
+           (tb = tcp_bucket_create(snum)) == NULL)
+                       goto fail;
+       if (tb->owners == NULL) {       /* first owner sets fastreuse */
+               if (sk->reuse && sk->state != TCP_LISTEN)
+                       tb->fastreuse = 1;
+               else
+                       tb->fastreuse = 0;
+       } else if (tb->fastreuse &&
+                  ((sk->reuse == 0) || (sk->state == TCP_LISTEN)))
+               tb->fastreuse = 0;
+
+success:
+       sk->num = snum;                 /* record port, link sk into tb */
+       if ((sk->bind_next = tb->owners) != NULL)
+               tb->owners->bind_pprev = &sk->bind_next;
+       tb->owners = sk;
+       sk->bind_pprev = &tb->owners;
+       sk->prev = (struct sock *) tb;  /* sk->prev holds the bucket */
+
         SOCKHASH_UNLOCK_WRITE();
-       return result;
+       return 0;                       /* port assigned */
+
+fail:
+       SOCKHASH_UNLOCK_WRITE();
+       return 1;                       /* conflict, or nothing available */
  }
  
  static void tcp_v6_hash(struct sock *sk)
  {
-       /* Well, I know that it is ugly...
-          All this ->prot, ->af_specific etc. need LARGE cleanup --ANK
-        */
-       if (sk->tp_pinfo.af_tcp.af_specific == &ipv6_mapped) {
-               tcp_prot.hash(sk);
-               return;
-       }
         if(sk->state != TCP_CLOSE) {
                 struct sock **skp;
  
+               /* Well, I know that it is ugly...
+                * All this ->prot, ->af_specific etc. need LARGE cleanup --ANK
+                * Sockets using the ipv6_mapped ops go to tcp_prot.hash(). */
+               if (sk->tp_pinfo.af_tcp.af_specific == &ipv6_mapped) {
+                       tcp_prot.hash(sk);
+                       return;
+               }
+
+               if(sk->state == TCP_LISTEN)     /* chain head is chosen before locking */
+                       skp = &tcp_listening_hash[tcp_sk_listen_hashfn(sk)];
+               else
+                       skp = &tcp_ehash[(sk->hashent = tcp_v6_sk_hashfn(sk))];
+
                 SOCKHASH_LOCK_WRITE();
-               skp = &tcp_ehash[(sk->hashent = tcp_v6_sk_hashfn(sk))];
                 if((sk->next = *skp) != NULL)
                         (*skp)->pprev = &sk->next;
                 *skp = sk;
                 sk->pprev = skp;
-               tcp_sk_bindify(sk);
+               sk->prot->inuse++;              /* bump the per-protocol in-use count */
+               if(sk->prot->highestinuse < sk->prot->inuse)
+                       sk->prot->highestinuse = sk->prot->inuse; /* new peak */
                 SOCKHASH_UNLOCK_WRITE();
         }
  }
@@ -191,39 +214,9 @@ static void tcp_v6_unhash(struct sock *sk)
                         sk->next->pprev = sk->pprev;
                 *sk->pprev = sk->next;
                 sk->pprev = NULL;
-               tcp_sk_unbindify(sk);
-               tcp_reg_zap(sk);
-       }
-       SOCKHASH_UNLOCK_WRITE();
-}
-
-static void tcp_v6_rehash(struct sock *sk)
-{
-       unsigned char state;
-
-       SOCKHASH_LOCK_WRITE();
-       state = sk->state;
-       if(sk->pprev != NULL) {
-               if(sk->next)
-                       sk->next->pprev = sk->pprev;
-               *sk->pprev = sk->next;
-               sk->pprev = NULL;
+               sk->prot->inuse--;
                 tcp_reg_zap(sk);
-       }
-       if(state != TCP_CLOSE) {
-               struct sock **skp;
-
-               if(state == TCP_LISTEN)
-                       skp = &tcp_listening_hash[tcp_sk_listen_hashfn(sk)];
-               else
-                       skp = &tcp_ehash[(sk->hashent = tcp_v6_sk_hashfn(sk))];
-
-               if((sk->next = *skp) != NULL)
-                       (*skp)->pprev = &sk->next;
-               *skp = sk;
-               sk->pprev = skp;
-               if(state == TCP_LISTEN)
-                       tcp_sk_bindify(sk);
+               __tcp_put_port(sk);
         }
         SOCKHASH_UNLOCK_WRITE();
  }
@@ -1063,8 +1056,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
         newsk->rcv_saddr= LOOPBACK4_IPV6;
  
         newsk->prot->hash(newsk);
-       add_to_prot_sklist(newsk);
-
+       tcp_inherit_port(sk, newsk);
         sk->data_ready(sk, 0); /* Deliver SIGIO */ 
  
         return newsk;
@@ -1666,18 +1658,214 @@ static int tcp_v6_destroy_sock(struct sock *sk)
  
         /* Clean up a locked TCP bind bucket, this only happens if a
          * port is allocated for a socket, but it never fully connects.
-        * In which case we will find num to be non-zero and daddr to
-        * be zero.
          */
-       if(ipv6_addr_any(&(sk->net_pinfo.af_inet6.daddr)) && sk->num != 0)
-               tcp_bucket_unlock(sk);
+       if(sk->prev != NULL)
+               tcp_put_port(sk);
  
         return inet6_destroy_sock(sk);
  }
  
+/* Proc filesystem TCPv6 sock list dumping. */
+static void get_openreq6(struct sock *sk, struct open_request *req, char *tmpbuf, int i)
+{
+       /* Format one open_request of listener 'sk' into tmpbuf as row 'i'. */
+       struct in6_addr *laddr = &req->af.v6_req.loc_addr;
+       struct in6_addr *raddr = &req->af.v6_req.rmt_addr;
+       unsigned long ttd = (unsigned long)(req->expires - jiffies);
+
+       sprintf(tmpbuf,
+               "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
+               "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d",
+               i,
+               laddr->s6_addr32[0], laddr->s6_addr32[1],
+               laddr->s6_addr32[2], laddr->s6_addr32[3],
+               ntohs(sk->sport),               /* local port comes from the listener */
+               raddr->s6_addr32[0], raddr->s6_addr32[1],
+               raddr->s6_addr32[2], raddr->s6_addr32[3],
+               ntohs(req->rmt_port),
+               TCP_SYN_RECV,                   /* state column is fixed */
+               0, 0,                           /* queue sizes are not reported */
+               1,                              /* only the expire timer is active */
+               ttd,
+               req->retrans,
+               sk->socket ? sk->socket->inode->i_uid : 0,
+               0,                              /* non standard timer */
+               0);                             /* open_requests have no inode */
+}
+
+static void get_tcp6_sock(struct sock *sp, char *tmpbuf, int i)
+{
+       struct in6_addr *dest, *src;
+       __u16 destp, srcp;
+       int timer_active, timer_active1, timer_active2;
+       unsigned long timer_expires;
+       struct tcp_opt *tp = &sp->tp_pinfo.af_tcp;
+
+       dest  = &sp->net_pinfo.af_inet6.daddr;
+       src   = &sp->net_pinfo.af_inet6.rcv_saddr;
+       destp = ntohs(sp->dport);
+       srcp  = ntohs(sp->sport);
+       timer_active1 = tp->retransmit_timer.prev != NULL;     /* linked? */
+       timer_active2 = sp->timer.prev != NULL;                /* linked? */
+       timer_active    = 0;
+       timer_expires   = ~0UL; /* all-ones for any width of unsigned long */
+       if (timer_active1 && tp->retransmit_timer.expires < timer_expires) {
+               timer_active    = 1;
+               timer_expires   = tp->retransmit_timer.expires;
+       }
+       if (timer_active2 && sp->timer.expires < timer_expires) {
+               timer_active    = 2;    /* sp->timer expires sooner */
+               timer_expires   = sp->timer.expires;
+       }
+       if(timer_active == 0)           /* nothing pending: delta is vs. jiffies */
+               timer_expires = jiffies;
+
+       sprintf(tmpbuf,
+               "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
+               "%02X %08X:%08X %02X:%08lX %08X %5d %8d %ld",
+               i,
+               src->s6_addr32[0], src->s6_addr32[1],
+               src->s6_addr32[2], src->s6_addr32[3], srcp,
+               dest->s6_addr32[0], dest->s6_addr32[1],
+               dest->s6_addr32[2], dest->s6_addr32[3], destp,
+               sp->state, 
+               tp->write_seq-tp->snd_una, tp->rcv_nxt-tp->copied_seq,
+               timer_active, timer_expires-jiffies,
+               tp->retransmits,
+               sp->socket ? sp->socket->inode->i_uid : 0,
+               timer_active ? sp->timeout : 0,
+               sp->socket ? sp->socket->inode->i_ino : 0);
+}
+
+static void get_timewait6_sock(struct tcp_tw_bucket *tw, char *tmpbuf, int i)
+{
+       extern int tcp_tw_death_row_slot;
+       struct in6_addr *laddr = &tw->v6_rcv_saddr;
+       struct in6_addr *raddr = &tw->v6_daddr;
+       __u16 lport = ntohs(tw->sport);
+       __u16 rport = ntohs(tw->dport);
+       int dist = tw->death_slot;
+
+       /* Turn the bucket's death slot into a slot distance from
+        * tcp_tw_death_row_slot, wrapping at TCP_TWKILL_SLOTS.
+        */
+       if (dist <= tcp_tw_death_row_slot)
+               dist = tcp_tw_death_row_slot - dist;
+       else
+               dist = (TCP_TWKILL_SLOTS - dist) + tcp_tw_death_row_slot;
+
+       sprintf(tmpbuf,
+               "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
+               "%02X %08X:%08X %02X:%08X %08X %5d %8d %d",
+               i,
+               laddr->s6_addr32[0], laddr->s6_addr32[1],
+               laddr->s6_addr32[2], laddr->s6_addr32[3], lport,
+               raddr->s6_addr32[0], raddr->s6_addr32[1],
+               raddr->s6_addr32[2], raddr->s6_addr32[3], rport,
+               TCP_TIME_WAIT, 0, 0,            /* state and both queue fields */
+               3, dist * TCP_TWKILL_PERIOD,    /* timer fields */
+               0, 0, 0, 0);                    /* remaining fields are zero */
+}
+
+int tcp6_get_info(char *buffer, char **start, off_t offset, int length, int dummy)
+{
+       char line[150];
+       off_t skip, end_pos = 149;
+       int filled = 0, slot = 0, i;
+
+       /* The header row is emitted only when the read starts inside it. */
+       if (offset < 149)
+               filled = sprintf(buffer, "%-148s\n",
+                              "  sl  "                                         /* 6 */
+                              "local_address                         "         /* 38 */
+                              "remote_address                        "         /* 38 */
+                              "st tx_queue rx_queue tr tm->when retrnsmt"      /* 41 */
+                              "   uid  timeout inode");                        /* 21 */
+                                                                               /*----*/
+                                                                               /*144 */
+
+       SOCKHASH_LOCK_READ();
+
+       /* First, walk listening socket table. */
+       for (i = 0; i < TCP_LHTABLE_SIZE; i++) {
+               struct sock *sk;
+
+               for (sk = tcp_listening_hash[i]; sk; sk = sk->next, slot++) {
+                       struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
+                       struct open_request *req;
+
+                       if (sk->family != PF_INET6)
+                               continue;
+                       end_pos += 149;
+                       if (end_pos >= offset) {
+                               get_tcp6_sock(sk, line, slot);
+                               filled += sprintf(buffer+filled, "%-148s\n", line);
+                               if (filled >= length)
+                                       goto out;
+                       }
+                       /* Then its syn_wait_queue entries that have no sock yet. */
+                       for (req = tp->syn_wait_queue; req; req = req->dl_next, slot++) {
+                               if (req->sk)
+                                       continue;
+                               end_pos += 149;
+                               if (end_pos >= offset) {
+                                       get_openreq6(sk, req, line, slot);
+                                       filled += sprintf(buffer+filled, "%-148s\n", line);
+                                       if (filled >= length)
+                                               goto out;
+                               }
+                       }
+               }
+       }
+
+       /* Next, walk established hash chain. */
+       for (i = 0; i < (tcp_ehash_size >> 1); i++) {
+               struct sock *sk;
+
+               for (sk = tcp_ehash[i]; sk; sk = sk->next, slot++) {
+                       if (sk->family != PF_INET6)
+                               continue;
+                       end_pos += 149;
+                       if (end_pos >= offset) {
+                               get_tcp6_sock(sk, line, slot);
+                               filled += sprintf(buffer+filled, "%-148s\n", line);
+                               if (filled >= length)
+                                       goto out;
+                       }
+               }
+       }
+
+       /* Finally, walk time wait buckets. */
+       for (i = (tcp_ehash_size>>1); i < tcp_ehash_size; i++) {
+               struct tcp_tw_bucket *tw = (struct tcp_tw_bucket *)tcp_ehash[i];
+
+               for ( ; tw != NULL; tw = (struct tcp_tw_bucket *)tw->next, slot++) {
+                       if (tw->family != PF_INET6)
+                               continue;
+                       end_pos += 149;
+                       if (end_pos >= offset) {
+                               get_timewait6_sock(tw, line, slot);
+                               filled += sprintf(buffer+filled, "%-148s\n", line);
+                               if (filled >= length)
+                                       goto out;
+                       }
+               }
+       }
+
+out:
+       SOCKHASH_UNLOCK_READ();
+       /* Point *start 'skip' bytes in and clamp the count to [0, length]. */
+       skip = filled - (end_pos - offset);
+       *start = buffer + skip;
+       filled -= skip;
+       if (filled > length)
+               filled = length;
+       if (filled < 0)
+               filled = 0;
+       return filled;
+}
+
  struct proto tcpv6_prot = {
-       (struct sock *)&tcpv6_prot,     /* sklist_next */
-       (struct sock *)&tcpv6_prot,     /* sklist_prev */
         tcp_close,                      /* close */
         tcp_v6_connect,                 /* connect */
         tcp_accept,                     /* accept */
@@ -1697,9 +1885,7 @@ struct proto tcpv6_prot = {
         tcp_v6_do_rcv,                  /* backlog_rcv */
         tcp_v6_hash,                    /* hash */
         tcp_v6_unhash,                  /* unhash */
-       tcp_v6_rehash,                  /* rehash */
-       tcp_good_socknum,               /* good_socknum */
-       tcp_v6_verify_bind,             /* verify_bind */
+       tcp_v6_get_port,                /* get_port */
         128,                            /* max_header */
         0,                              /* retransmits */
         "TCPv6",                        /* name */
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c

index da020d8fbee6f85cf23e34a3b58476a4643b98b4..b3045c694d6293a0e53742b735386add89c61155 100644 (file)
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -7,7 +7,7 @@
   *
   *     Based on linux/ipv4/udp.c
   *
- *     $Id: udp.c,v 1.42 1999/06/09 10:11:24 davem Exp $
+ *     $Id: udp.c,v 1.43 1999/07/02 11:26:44 davem Exp $
   *
   *     This program is free software; you can redistribute it and/or
   *      modify it under the terms of the GNU General Public License
@@ -49,101 +49,102 @@ struct udp_mib udp_stats_in6;
  /* Grrr, addr_type already calculated by caller, but I don't want
   * to add some silly "cookie" argument to this method just for that.
   */
-static int udp_v6_verify_bind(struct sock *sk, unsigned short snum)
+static int udp_v6_get_port(struct sock *sk, unsigned short snum)
  {
-       struct sock *sk2;
-       int addr_type = ipv6_addr_type(&sk->net_pinfo.af_inet6.rcv_saddr);
-       int retval = 0, sk_reuse = sk->reuse;
-
-       SOCKHASH_LOCK_READ();
-       for(sk2 = udp_hash[snum & (UDP_HTABLE_SIZE - 1)]; sk2 != NULL; sk2 = sk2->next) {
-               if((sk2->num == snum) && (sk2 != sk)) {
-                       unsigned char state = sk2->state;
-                       int sk2_reuse = sk2->reuse;
-
-                       /* Two sockets can be bound to the same port if they're
-                        * bound to different interfaces.
-                        */
-
-                       if(sk2->bound_dev_if != sk->bound_dev_if)
-                               continue;
-
-                       if(addr_type == IPV6_ADDR_ANY || (!sk2->rcv_saddr)) {
-                               if((!sk2_reuse)                 ||
-                                  (!sk_reuse)                  ||
-                                  (state == TCP_LISTEN)) {
-                                       retval = 1;
-                                       break;
-                               }
-                       } else if(!ipv6_addr_cmp(&sk->net_pinfo.af_inet6.rcv_saddr,
-                                                &sk2->net_pinfo.af_inet6.rcv_saddr)) {
-                               if((!sk_reuse)                  ||
-                                  (!sk2_reuse)                 ||
-                                  (state == TCP_LISTEN)) {
-                                       retval = 1;
-                                       break;
-                               }
+       SOCKHASH_LOCK_WRITE();
+       if (snum == 0) {                /* no port given: choose one */
+               int best_size_so_far, best, result, i;
+
+               if (udp_port_rover > sysctl_local_port_range[1] ||
+                   udp_port_rover < sysctl_local_port_range[0])
+                       udp_port_rover = sysctl_local_port_range[0];
+               best_size_so_far = 32767;
+               best = result = udp_port_rover;
+               for (i = 0; i < UDP_HTABLE_SIZE; i++, result++) {
+                       struct sock *chain;     /* was 'sk', hid the parameter */
+                       int size;
+
+                       chain = udp_hash[result & (UDP_HTABLE_SIZE - 1)];
+                       if (!chain) {           /* empty hash chain: use it */
+                               if (result > sysctl_local_port_range[1])
+                                       result = sysctl_local_port_range[0] +
+                                               ((result - sysctl_local_port_range[0]) &
+                                                (UDP_HTABLE_SIZE - 1));
+                               goto gotit;
                         }
+                       size = 0;
+                       do {
+                               if (++size >= best_size_so_far)
+                                       goto next;
+                       } while ((chain = chain->next) != NULL);
+                       best_size_so_far = size;
+                       best = result;
+               next: ;                 /* a label must precede a statement */
+               }
+               result = best;          /* start from the shortest chain seen */
+               for(;; result += UDP_HTABLE_SIZE) {
+                       if (result > sysctl_local_port_range[1])
+                               result = sysctl_local_port_range[0]
+                                       + ((result - sysctl_local_port_range[0]) &
+                                          (UDP_HTABLE_SIZE - 1));
+                       if (!udp_lport_inuse(result))
+                               break;
+               }
+gotit:
+               udp_port_rover = snum = result;
+       } else {
+               struct sock *sk2;
+               int addr_type = ipv6_addr_type(&sk->net_pinfo.af_inet6.rcv_saddr);
+
+               for (sk2 = udp_hash[snum & (UDP_HTABLE_SIZE - 1)];
+                    sk2 != NULL;
+                    sk2 = sk2->next) {
+                       if (sk2->num == snum &&
+                           sk2 != sk &&
+                           sk2->bound_dev_if == sk->bound_dev_if &&
+                           (!sk2->rcv_saddr ||
+                            addr_type == IPV6_ADDR_ANY ||
+                            !ipv6_addr_cmp(&sk->net_pinfo.af_inet6.rcv_saddr,
+                                           &sk2->net_pinfo.af_inet6.rcv_saddr)) &&
+                           (!sk2->reuse || !sk->reuse))
+                               goto fail;
                 }
         }
-       SOCKHASH_UNLOCK_READ();
-       return retval;
-}
-
-static void udp_v6_hash(struct sock *sk)
-{
-       struct sock **skp;
-       int num = sk->num;
  
-       num &= (UDP_HTABLE_SIZE - 1);
-       skp = &udp_hash[num];
+       sk->num = snum;                 /* record the port on the socket */
+       SOCKHASH_UNLOCK_WRITE();
+       return 0;                       /* port assigned */
  
-       SOCKHASH_LOCK_WRITE();
-       sk->next = *skp;
-       *skp = sk;
-       sk->hashent = num;
+fail:
         SOCKHASH_UNLOCK_WRITE();
+       return 1;                       /* requested port conflicts */
  }
  
-static void udp_v6_unhash(struct sock *sk)
+static void udp_v6_hash(struct sock *sk)
  {
-       struct sock **skp;
-       int num = sk->num;
-
-       num &= (UDP_HTABLE_SIZE - 1);
-       skp = &udp_hash[num];
+       struct sock **skp = &udp_hash[sk->num & (UDP_HTABLE_SIZE - 1)];
  
         SOCKHASH_LOCK_WRITE();
-       while(*skp != NULL) {
-               if(*skp == sk) {
-                       *skp = sk->next;
-                       break;
-               }
-               skp = &((*skp)->next);
-       }
+       if ((sk->next = *skp) != NULL)          /* push sk at the chain head */
+               (*skp)->pprev = &sk->next;
+       *skp = sk;
+       sk->pprev = skp;                        /* address of the pointer to sk */
+       sk->prot->inuse++;                      /* bump the in-use count */
+       if(sk->prot->highestinuse < sk->prot->inuse)
+               sk->prot->highestinuse = sk->prot->inuse;       /* new peak */
         SOCKHASH_UNLOCK_WRITE();
  }
  
-static void udp_v6_rehash(struct sock *sk)
+static void udp_v6_unhash(struct sock *sk)
  {
-       struct sock **skp;
-       int num = sk->num;
-       int oldnum = sk->hashent;
-
-       num &= (UDP_HTABLE_SIZE - 1);
-       skp = &udp_hash[oldnum];
-
         SOCKHASH_LOCK_WRITE();
-       while(*skp != NULL) {
-               if(*skp == sk) {
-                       *skp = sk->next;
-                       break;
-               }
-               skp = &((*skp)->next);
+       if (sk->pprev) {                        /* non-NULL pprev: sk is linked */
+               if (sk->next)
+                       sk->next->pprev = sk->pprev;
+               *sk->pprev = sk->next;          /* splice sk out of the chain */
+               sk->pprev = NULL;               /* a second call does nothing */
+               sk->prot->inuse--;
         }
-       sk->next = udp_hash[num];
-       udp_hash[num] = sk;
-       sk->hashent = num;
         SOCKHASH_UNLOCK_WRITE();
  }
  
@@ -216,10 +217,10 @@ int udpv6_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
         }
  
         if (addr_len < sizeof(*usin)) 
-               return(-EINVAL);
+               return -EINVAL;
  
         if (usin->sin6_family && usin->sin6_family != AF_INET6) 
-               return(-EAFNOSUPPORT);
+               return -EAFNOSUPPORT;
  
         fl.fl6_flowlabel = 0;
         if (np->sndflow) {
@@ -764,7 +765,7 @@ static int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, int ulen)
                 return -EMSGSIZE;
         
         if (msg->msg_flags & ~(MSG_DONTROUTE|MSG_DONTWAIT))
-               return(-EINVAL);
+               return -EINVAL;
  
         fl.fl6_flowlabel = 0;
  
@@ -773,13 +774,13 @@ static int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, int ulen)
                         return udp_sendmsg(sk, msg, ulen);
  
                 if (addr_len < sizeof(*sin6))
-                       return(-EINVAL);
+                       return -EINVAL;
  
                 if (sin6->sin6_family && sin6->sin6_family != AF_INET6)
-                       return(-EINVAL);
+                       return -EINVAL;
  
                 if (sin6->sin6_port == 0)
-                       return(-EINVAL);
+                       return -EINVAL;
  
                 udh.uh.dest = sin6->sin6_port;
                 daddr = &sin6->sin6_addr;
@@ -800,7 +801,7 @@ static int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, int ulen)
                         daddr = &sk->net_pinfo.af_inet6.daddr;
         } else {
                 if (sk->state != TCP_ESTABLISHED)
-                       return(-ENOTCONN);
+                       return -ENOTCONN;
  
                 udh.uh.dest = sk->dport;
                 daddr = &sk->net_pinfo.af_inet6.daddr;
@@ -885,10 +886,80 @@ static struct inet6_protocol udpv6_protocol =
         "UDPv6"                 /* name                 */
  };
  
+static void get_udp6_sock(struct sock *sp, char *tmpbuf, int i)
+{
+       struct in6_addr *dest = &sp->net_pinfo.af_inet6.daddr;
+       struct in6_addr *src  = &sp->net_pinfo.af_inet6.rcv_saddr;
+       __u16 destp = ntohs(sp->dport);
+       __u16 srcp  = ntohs(sp->sport);
+       /* 2 while sp->timer is linked (prev != NULL), otherwise 0. */
+       int timer_active = (sp->timer.prev != NULL) ? 2 : 0;
+       unsigned long timer_expires;
+
+       timer_expires = (timer_active == 2 ? sp->timer.expires : jiffies);
+       sprintf(tmpbuf,
+               "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
+               "%02X %08X:%08X %02X:%08lX %08X %5d %8d %ld",
+               i,
+               src->s6_addr32[0], src->s6_addr32[1],
+               src->s6_addr32[2], src->s6_addr32[3], srcp,
+               dest->s6_addr32[0], dest->s6_addr32[1],
+               dest->s6_addr32[2], dest->s6_addr32[3], destp,
+               sp->state, 
+               atomic_read(&sp->wmem_alloc), atomic_read(&sp->rmem_alloc),
+               timer_active, timer_expires-jiffies, 0,
+               /* sp->socket is NULL-checked for the inode; guard the uid too. */
+               sp->socket ? sp->socket->inode->i_uid : 0,
+               timer_active ? sp->timeout : 0,
+               sp->socket ? sp->socket->inode->i_ino : 0);
+}
+
+int udp6_get_info(char *buffer, char **start, off_t offset, int length, int dummy)
+{
+       char line[150];
+       off_t end_pos = 149;
+       off_t skip;
+       int filled = 0, seen = 0, i;
+
+       if (offset < 149)       /* header row only when the read starts inside it */
+               filled = sprintf(buffer, "%-148s\n",
+                              "  sl  "                                         /* 6 */
+                              "local_address                         "         /* 38 */
+                              "remote_address                        "         /* 38 */
+                              "st tx_queue rx_queue tr tm->when retrnsmt"      /* 41 */
+                              "   uid  timeout inode");                        /* 21 */
+                                                                               /*----*/
+                                                                               /*144 */
+
+       SOCKHASH_LOCK_READ();
+       for (i = 0; i < UDP_HTABLE_SIZE; i++) {
+               struct sock *sk = udp_hash[i];
+
+               for ( ; sk; sk = sk->next, seen++) {
+                       if (sk->family != PF_INET6)
+                               continue;
+                       end_pos += 149;
+                       if (end_pos >= offset) {
+                               get_udp6_sock(sk, line, i);
+                               filled += sprintf(buffer+filled, "%-148s\n", line);
+                               if (filled >= length)
+                                       goto out;
+                       }
+               }
+       }
+out:
+       SOCKHASH_UNLOCK_READ();
+       skip = filled - (end_pos - offset);     /* bytes to skip in buffer */
+       *start = buffer + skip;
+       filled -= skip;
+       if (filled > length)
+               filled = length;
+       if (filled < 0)
+               filled = 0;
+       return filled;
+}
  
  struct proto udpv6_prot = {
-       (struct sock *)&udpv6_prot,     /* sklist_next */
-       (struct sock *)&udpv6_prot,     /* sklist_prev */
         udpv6_close,                    /* close */
         udpv6_connect,                  /* connect */
         NULL,                           /* accept */
@@ -908,9 +979,7 @@ struct proto udpv6_prot = {
         udpv6_queue_rcv_skb,            /* backlog_rcv */
         udp_v6_hash,                    /* hash */
         udp_v6_unhash,                  /* unhash */
-       udp_v6_rehash,                  /* rehash */
-       udp_good_socknum,               /* good_socknum */
-       udp_v6_verify_bind,             /* verify_bind */
+       udp_v6_get_port,                /* get_port */
         128,                            /* max_header */
         0,                              /* retransmits */
         "UDP",                          /* name */
diff --git a/net/netsyms.c b/net/netsyms.c

index ff92b23f11dc441dea4c5b41fddd878c10f6ccbe..b67fe251d03bf000703af11b0a1a5dca525f3091 100644 (file)
--- a/net/netsyms.c
+++ b/net/netsyms.c
@@ -60,6 +60,9 @@ extern __u32 sysctl_rmem_max;
  #include <net/transp_v6.h>
  
  extern int tcp_tw_death_row_slot;
+extern int sysctl_local_port_range[2];
+extern int tcp_port_rover;
+extern int udp_port_rover;
  #endif
  
  #endif
@@ -281,13 +284,11 @@ EXPORT_SYMBOL(inet_sendmsg);
  EXPORT_SYMBOL(inet_recvmsg);
  
  /* Socket demultiplexing. */
-EXPORT_SYMBOL(tcp_good_socknum);
  EXPORT_SYMBOL(tcp_ehash);
  EXPORT_SYMBOL(tcp_ehash_size);
  EXPORT_SYMBOL(tcp_listening_hash);
  EXPORT_SYMBOL(tcp_bhash);
  EXPORT_SYMBOL(tcp_bhash_size);
-EXPORT_SYMBOL(udp_good_socknum);
  EXPORT_SYMBOL(udp_hash);
  
  EXPORT_SYMBOL(destroy_sock);
@@ -328,7 +329,9 @@ EXPORT_SYMBOL(tcp_v4_send_check);
  EXPORT_SYMBOL(tcp_v4_conn_request);
  EXPORT_SYMBOL(tcp_create_openreq_child);
  EXPORT_SYMBOL(tcp_bucket_create);
-EXPORT_SYMBOL(tcp_bucket_unlock);
+EXPORT_SYMBOL(__tcp_put_port);
+EXPORT_SYMBOL(tcp_put_port);
+EXPORT_SYMBOL(tcp_inherit_port);
  EXPORT_SYMBOL(tcp_v4_syn_recv_sock);
  EXPORT_SYMBOL(tcp_v4_do_rcv);
  EXPORT_SYMBOL(tcp_v4_connect);
@@ -344,6 +347,9 @@ EXPORT_SYMBOL(tcp_transmit_skb);
  EXPORT_SYMBOL(tcp_connect);
  EXPORT_SYMBOL(tcp_make_synack);
  EXPORT_SYMBOL(tcp_tw_death_row_slot);
+EXPORT_SYMBOL(sysctl_local_port_range);
+EXPORT_SYMBOL(tcp_port_rover);
+EXPORT_SYMBOL(udp_port_rover);
  EXPORT_SYMBOL(tcp_sync_mss);
  EXPORT_SYMBOL(net_statistics); 
  
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c

index d0de24eff8ad54dca3cda8aeeb2d3e7aef7af89b..87c0aedc1d01eca5a5e4deb9bcf36814b1b2068d 100644 (file)
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -42,7 +42,6 @@
  #define __KERNEL_SYSCALLS__
  
  #include <linux/version.h>
-#include <linux/config.h>
  #include <linux/types.h>
  #include <linux/malloc.h>
  #include <linux/sched.h>
author	Linus Torvalds <torvalds@linuxfoundation.org>
	Fri, 23 Nov 2007 20:25:59 +0000 (15:25 -0500)
committer	Linus Torvalds <torvalds@linuxfoundation.org>
	Fri, 23 Nov 2007 20:25:59 +0000 (15:25 -0500)
arch/i386/kernel/ptrace.c		patch \| blob \| history
arch/i386/mm/fault.c		patch \| blob \| history
drivers/block/hpt34x.c		patch \| blob \| history
drivers/block/loop.c		patch \| blob \| history
drivers/block/piix.c		patch \| blob \| history
drivers/char/sysrq.c		patch \| blob \| history
drivers/char/tty_io.c		patch \| blob \| history
drivers/misc/parport_daisy.c		patch \| blob \| history
drivers/scsi/st.c		patch \| blob \| history
drivers/sgi/char/usema.c		patch \| blob \| history
fs/buffer.c		patch \| blob \| history
fs/dquot.c		patch \| blob \| history
fs/exec.c		patch \| blob \| history
fs/file_table.c		patch \| blob \| history
fs/inode.c		patch \| blob \| history
fs/nfs/write.c		patch \| blob \| history
fs/open.c		patch \| blob \| history
fs/proc/array.c		patch \| blob \| history
fs/proc/inode.c		patch \| blob \| history
fs/proc/mem.c		patch \| blob \| history
fs/proc/root.c		patch \| blob \| history
fs/super.c		patch \| blob \| history
include/linux/file.h		patch \| blob \| history
include/linux/fs.h		patch \| blob \| history
include/linux/mm.h		patch \| blob \| history
include/linux/proc_fs.h		patch \| blob \| history
include/linux/sched.h		patch \| blob \| history
include/linux/tty.h		patch \| blob \| history
include/net/sock.h		patch \| blob \| history
include/net/tcp.h		patch \| blob \| history
include/net/udp.h		patch \| blob \| history
ipc/shm.c		patch \| blob \| history
kernel/acct.c		patch \| blob \| history
kernel/exit.c		patch \| blob \| history
kernel/fork.c		patch \| blob \| history
kernel/ksyms.c		patch \| blob \| history
kernel/sys.c		patch \| blob \| history
mm/memory.c		patch \| blob \| history
mm/mmap.c		patch \| blob \| history
mm/vmscan.c		patch \| blob \| history
net/core/scm.c		patch \| blob \| history
net/econet/econet.c		patch \| blob \| history
net/ipv4/af_inet.c		patch \| blob \| history
net/ipv4/proc.c		patch \| blob \| history
net/ipv4/raw.c		patch \| blob \| history
net/ipv4/tcp_input.c		patch \| blob \| history
net/ipv4/tcp_ipv4.c		patch \| blob \| history
net/ipv4/tcp_timer.c		patch \| blob \| history
net/ipv4/udp.c		patch \| blob \| history
net/ipv6/af_inet6.c		patch \| blob \| history
net/ipv6/proc.c		patch \| blob \| history
net/ipv6/raw.c		patch \| blob \| history
net/ipv6/tcp_ipv6.c		patch \| blob \| history
net/ipv6/udp.c		patch \| blob \| history
net/netsyms.c		patch \| blob \| history
net/sunrpc/xprt.c		patch \| blob \| history