git.neil.brown.name Git - history.git/commitdiff
Import 2.3.10pre2 2.3.10pre2
author Linus Torvalds <torvalds@linuxfoundation.org>
Fri, 23 Nov 2007 20:25:59 +0000 (15:25 -0500)
committer Linus Torvalds <torvalds@linuxfoundation.org>
Fri, 23 Nov 2007 20:25:59 +0000 (15:25 -0500)
56 files changed:
arch/i386/kernel/ptrace.c
arch/i386/mm/fault.c
drivers/block/hpt34x.c
drivers/block/loop.c
drivers/block/piix.c
drivers/char/sysrq.c
drivers/char/tty_io.c
drivers/misc/parport_daisy.c
drivers/scsi/st.c
drivers/sgi/char/usema.c
fs/buffer.c
fs/dquot.c
fs/exec.c
fs/file_table.c
fs/inode.c
fs/nfs/write.c
fs/open.c
fs/proc/array.c
fs/proc/inode.c
fs/proc/mem.c
fs/proc/root.c
fs/super.c
include/linux/file.h
include/linux/fs.h
include/linux/mm.h
include/linux/proc_fs.h
include/linux/sched.h
include/linux/tty.h
include/net/sock.h
include/net/tcp.h
include/net/udp.h
ipc/shm.c
kernel/acct.c
kernel/exit.c
kernel/fork.c
kernel/ksyms.c
kernel/sys.c
mm/memory.c
mm/mmap.c
mm/vmscan.c
net/core/scm.c
net/econet/econet.c
net/ipv4/af_inet.c
net/ipv4/proc.c
net/ipv4/raw.c
net/ipv4/tcp_input.c
net/ipv4/tcp_ipv4.c
net/ipv4/tcp_timer.c
net/ipv4/udp.c
net/ipv6/af_inet6.c
net/ipv6/proc.c
net/ipv6/raw.c
net/ipv6/tcp_ipv6.c
net/ipv6/udp.c
net/netsyms.c
net/sunrpc/xprt.c

index 28aea16a8bc26d9a2e80b0f98c81140de9c756cf..09664ba815c087c323b9a6aa5f749cd43ff298da 100644 (file)
--- a/arch/i386/kernel/ptrace.c
+++ b/arch/i386/kernel/ptrace.c
@@ -73,7 +73,8 @@ static inline int put_stack_long(struct task_struct *task, int offset,
  * and that it is in the task area before calling this: this routine does
  * no checking.
  */
-static unsigned long get_long(struct mm_struct * mm, struct vm_area_struct * vma, unsigned long addr)
+static unsigned long get_long(struct task_struct * tsk, 
+       struct vm_area_struct * vma, unsigned long addr)
 {
        pgd_t * pgdir;
        pmd_t * pgmiddle;
@@ -83,7 +84,7 @@ static unsigned long get_long(struct mm_struct * mm, struct vm_area_struct * vma
 repeat:
        pgdir = pgd_offset(vma->vm_mm, addr);
        if (pgd_none(*pgdir)) {
-               handle_mm_fault(mm, vma, addr, 0);
+               handle_mm_fault(tsk, vma, addr, 0);
                goto repeat;
        }
        if (pgd_bad(*pgdir)) {
@@ -93,7 +94,7 @@ repeat:
        }
        pgmiddle = pmd_offset(pgdir, addr);
        if (pmd_none(*pgmiddle)) {
-               handle_mm_fault(mm, vma, addr, 0);
+               handle_mm_fault(tsk, vma, addr, 0);
                goto repeat;
        }
        if (pmd_bad(*pgmiddle)) {
@@ -103,7 +104,7 @@ repeat:
        }
        pgtable = pte_offset(pgmiddle, addr);
        if (!pte_present(*pgtable)) {
-               handle_mm_fault(mm, vma, addr, 0);
+               handle_mm_fault(tsk, vma, addr, 0);
                goto repeat;
        }
        page = pte_page(*pgtable);
@@ -123,7 +124,7 @@ repeat:
  * Now keeps R/W state of page so that a text page stays readonly
  * even if a debugger scribbles breakpoints into it.  -M.U-
  */
-static void put_long(struct mm_struct * mm, struct vm_area_struct * vma, unsigned long addr,
+static void put_long(struct task_struct * tsk, struct vm_area_struct * vma, unsigned long addr,
        unsigned long data)
 {
        pgd_t *pgdir;
@@ -134,7 +135,7 @@ static void put_long(struct mm_struct * mm, struct vm_area_struct * vma, unsigne
 repeat:
        pgdir = pgd_offset(vma->vm_mm, addr);
        if (!pgd_present(*pgdir)) {
-               handle_mm_fault(mm, vma, addr, 1);
+               handle_mm_fault(tsk, vma, addr, 1);
                goto repeat;
        }
        if (pgd_bad(*pgdir)) {
@@ -144,7 +145,7 @@ repeat:
        }
        pgmiddle = pmd_offset(pgdir, addr);
        if (pmd_none(*pgmiddle)) {
-               handle_mm_fault(mm, vma, addr, 1);
+               handle_mm_fault(tsk, vma, addr, 1);
                goto repeat;
        }
        if (pmd_bad(*pgmiddle)) {
@@ -154,12 +155,12 @@ repeat:
        }
        pgtable = pte_offset(pgmiddle, addr);
        if (!pte_present(*pgtable)) {
-               handle_mm_fault(mm, vma, addr, 1);
+               handle_mm_fault(tsk, vma, addr, 1);
                goto repeat;
        }
        page = pte_page(*pgtable);
        if (!pte_write(*pgtable)) {
-               handle_mm_fault(mm, vma, addr, 1);
+               handle_mm_fault(tsk, vma, addr, 1);
                goto repeat;
        }
 /* this is a hack for non-kernel-mapped video buffers and similar */
@@ -175,10 +176,10 @@ repeat:
  * This routine checks the page boundaries, and that the offset is
  * within the task area. It then calls get_long() to read a long.
  */
-static int read_long(struct mm_struct * mm, unsigned long addr,
+static int read_long(struct task_struct * tsk, unsigned long addr,
        unsigned long * result)
 {
-       struct vm_area_struct * vma = find_extend_vma(mm, addr);
+       struct vm_area_struct * vma = find_extend_vma(tsk, addr);
 
        if (!vma)
                return -EIO;
@@ -191,8 +192,8 @@ static int read_long(struct mm_struct * mm, unsigned long addr,
                        if (!vma_high || vma_high->vm_start != vma->vm_end)
                                return -EIO;
                }
-               low = get_long(mm, vma, addr & ~(sizeof(long)-1));
-               high = get_long(mm, vma_high, (addr+sizeof(long)) & ~(sizeof(long)-1));
+               low = get_long(tsk, vma, addr & ~(sizeof(long)-1));
+               high = get_long(tsk, vma_high, (addr+sizeof(long)) & ~(sizeof(long)-1));
                switch (addr & (sizeof(long)-1)) {
                        case 1:
                                low >>= 8;
@@ -209,7 +210,7 @@ static int read_long(struct mm_struct * mm, unsigned long addr,
                }
                *result = low;
        } else
-               *result = get_long(mm, vma, addr);
+               *result = get_long(tsk, vma, addr);
        return 0;
 }
 
@@ -217,10 +218,10 @@ static int read_long(struct mm_struct * mm, unsigned long addr,
  * This routine checks the page boundaries, and that the offset is
  * within the task area. It then calls put_long() to write a long.
  */
-static int write_long(struct mm_struct * mm, unsigned long addr,
+static int write_long(struct task_struct * tsk, unsigned long addr,
        unsigned long data)
 {
-       struct vm_area_struct * vma = find_extend_vma(mm, addr);
+       struct vm_area_struct * vma = find_extend_vma(tsk, addr);
 
        if (!vma)
                return -EIO;
@@ -233,8 +234,8 @@ static int write_long(struct mm_struct * mm, unsigned long addr,
                        if (!vma_high || vma_high->vm_start != vma->vm_end)
                                return -EIO;
                }
-               low = get_long(mm, vma, addr & ~(sizeof(long)-1));
-               high = get_long(mm, vma_high, (addr+sizeof(long)) & ~(sizeof(long)-1));
+               low = get_long(tsk, vma, addr & ~(sizeof(long)-1));
+               high = get_long(tsk, vma_high, (addr+sizeof(long)) & ~(sizeof(long)-1));
                switch (addr & (sizeof(long)-1)) {
                        case 0: /* shouldn't happen, but safety first */
                                low = data;
@@ -258,10 +259,10 @@ static int write_long(struct mm_struct * mm, unsigned long addr,
                                high |= data >> 8;
                                break;
                }
-               put_long(mm, vma, addr & ~(sizeof(long)-1),low);
-               put_long(mm, vma_high, (addr+sizeof(long)) & ~(sizeof(long)-1),high);
+               put_long(tsk, vma, addr & ~(sizeof(long)-1),low);
+               put_long(tsk, vma_high, (addr+sizeof(long)) & ~(sizeof(long)-1),high);
        } else
-               put_long(mm, vma, addr, data);
+               put_long(tsk, vma, addr, data);
        return 0;
 }
 
@@ -403,7 +404,7 @@ asmlinkage int sys_ptrace(long request, long pid, long addr, long data)
                        unsigned long tmp;
 
                        down(&child->mm->mmap_sem);
-                       ret = read_long(child->mm, addr, &tmp);
+                       ret = read_long(child, addr, &tmp);
                        up(&child->mm->mmap_sem);
                        if (ret >= 0)
                                ret = put_user(tmp,(unsigned long *) data);
@@ -436,7 +437,7 @@ asmlinkage int sys_ptrace(long request, long pid, long addr, long data)
                case PTRACE_POKETEXT: /* write the word at location addr. */
                case PTRACE_POKEDATA:
                        down(&child->mm->mmap_sem);
-                       ret = write_long(child->mm,addr,data);
+                       ret = write_long(child,addr,data);
                        up(&child->mm->mmap_sem);
                        goto out;
 
index bb808c300ebb6e850d9190a89fb7f30c85a6b9b0..c3e423b216ef48caf4eb1b9d43afa460be108fe5 100644 (file)
--- a/arch/i386/mm/fault.c
+++ b/arch/i386/mm/fault.c
@@ -50,7 +50,8 @@ good_area:
        start &= PAGE_MASK;
 
        for (;;) {
-               handle_mm_fault(current->mm, vma, start, 1);
+               if (handle_mm_fault(current, vma, start, 1) <= 0)
+                       goto bad_area;
                if (!size)
                        break;
                size--;
@@ -162,8 +163,13 @@ good_area:
         * make sure we exit gracefully rather than endlessly redo
         * the fault.
         */
-       if (!handle_mm_fault(mm, vma, address, write))
-               goto do_sigbus;
+       {
+               int fault = handle_mm_fault(tsk, vma, address, write);
+               if (fault < 0)
+                       goto out_of_memory;
+               if (!fault)
+                       goto do_sigbus;
+       }
 
        /*
         * Did it hit the DOS screen memory VA from vm86 mode?
@@ -255,6 +261,13 @@ no_context:
  * We ran out of memory, or some other thing happened to us that made
  * us unable to handle the page fault gracefully.
  */
+out_of_memory:
+       up(&mm->mmap_sem);
+       printk("VM: killing process %s\n", tsk->comm);
+       if (error_code & 4)
+               do_exit(SIGKILL);
+       goto no_context;
+
 do_sigbus:
        up(&mm->mmap_sem);
 
index e853ee43893505eef0699a405a69034bc23cfd0c..01494ca1e019ad7869ffdc2a58a2195f9d72a59c 100644 (file)
--- a/drivers/block/hpt34x.c
+++ b/drivers/block/hpt34x.c
@@ -20,6 +20,7 @@
  *     = ((hwif->channel ? 2 : 0) + (drive->select.b.unit & 0x01));
  */
 
+#include <linux/config.h>
 #include <linux/types.h>
 #include <linux/kernel.h>
 #include <linux/delay.h>
index 26ffb26cac3da1a72a13d98d4b8e704d645de509..29d78129f32ebb939dd2c193bf64c36dad109d8a 100644 (file)
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -391,6 +391,7 @@ static int loop_set_fd(struct loop_device *lo, kdev_t dev, unsigned int arg)
                        lo->lo_backing_file->f_dentry = file->f_dentry;
                        lo->lo_backing_file->f_op = file->f_op;
                        lo->lo_backing_file->private_data = file->private_data;
+                       file_moveto(lo->lo_backing_file, file);
 
                        error = get_write_access(inode);
                        if (error) {
index 79a0e873f6daef27a2bd6b31b6e228c06f2c6b42..384712603e359eea75b3b72e4554d95b1c5b02fa 100644 (file)
--- a/drivers/block/piix.c
+++ b/drivers/block/piix.c
@@ -52,6 +52,7 @@
  * #endif
  */
 
+#include <linux/config.h>
 #include <linux/types.h>
 #include <linux/kernel.h>
 #include <linux/ioport.h>
index 574f1b1e9ef4fe836bbaadb3db292de694e237cf..e6f50ebf1050082d803688533bcdd6ea1a394544 100644 (file)
--- a/drivers/char/sysrq.c
+++ b/drivers/char/sysrq.c
@@ -150,15 +150,6 @@ void handle_sysrq(int key, struct pt_regs *pt_regs,
 
 /* Aux routines for the syncer */
 
-static void all_files_read_only(void)      /* Kill write permissions of all files */
-{
-       struct file *file;
-
-       for (file = inuse_filps; file; file = file->f_next)
-               if (file->f_dentry && atomic_read(&file->f_count) && S_ISREG(file->f_dentry->d_inode->i_mode))
-                       file->f_mode &= ~2;
-}
-
 static int is_local_disk(kdev_t dev)       /* Guess if the device is a local hard drive */
 {
        unsigned int major = MAJOR(dev);
@@ -192,6 +183,7 @@ static void go_sync(kdev_t dev, int remount_flag)
                struct super_block *sb = get_super(dev);
                struct vfsmount *vfsmnt;
                int ret, flags;
+               struct list_head *p;
 
                if (!sb) {
                        printk("Superblock not found\n");
@@ -201,6 +193,15 @@ static void go_sync(kdev_t dev, int remount_flag)
                        printk("R/O\n");
                        return;
                }
+
+               file_list_lock();
+               for (p = sb->s_files.next; p != &sb->s_files; p = p->next) {
+                       struct file *file = list_entry(p, struct file, f_list);
+                       if (file->f_dentry && file_count(file)
+                               && S_ISREG(file->f_dentry->d_inode->i_mode))
+                               file->f_mode &= ~2;
+               }
+               file_list_unlock();
                DQUOT_OFF(dev);
                fsync_dev(dev);
                flags = MS_RDONLY;
@@ -240,9 +241,6 @@ void do_emergency_sync(void)
        remount_flag = (emergency_sync_scheduled == EMERG_REMOUNT);
        emergency_sync_scheduled = 0;
 
-       if (remount_flag)
-               all_files_read_only();
-
        for (mnt = vfsmntlist; mnt; mnt = mnt->mnt_next)
                if (is_local_disk(mnt->mnt_dev))
                        go_sync(mnt->mnt_dev, remount_flag);
index 3951b542274eec8c0821f61bb4323c7b4c57d5fd..d0ceb78fe15b4f203d73b3b43eaf3e8911e740cf 100644 (file)
--- a/drivers/char/tty_io.c
+++ b/drivers/char/tty_io.c
@@ -173,13 +173,15 @@ inline int tty_paranoia_check(struct tty_struct *tty, kdev_t device,
 static int check_tty_count(struct tty_struct *tty, const char *routine)
 {
 #ifdef CHECK_TTY_COUNT
-       struct file *f;
+       struct list_head *p;
        int count = 0;
        
-       for(f = inuse_filps; f; f = f->f_next) {
-               if(f->private_data == tty)
+       file_list_lock();
+       for(p = tty->tty_files.next; p != &tty->tty_files; p = p->next) {
+               if(list_entry(p, struct file, f_list)->private_data == tty)
                        count++;
        }
+       file_list_unlock();
        if (tty->driver.type == TTY_DRIVER_TYPE_PTY &&
            tty->driver.subtype == PTY_TYPE_SLAVE &&
            tty->link && tty->link->count)
@@ -383,9 +385,9 @@ static struct file_operations hung_up_tty_fops = {
 void do_tty_hangup(void *data)
 {
        struct tty_struct *tty = (struct tty_struct *) data;
-       struct file * filp;
        struct file * cons_filp = NULL;
        struct task_struct *p;
+       struct list_head *l;
        int    closecount = 0, n;
 
        if (!tty)
@@ -395,13 +397,11 @@ void do_tty_hangup(void *data)
        lock_kernel();
        
        check_tty_count(tty, "do_tty_hangup");
-       for (filp = inuse_filps; filp; filp = filp->f_next) {
-               if (filp->private_data != tty)
-                       continue;
+       file_list_lock();
+       for (l = tty->tty_files.next; l != &tty->tty_files; l = l->next) {
+               struct file * filp = list_entry(l, struct file, f_list);
                if (!filp->f_dentry)
                        continue;
-               if (!filp->f_dentry->d_inode)
-                       continue;
                if (filp->f_dentry->d_inode->i_rdev == CONSOLE_DEV ||
                    filp->f_dentry->d_inode->i_rdev == SYSCONS_DEV) {
                        cons_filp = filp;
@@ -410,9 +410,10 @@ void do_tty_hangup(void *data)
                if (filp->f_op != &tty_fops)
                        continue;
                closecount++;
-               tty_fasync(-1, filp, 0);
+               tty_fasync(-1, filp, 0);        /* can't block */
                filp->f_op = &hung_up_tty_fops;
        }
+       file_list_unlock();
        
        /* FIXME! What are the locking issues here? This may me overdoing things.. */
        {
@@ -1307,6 +1308,7 @@ retry_open:
 init_dev_done:
 #endif
        filp->private_data = tty;
+       file_move(filp, &tty->tty_files);
        check_tty_count(tty, "tty_open");
        if (tty->driver.type == TTY_DRIVER_TYPE_PTY &&
            tty->driver.subtype == PTY_TYPE_MASTER)
@@ -1937,6 +1939,7 @@ static void initialize_tty_struct(struct tty_struct *tty)
        tty->tq_hangup.routine = do_tty_hangup;
        tty->tq_hangup.data = tty;
        sema_init(&tty->atomic_read, 1);
+       INIT_LIST_HEAD(&tty->tty_files);
 }
 
 /*
index 78b63cea26cbcb31442fd35ca0bda3f992a387bd..830d9d400d14454c0ffa29f95e1390df40603af2 100644 (file)
--- a/drivers/misc/parport_daisy.c
+++ b/drivers/misc/parport_daisy.c
@@ -15,7 +15,6 @@
  *
  */
 
-#include <linux/config.h>
 #include <linux/parport.h>
 #include <linux/delay.h>
 #include <asm/uaccess.h>
index dc702848d902a9009a89a415584fd5f3e20fc8bf..d43121e680915dd83fed5f1c5aa1028802d16117 100644 (file)
--- a/drivers/scsi/st.c
+++ b/drivers/scsi/st.c
@@ -890,7 +890,7 @@ scsi_tape_flush(struct file * filp)
     kdev_t devt = inode->i_rdev;
     int dev;
 
-    if (atomic_read(&filp->f_count) > 1)
+    if (file_count(filp) > 1)
        return 0;
 
     dev = TAPE_NR(devt);
index e91a944568efff7d5e89445fba5d99355d5c5bca..40db75465589cf610650b823cd49f4a80a0fbc5d 100644 (file)
--- a/drivers/sgi/char/usema.c
+++ b/drivers/sgi/char/usema.c
@@ -53,8 +53,8 @@ sgi_usema_attach (usattach_t * attach, struct irix_usema *usema)
        if (newfd < 0)
                return newfd;
        
-       current->files->fd [newfd] = usema->filp;
-       atomic_inc(&usema->filp->f_count);
+       get_file(usema);
+       fd_install(newfd, usema->filp);
        /* Is that it? */
        printk("UIOCATTACHSEMA: new usema fd is %d", newfd);
        return newfd;
index 0ddd121ab7463bda70d2f3c2f199fd96205234b7..108b385eaa4e4409badefe4a5ba9e769839bd719 100644 (file)
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -1597,7 +1597,7 @@ int brw_page(int rw, struct page *page, kdev_t dev, int b[], int size, int bmap)
                bh = bh->b_this_page;
        } while (bh != head);
        if (rw == READ)
-               ++current->mm->maj_flt;
+               ++current->maj_flt;
        if ((rw == READ) && nr) {
                if (Page_Uptodate(page))
                        BUG();
@@ -1663,7 +1663,7 @@ int block_read_full_page(struct file * file, struct page * page)
                nr++;
        } while (iblock++, (bh = bh->b_this_page) != head);
 
-       ++current->mm->maj_flt;
+       ++current->maj_flt;
        if (nr) {
                if (Page_Uptodate(page))
                        BUG();
index dfef0a63ab415375c1bf371bb9042336c481d98c..9dfbac082a679409ed49d833afc2a09215336cd5 100644 (file)
--- a/fs/dquot.c
+++ b/fs/dquot.c
@@ -577,32 +577,35 @@ we_slept:
 static void add_dquot_ref(kdev_t dev, short type)
 {
        struct super_block *sb = get_super(dev);
-       struct file *filp;
+       struct list_head *p;
        struct inode *inode;
 
        if (!sb || !sb->dq_op)
                return; /* nothing to do */
 
-       for (filp = inuse_filps; filp; filp = filp->f_next) {
+       file_list_lock();
+       for (p = sb->s_files.next; p != &sb->s_files; p = p->next) {
+               struct file *filp = list_entry(p, struct file, f_list);
                if (!filp->f_dentry)
                        continue;
-               if (filp->f_dentry->d_sb != sb)
-                       continue;
                inode = filp->f_dentry->d_inode;
                if (!inode)
                        continue;
                /* N.B. race problem -- filp could become unused */
                if (filp->f_mode & FMODE_WRITE) {
+                       file_list_unlock();
                        sb->dq_op->initialize(inode, type);
                        inode->i_flags |= S_QUOTA;
+                       file_list_lock();
                }
        }
+       file_list_unlock();
 }
 
 static void reset_dquot_ptrs(kdev_t dev, short type)
 {
        struct super_block *sb = get_super(dev);
-       struct file *filp;
+       struct list_head *p;
        struct inode *inode;
        struct dquot *dquot;
        int cnt;
@@ -614,11 +617,11 @@ restart:
        /* free any quota for unused dentries */
        shrink_dcache_sb(sb);
 
-       for (filp = inuse_filps; filp; filp = filp->f_next) {
+       file_list_lock();
+       for (p = sb->s_files.next; p != &sb->s_files; p = p->next) {
+               struct file *filp = list_entry(p, struct file, f_list);
                if (!filp->f_dentry)
                        continue;
-               if (filp->f_dentry->d_sb != sb)
-                       continue;
                inode = filp->f_dentry->d_inode;
                if (!inode)
                        continue;
@@ -637,12 +640,14 @@ restart:
                        inode->i_flags &= ~S_QUOTA;
                put_it:
                        if (dquot != NODQUOT) {
+                               file_list_unlock();
                                dqput(dquot);
                                /* we may have blocked ... */
                                goto restart;
                        }
                }
        }
+       file_list_unlock();
 }
 
 static inline void dquot_incr_inodes(struct dquot *dquot, unsigned long number)
index 83b1834de5db67523ee726bd6a581a37030d6cb8..7215f69f15ff3aef265feb716f9c0c7ae67199e0 100644 (file)
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -119,8 +119,12 @@ int open_dentry(struct dentry * dentry, int mode)
 {
        struct inode * inode = dentry->d_inode;
        struct file * f;
+       struct list_head * l = NULL;
        int fd, error;
 
+       if (inode->i_sb)
+               l = &inode->i_sb->s_files;
+
        error = -EINVAL;
        if (!inode->i_op || !inode->i_op->default_file_ops)
                goto out;
@@ -141,6 +145,7 @@ int open_dentry(struct dentry * dentry, int mode)
                        if (error)
                                goto out_filp;
                }
+               file_move(f, l);
                fd_install(fd, f);
                dget(dentry);
        }
index 80c3a08bab79891c9bafab74ef332df16e4fc404..cb9ef16e95a81970ce729448a4f6a899dfda1d72 100644 (file)
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -9,6 +9,7 @@
 #include <linux/slab.h>
 #include <linux/file.h>
 #include <linux/init.h>
+#include <linux/smp_lock.h>
 
 /* SLAB cache for filp's. */
 static kmem_cache_t *filp_cache;
@@ -18,37 +19,12 @@ int nr_files = 0;   /* read only */
 int nr_free_files = 0; /* read only */
 int max_files = NR_FILE;/* tunable */
 
-/* Free list management, if you are here you must have f_count == 0 */
-static struct file * free_filps = NULL;
-
-static void insert_file_free(struct file *file)
-{
-       if((file->f_next = free_filps) != NULL)
-               free_filps->f_pprev = &file->f_next;
-       free_filps = file;
-       file->f_pprev = &free_filps;
-       nr_free_files++;
-}
-
-/* The list of in-use filp's must be exported (ugh...) */
-struct file *inuse_filps = NULL;
-
-static inline void put_inuse(struct file *file)
-{
-       if((file->f_next = inuse_filps) != NULL)
-               inuse_filps->f_pprev = &file->f_next;
-       inuse_filps = file;
-       file->f_pprev = &inuse_filps;
-}
-
-/* It does not matter which list it is on. */
-static inline void remove_filp(struct file *file)
-{
-       if(file->f_next)
-               file->f_next->f_pprev = file->f_pprev;
-       *file->f_pprev = file->f_next;
-}
-
+/* Here the new files go */
+static LIST_HEAD(anon_list);
+/* And here the free ones sit */
+static LIST_HEAD(free_list);
+/* public *and* exported. Not pretty! */
+spinlock_t files_lock = SPIN_LOCK_UNLOCKED;
 
 void __init file_table_init(void)
 {
@@ -67,24 +43,30 @@ void __init file_table_init(void)
 /* Find an unused file structure and return a pointer to it.
  * Returns NULL, if there are no more free file structures or
  * we run out of memory.
+ *
+ * SMP-safe.
  */
 struct file * get_empty_filp(void)
 {
        static int old_max = 0;
        struct file * f;
 
+       file_list_lock();
        if (nr_free_files > NR_RESERVED_FILES) {
        used_one:
-               f = free_filps;
-               remove_filp(f);
+               f = list_entry(free_list.next, struct file, f_list);
+               list_del(&f->f_list);
                nr_free_files--;
        new_one:
+               file_list_unlock();
                memset(f, 0, sizeof(*f));
-               atomic_set(&f->f_count, 1);
+               atomic_set(&f->f_count,1);
                f->f_version = ++event;
                f->f_uid = current->fsuid;
                f->f_gid = current->fsgid;
-               put_inuse(f);
+               file_list_lock();
+               list_add(&f->f_list, &anon_list);
+               file_list_unlock();
                return f;
        }
        /*
@@ -96,7 +78,9 @@ struct file * get_empty_filp(void)
         * Allocate a new one if we're below the limit.
         */
        if (nr_files < max_files) {
+               file_list_unlock();
                f = kmem_cache_alloc(filp_cache, SLAB_KERNEL);
+               file_list_lock();
                if (f) {
                        nr_files++;
                        goto new_one;
@@ -108,6 +92,7 @@ struct file * get_empty_filp(void)
                printk("VFS: file-max limit %d reached\n", max_files);
                old_max = max_files;
        }
+       file_list_unlock();
        return NULL;
 }
 
@@ -131,20 +116,77 @@ int init_private_file(struct file *filp, struct dentry *dentry, int mode)
                return 0;
 }
 
-void fput(struct file *file)
+void _fput(struct file *file)
 {
-       if (atomic_dec_and_test(&file->f_count)) {
-               locks_remove_flock(file);
-               __fput(file);
-               remove_filp(file);
-               insert_file_free(file);
-       }
+       atomic_inc(&file->f_count);
+
+       lock_kernel();
+       locks_remove_flock(file);       /* Still need the */
+       __fput(file);                   /* big lock here. */
+       unlock_kernel();
+
+       atomic_set(&file->f_count, 0);
+       file_list_lock();
+       list_del(&file->f_list);
+       list_add(&file->f_list, &free_list);
+       nr_free_files++;
+       file_list_unlock();
 }
 
+/* Here. put_filp() is SMP-safe now. */
+
 void put_filp(struct file *file)
 {
-       if (atomic_dec_and_test(&file->f_count)) {
-               remove_filp(file);
-               insert_file_free(file);
+       if(atomic_dec_and_test(&file->f_count)) {
+               file_list_lock();
+               list_del(&file->f_list);
+               list_add(&file->f_list, &free_list);
+               nr_free_files++;
+               file_list_unlock();
+       }
+}
+
+void file_move(struct file *file, struct list_head *list)
+{
+       if (!list)
+               return;
+       file_list_lock();
+       list_del(&file->f_list);
+       list_add(&file->f_list, list);
+       file_list_unlock();
+}
+
+void file_moveto(struct file *new, struct file *old)
+{
+       file_list_lock();
+       list_del(&new->f_list);
+       list_add(&new->f_list, &old->f_list);
+       file_list_unlock();
+}
+
+int fs_may_remount_ro(struct super_block *sb)
+{
+       struct list_head *p;
+
+       /* Check that no files are currently opened for writing. */
+       file_list_lock();
+       for (p = sb->s_files.next; p != &sb->s_files; p = p->next) {
+               struct file *file = list_entry(p, struct file, f_list);
+               struct inode *inode = file->f_dentry->d_inode;
+               if (!inode)
+                       continue;
+
+               /* File with pending delete? */
+               if (inode->i_nlink == 0)
+                       goto too_bad;
+
+               /* Writable file? */
+               if (S_ISREG(inode->i_mode) && (file->f_mode & FMODE_WRITE))
+                       return 0;
        }
+       file_list_unlock();
+       return 1; /* Tis' cool bro. */
+too_bad:
+       file_list_unlock();
+       return 0;
 }
index 01fc64d235ee8dc9581644a07eba8510a5e386cd..bcf36de99805b84c3e5d5b32e424deb832ad4b47 100644 (file)
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -818,31 +818,6 @@ void __init inode_init(void)
        max_inodes = max;
 }
 
-/* This belongs in file_table.c, not here... */
-int fs_may_remount_ro(struct super_block *sb)
-{
-       struct file *file;
-
-       /* Check that no files are currently opened for writing. */
-       for (file = inuse_filps; file; file = file->f_next) {
-               struct inode *inode;
-               if (!file->f_dentry)
-                       continue;
-               inode = file->f_dentry->d_inode;
-               if (!inode || inode->i_sb != sb)
-                       continue;
-
-               /* File with pending delete? */
-               if (inode->i_nlink == 0)
-                       return 0;
-
-               /* Writable file? */
-               if (S_ISREG(inode->i_mode) && (file->f_mode & FMODE_WRITE))
-                       return 0;
-       }
-       return 1; /* Tis' cool bro. */
-}
-
 void update_atime (struct inode *inode)
 {
     if ( IS_NOATIME (inode) ) return;
index 8b63cbf662c3f20084f2da62281be244c8dd1e97..77c1db0915b6997dcb05450e7cd81946030074fd 100644 (file)
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -305,6 +305,7 @@ create_write_request(struct file * file, struct page *page, unsigned int offset,
                goto out_req;
 
        /* Put the task on inode's writeback request list. */
+       get_file(file);
        wreq->wb_file = file;
        wreq->wb_pid    = current->pid;
        wreq->wb_page   = page;
@@ -467,7 +468,6 @@ nfs_updatepage(struct file *file, struct page *page, unsigned long offset, unsig
         * The IO completion will then free the page and the dentry.
         */
        get_page(page);
-       atomic_inc(&file->f_count);
 
        /* Schedule request */
        synchronous = schedule_write_request(req, synchronous);
index deb898f768dd07c803cbcf77b75dc8a6537729d8..06ac9610cd04205b6aea5521644549b425f32a4f 100644 (file)
--- a/fs/open.c
+++ b/fs/open.c
@@ -663,6 +663,8 @@ struct file *filp_open(const char * filename, int flags, int mode)
        f->f_op = NULL;
        if (inode->i_op)
                f->f_op = inode->i_op->default_file_ops;
+       if (inode->i_sb)
+               file_move(f, &inode->i_sb->s_files);
        if (f->f_op && f->f_op->open) {
                error = f->f_op->open(inode,f);
                if (error)
index 49d0d005877609a8635284d312485534ceb617dc..66108f9a79f015473c47698112aeacf6222a1c1b 100644 (file)
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -906,10 +906,10 @@ static int get_stat(int pid, char * buffer)
                tsk->tty ? kdev_t_to_nr(tsk->tty->device) : 0,
                tty_pgrp,
                tsk->flags,
-               tsk->mm ? tsk->mm->min_flt : 0,
-               tsk->mm ? tsk->mm->cmin_flt : 0,
-               tsk->mm ? tsk->mm->maj_flt : 0,
-               tsk->mm ? tsk->mm->cmaj_flt : 0,
+               tsk->min_flt,
+               tsk->cmin_flt,
+               tsk->maj_flt,
+               tsk->cmaj_flt,
                tsk->times.tms_utime,
                tsk->times.tms_stime,
                tsk->times.tms_cutime,
@@ -936,8 +936,8 @@ static int get_stat(int pid, char * buffer)
                sigign      .sig[0] & 0x7fffffffUL,
                sigcatch    .sig[0] & 0x7fffffffUL,
                wchan,
-               tsk->mm ? tsk->mm->nswap : 0,
-               tsk->mm ? tsk->mm->cnswap : 0,
+               tsk->nswap,
+               tsk->cnswap,
                tsk->exit_signal,
                tsk->processor);
 }
index 970e63a96b3a8672b2ed7e7cf9f3ace5ccca613e..4d329d090bd9bc77ab233bca1e461159188072f8 100644 (file)
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -87,13 +87,26 @@ static void proc_delete_inode(struct inode *inode)
        }
 }
 
+struct super_block *proc_super_blocks = NULL;
+
+static void proc_put_super(struct super_block *sb)
+{
+       struct super_block **p = &proc_super_blocks;
+       while (*p != sb) {
+               if (!*p)        /* should never happen */
+                       return;
+               p = (struct super_block **)&(*p)->u.generic_sbp;
+       }
+       *p = (struct super_block *)(*p)->u.generic_sbp;
+}
+
 static struct super_operations proc_sops = { 
        proc_read_inode,
        proc_write_inode,
        proc_put_inode,
        proc_delete_inode,      /* delete_inode(struct inode *) */
        NULL,
-       NULL,
+       proc_put_super,
        NULL,
        proc_statfs,
        NULL
@@ -323,6 +336,8 @@ struct super_block *proc_read_super(struct super_block *s,void *data,
        if (!s->s_root)
                goto out_no_root;
        parse_options(data, &root_inode->i_uid, &root_inode->i_gid);
+       s->u.generic_sbp = (void*) proc_super_blocks;
+       proc_super_blocks = s;
        unlock_super(s);
        return s;
 
index b095df35361937de43278f0d5aa4ac1141f7fafb..4d599c77b50d4448461c89154d84bb115f6d8636 100644 (file)
@@ -289,10 +289,10 @@ int mem_mmap(struct file * file, struct vm_area_struct * vma)
                        return -ENOMEM;
 
                if (!pte_present(*src_table))
-                       handle_mm_fault(tsk->mm, src_vma, stmp, 1);
+                       handle_mm_fault(tsk, src_vma, stmp, 1);
 
                if ((vma->vm_flags & VM_WRITE) && !pte_write(*src_table))
-                       handle_mm_fault(tsk->mm, src_vma, stmp, 1);
+                       handle_mm_fault(tsk, src_vma, stmp, 1);
 
                set_pte(src_table, pte_mkdirty(*src_table));
                set_pte(dest_table, *src_table);
index 31b89ca824ba01e51c848e510dc90281bfd6924b..a6c05e91e95506e49a8733c8670c9054da18b537 100644 (file)
@@ -365,24 +365,36 @@ int proc_register(struct proc_dir_entry * dir, struct proc_dir_entry * dp)
  */
 static void proc_kill_inodes(int ino)
 {
-       struct file *filp;
-
-       /* inuse_filps is protected by the single kernel lock */
-       for (filp = inuse_filps; filp; filp = filp->f_next) {
-               struct dentry * dentry;
-               struct inode * inode;
-
-               dentry = filp->f_dentry;
-               if (!dentry)
-                       continue;
-               if (dentry->d_op != &proc_dentry_operations)
-                       continue;
-               inode = dentry->d_inode;
-               if (!inode)
-                       continue;
-               if (inode->i_ino != ino)
-                       continue;
-               filp->f_op = NULL;
+       struct list_head *p;
+       struct super_block *sb;
+
+       /*
+        * Actually it's a partial revoke(). We have to go through all
+        * copies of procfs. proc_super_blocks is protected by the big
+        * lock for the time being.
+        */
+       for (sb = proc_super_blocks;
+            sb;
+            sb = (struct super_block*)sb->u.generic_sbp) {
+               file_list_lock();
+               for (p = sb->s_files.next; p != &sb->s_files; p = p->next) {
+                       struct file * filp = list_entry(p, struct file, f_list);
+                       struct dentry * dentry;
+                       struct inode * inode;
+
+                       dentry = filp->f_dentry;
+                       if (!dentry)
+                               continue;
+                       if (dentry->d_op != &proc_dentry_operations)
+                               continue;
+                       inode = dentry->d_inode;
+                       if (!inode)
+                               continue;
+                       if (inode->i_ino != ino)
+                               continue;
+                       filp->f_op = NULL;
+               }
+               file_list_unlock();
        }
 }
 
index 5cf5189597d05a77f776eb66f3f1d66ac223ff42..f708150946f5a0fa745a4872a8050a814825d1f6 100644 (file)
@@ -531,6 +531,7 @@ static struct super_block *get_empty_super(void)
                INIT_LIST_HEAD(&s->s_dirty);
                list_add (&s->s_list, super_blocks.prev);
                init_waitqueue_head(&s->s_wait);
+               INIT_LIST_HEAD(&s->s_files);
        }
        return s;
 }
index 5efa992964112311ccba234278f0b8f1a5b6ac2d..699098733ce85866a1d319355c9d844e930e890c 100644 (file)
@@ -5,7 +5,8 @@
 #ifndef __LINUX_FILE_H
 #define __LINUX_FILE_H
 
-extern void __fput(struct file *);
+extern void __fput(struct file *);     /* goner? */
+extern void _fput(struct file *);
 
 /*
  * Check whether the specified task has the fd open. Since the task
@@ -78,7 +79,11 @@ extern inline void fd_install(unsigned int fd, struct file * file)
  * I suspect there are many other similar "optimizations" across the
  * kernel...
  */
-extern void fput(struct file *); 
+extern inline void fput(struct file * file)
+{
+       if (atomic_dec_and_test(&file->f_count))
+               _fput(file);
+}
 extern void put_filp(struct file *);
 
 #endif /* __LINUX_FILE_H */
index 7d60fee3170d0d26964dfc75148d3870ce1c5f42..14d71253df55e43bc03874cbb3fbae2975b47c53 100644 (file)
@@ -400,7 +400,7 @@ struct fown_struct {
 };
 
 struct file {
-       struct file             *f_next, **f_pprev;
+       struct list_head        f_list;
        struct dentry           *f_dentry;
        struct file_operations  *f_op;
        atomic_t                f_count;
@@ -417,6 +417,9 @@ struct file {
        /* needed for tty driver, and maybe others */
        void                    *private_data;
 };
+extern spinlock_t files_lock;
+#define file_list_lock() spin_lock(&files_lock);
+#define file_list_unlock() spin_unlock(&files_lock);
 
 #define get_file(x)    atomic_inc(&(x)->f_count)
 #define file_count(x)  atomic_read(&(x)->f_count)
@@ -527,6 +530,7 @@ struct super_block {
        short int               s_ibasket_count;
        short int               s_ibasket_max;
        struct list_head        s_dirty;        /* dirty inodes */
+       struct list_head        s_files;
 
        union {
                struct minix_sb_info    minix_sb;
@@ -745,8 +749,6 @@ extern struct file_system_type *get_fs_type(const char *);
 extern int fs_may_remount_ro(struct super_block *);
 extern int fs_may_mount(kdev_t);
 
-extern struct file *inuse_filps;
-
 extern int try_to_free_buffers(struct page *);
 extern void refile_buffer(struct buffer_head * buf);
 
@@ -855,6 +857,8 @@ extern struct inode * get_empty_inode(void);
 extern void insert_inode_hash(struct inode *);
 extern void remove_inode_hash(struct inode *);
 extern struct file * get_empty_filp(void);
+extern void file_move(struct file *f, struct list_head *list);
+extern void file_moveto(struct file *new, struct file *old);
 extern struct buffer_head * get_hash_table(kdev_t, int, int);
 extern struct buffer_head * getblk(kdev_t, int, int);
 extern void ll_rw_block(int, int, struct buffer_head * bh[]);
index 5429bb3f9409757dc44038501f9f4cdb85b660e5..cb5a5809ee3ad6feccee69408f82685a89c072e2 100644 (file)
@@ -314,8 +314,8 @@ extern int remap_page_range(unsigned long from, unsigned long to, unsigned long
 extern int zeromap_page_range(unsigned long from, unsigned long size, pgprot_t prot);
 
 extern void vmtruncate(struct inode * inode, unsigned long offset);
-extern int handle_mm_fault(struct mm_struct *mm,struct vm_area_struct *vma, unsigned long address, int write_access);
-extern void make_pages_present(unsigned long addr, unsigned long end);
+extern int handle_mm_fault(struct task_struct *tsk,struct vm_area_struct *vma, unsigned long address, int write_access);
+extern int make_pages_present(unsigned long addr, unsigned long end);
 
 extern int pgt_cache_water[2];
 extern int check_pgt_cache(void);
@@ -406,7 +406,7 @@ static inline struct vm_area_struct * find_vma_intersection(struct mm_struct * m
        return vma;
 }
 
-extern struct vm_area_struct *find_extend_vma(struct mm_struct *mm, unsigned long addr);
+extern struct vm_area_struct *find_extend_vma(struct task_struct *tsk, unsigned long addr);
 
 #define buffer_under_min()     ((atomic_read(&buffermem) >> PAGE_SHIFT) * 100 < \
                                buffer_mem.min_percent * num_physpages)
index be5c2c66609f410d811f56f49b76bc7a54ba7fed..a349c3a7c92ef283e0346231df013522cde587cf 100644 (file)
@@ -375,6 +375,7 @@ static inline int proc_scsi_unregister(struct proc_dir_entry *driver, int x)
     }
 }
 
+extern struct super_block *proc_super_blocks;
 extern struct dentry_operations proc_dentry_operations;
 extern struct super_block *proc_read_super(struct super_block *,void *,int);
 extern int init_proc_fs(void);
index 251aa8f9d205a795ed0082ca23615dff65933356..df3753dbed66e3d19c343a8ce68dd2b0ba4a07da 100644 (file)
@@ -172,10 +172,8 @@ struct mm_struct {
        atomic_t count;
        int map_count;                          /* number of VMAs */
        struct semaphore mmap_sem;
-       rwlock_t page_table_lock;
+       spinlock_t page_table_lock;
        unsigned long context;
-       unsigned long min_flt, maj_flt, nswap, cmin_flt, cmaj_flt, cnswap;
-       int swappable:1;
        unsigned long start_code, end_code, start_data, end_data;
        unsigned long start_brk, brk, start_stack;
        unsigned long arg_start, arg_end, env_start, env_end;
@@ -196,9 +194,7 @@ struct mm_struct {
                swapper_pg_dir,                         \
                ATOMIC_INIT(1), 1,                      \
                __MUTEX_INITIALIZER(name.mmap_sem),     \
-               RW_LOCK_UNLOCKED,                       \
-               0,                                      \
-               0, 0, 0, 0, 0, 0,                       \
+               SPIN_LOCK_UNLOCKED,                     \
                0,                                      \
                0, 0, 0, 0,                             \
                0, 0, 0,                                \
@@ -286,6 +282,9 @@ struct task_struct {
        struct tms times;
        unsigned long start_time;
        long per_cpu_utime[NR_CPUS], per_cpu_stime[NR_CPUS];
+/* mm fault and swap info: this can arguably be seen as either mm-specific or thread-specific */
+       unsigned long min_flt, maj_flt, nswap, cmin_flt, cmaj_flt, cnswap;
+       int swappable:1;
 /* process credentials */
        uid_t uid,euid,suid,fsuid;
        gid_t gid,egid,sgid,fsgid;
@@ -368,6 +367,8 @@ struct task_struct {
 /* timer */    { NULL, NULL, 0, 0, it_real_fn }, \
 /* utime */    {0,0,0,0},0, \
 /* per CPU times */ {0, }, {0, }, \
+/* flt */      0,0,0,0,0,0, \
+/* swp */      0, \
 /* process credentials */                                      \
 /* uid etc */  0,0,0,0,0,0,0,0,                                \
 /* suppl grps*/ 0, {0,},                                       \
index dd2a63daeb1c333ea93c4ac0a8cb05ee4f4f6c23..edf1ff28f44fd4c888c3a8bd179e2f7207b6c6ec 100644 (file)
@@ -277,6 +277,7 @@ struct tty_struct {
        struct tq_struct tq_hangup;
        void *disc_data;
        void *driver_data;
+       struct list_head tty_files;
 
 #define N_TTY_BUF_SIZE 4096
        
index 73ef988a72d82ee6f0b78f8ec1e01bf5cdb637f6..af6bf7679df0ae22c24af02fade4fbd3e72af290 100644 (file)
@@ -371,10 +371,6 @@ do {       spin_lock_init(&((__sk)->lock.slock)); \
 } while(0);
 
 struct sock {
-       /* This must be first. */
-       struct sock             *sklist_next;
-       struct sock             *sklist_prev;
-
        /* Local port binding hash linkage. */
        struct sock             *bind_next;
        struct sock             **bind_pprev;
@@ -579,10 +575,6 @@ do {       if((__sk)->backlog.tail == NULL) {              \
  * transport -> network interface is defined by struct inet_proto
  */
 struct proto {
-       /* These must be first. */
-       struct sock             *sklist_next;
-       struct sock             *sklist_prev;
-
        void                    (*close)(struct sock *sk, 
                                        long timeout);
        int                     (*connect)(struct sock *sk,
@@ -621,9 +613,7 @@ struct proto {
        /* Keeping track of sk's, looking them up, and port selection methods. */
        void                    (*hash)(struct sock *sk);
        void                    (*unhash)(struct sock *sk);
-       void                    (*rehash)(struct sock *sk);
-       unsigned short          (*good_socknum)(void);
-       int                     (*verify_bind)(struct sock *sk, unsigned short snum);
+       int                     (*get_port)(struct sock *sk, unsigned short snum);
 
        unsigned short          max_header;
        unsigned long           retransmits;
@@ -667,40 +657,6 @@ extern rwlock_t sockhash_lock;
 #define SOCKHASH_LOCK_WRITE_BH()       write_lock(&sockhash_lock)
 #define SOCKHASH_UNLOCK_WRITE_BH()     write_unlock(&sockhash_lock)
 
-/* Some things in the kernel just want to get at a protocols
- * entire socket list commensurate, thus...
- */
-static __inline__ void add_to_prot_sklist(struct sock *sk)
-{
-       SOCKHASH_LOCK_WRITE();
-       if(!sk->sklist_next) {
-               struct proto *p = sk->prot;
-
-               sk->sklist_prev = (struct sock *) p;
-               sk->sklist_next = p->sklist_next;
-               p->sklist_next->sklist_prev = sk;
-               p->sklist_next = sk;
-
-               /* Charge the protocol. */
-               sk->prot->inuse += 1;
-               if(sk->prot->highestinuse < sk->prot->inuse)
-                       sk->prot->highestinuse = sk->prot->inuse;
-       }
-       SOCKHASH_UNLOCK_WRITE();
-}
-
-static __inline__ void del_from_prot_sklist(struct sock *sk)
-{
-       SOCKHASH_LOCK_WRITE();
-       if(sk->sklist_next) {
-               sk->sklist_next->sklist_prev = sk->sklist_prev;
-               sk->sklist_prev->sklist_next = sk->sklist_next;
-               sk->sklist_next = NULL;
-               sk->prot->inuse--;
-       }
-       SOCKHASH_UNLOCK_WRITE();
-}
-
 /* Used by processes to "lock" a socket state, so that
  * interrupts and bottom half handlers won't change it
  * from under us. It essentially blocks any incoming
index 28fc52125dfe658e394e0febab5ad37a5c694ee5..9127795c266b6368d48cb0a43faf74f4e82a886f 100644 (file)
@@ -72,11 +72,7 @@ extern struct sock *tcp_listening_hash[TCP_LHTABLE_SIZE];
  */
 struct tcp_bind_bucket {
        unsigned short          port;
-       unsigned short          flags;
-#define TCPB_FLAG_LOCKED       0x0001
-#define TCPB_FLAG_FASTREUSE    0x0002
-#define TCPB_FLAG_GOODSOCKNUM  0x0004
-
+       unsigned short          fastreuse;
        struct tcp_bind_bucket  *next;
        struct sock             *owners;
        struct tcp_bind_bucket  **pprev;
@@ -115,32 +111,6 @@ static __inline__ int tcp_bhashfn(__u16 lport)
        return (lport & (tcp_bhash_size - 1));
 }
 
-static __inline__ void tcp_sk_bindify(struct sock *sk)
-{
-       struct tcp_bind_bucket *tb;
-       unsigned short snum = sk->num;
-
-       for(tb = tcp_bhash[tcp_bhashfn(snum)]; tb->port != snum; tb = tb->next)
-               ;
-       /* Update bucket flags. */
-       if(tb->owners == NULL) {
-               /* We're the first. */
-               if(sk->reuse && sk->state != TCP_LISTEN)
-                       tb->flags = TCPB_FLAG_FASTREUSE;
-               else
-                       tb->flags = 0;
-       } else {
-               if((tb->flags & TCPB_FLAG_FASTREUSE) &&
-                  ((sk->reuse == 0) || (sk->state == TCP_LISTEN)))
-                       tb->flags &= ~TCPB_FLAG_FASTREUSE;
-       }
-       if((sk->bind_next = tb->owners) != NULL)
-               tb->owners->bind_pprev = &sk->bind_next;
-       tb->owners = sk;
-       sk->bind_pprev = &tb->owners;
-       sk->prev = (struct sock *) tb;
-}
-
 /* This is a TIME_WAIT bucket.  It works around the memory consumption
  * problems of sockets in such a state on heavily loaded servers, but
  * without violating the protocol specification.
@@ -150,8 +120,6 @@ struct tcp_tw_bucket {
         * XXX Yes I know this is gross, but I'd have to edit every single
         * XXX networking file if I created a "struct sock_header". -DaveM
         */
-       struct sock             *sklist_next;
-       struct sock             *sklist_prev;
        struct sock             *bind_next;
        struct sock             **bind_pprev;
        __u32                   daddr;
@@ -477,7 +445,9 @@ extern __inline int between(__u32 seq1, __u32 seq2, __u32 seq3)
 extern struct proto tcp_prot;
 extern struct tcp_mib tcp_statistics;
 
-extern unsigned short          tcp_good_socknum(void);
+extern void                    tcp_put_port(struct sock *sk);
+extern void                    __tcp_put_port(struct sock *sk);
+extern void                    tcp_inherit_port(struct sock *sk, struct sock *child);
 
 extern void                    tcp_v4_err(struct sk_buff *skb,
                                           unsigned char *, int);
@@ -630,8 +600,7 @@ struct tcp_sl_timer {
 #define TCP_SLT_SYNACK         0
 #define TCP_SLT_KEEPALIVE      1
 #define TCP_SLT_TWKILL         2
-#define TCP_SLT_BUCKETGC       3
-#define TCP_SLT_MAX            4
+#define TCP_SLT_MAX            3
 
 extern struct tcp_sl_timer tcp_slt_array[TCP_SLT_MAX];
  
@@ -1069,17 +1038,6 @@ extern __inline__ void tcp_dec_slow_timer(int timer)
        atomic_dec(&slt->count);
 }
 
-/* This needs to use a slow timer, so it is here. */
-static __inline__ void tcp_sk_unbindify(struct sock *sk)
-{
-       struct tcp_bind_bucket *tb = (struct tcp_bind_bucket *) sk->prev;
-       if(sk->bind_next)
-               sk->bind_next->bind_pprev = sk->bind_pprev;
-       *sk->bind_pprev = sk->bind_next;
-       if(tb->owners == NULL)
-               tcp_inc_slow_timer(TCP_SLT_BUCKETGC);
-}
-
 extern const char timer_bug_msg[];
 
 static inline void tcp_clear_xmit_timer(struct sock *sk, int what)
index 30a59b3f09132ad84632da4bad8cafbf76a8a63b..dcc2fd09328414a7ce700db72f0b84cae6ff9ff6 100644 (file)
@@ -23,6 +23,7 @@
 #define _UDP_H
 
 #include <linux/udp.h>
+#include <net/sock.h>
 
 #define UDP_HTABLE_SIZE                128
 
  */
 extern struct sock *udp_hash[UDP_HTABLE_SIZE];
 
-extern unsigned short udp_good_socknum(void);
+extern int udp_port_rover;
+
+static inline int udp_lport_inuse(u16 num)
+{
+       struct sock *sk = udp_hash[num & (UDP_HTABLE_SIZE - 1)];
+
+       for(; sk != NULL; sk = sk->next) {
+               if(sk->num == num)
+                       return 1;
+       }
+       return 0;
+}
 
 /* Note: this must match 'valbool' in sock_setsockopt */
 #define UDP_CSUM_NOXMIT                1
index 395104aaf89de6e2e53fbf3db8a6e42c7c0ea8d0..a02bc8ad124efc5d68e9c58ebd1dd8f91ad3ecf2 100644 (file)
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -675,10 +675,10 @@ static unsigned long shm_nopage(struct vm_area_struct * shmd, unsigned long addr
                pte = pte_mkdirty(mk_pte(page, PAGE_SHARED));
                shp->shm_pages[idx] = pte_val(pte);
        } else
-               --current->mm->maj_flt;  /* was incremented in do_no_page */
+               --current->maj_flt;  /* was incremented in do_no_page */
 
 done:  /* pte_val(pte) == shp->shm_pages[idx] */
-       current->mm->min_flt++;
+       current->min_flt++;
        get_page(mem_map + MAP_NR(pte_page(pte)));
        return pte_page(pte);
 }
index 63ee87150ee24f0c83c55636b154ad751647bcc2..c6142afc71baddb05cb40040478df5ab38b9967e 100644 (file)
@@ -276,7 +276,7 @@ static int do_acct_process(long exitcode, struct file *file)
         */
        if (!file)
                return 0;
-       atomic_inc(&file->f_count);
+       get_file(file);
        if (!check_free_space(file)) {
                fput(file);
                return 0;
index a4ac8ae66eb8d8da5c896afe10148a44d43adf44..57d8441da5e1e7056f83e05ba3ccf31d115f57a5 100644 (file)
@@ -52,11 +52,9 @@ static void release(struct task_struct * p)
                write_unlock_irq(&tasklist_lock);
 
                release_thread(p);
-#if 0 /* FIXME! How do we do this right for threads? */
                current->cmin_flt += p->min_flt + p->cmin_flt;
                current->cmaj_flt += p->maj_flt + p->cmaj_flt;
                current->cnswap += p->nswap + p->cnswap;
-#endif
                free_task_struct(p);
        } else {
                printk("task releasing itself\n");
@@ -258,6 +256,7 @@ static inline void __exit_mm(struct task_struct * tsk)
                flush_tlb_mm(mm);
                destroy_context(mm);
                tsk->mm = &init_mm;
+               tsk->swappable = 0;
                SET_PAGE_DIR(tsk, swapper_pg_dir);
                mm_release();
                mmput(mm);
index bb4bec1ea5eb9a2993f211e23a4ba90f8426977f..c940938838def29b67f0cfe725fec88f86d73786 100644 (file)
@@ -304,7 +304,7 @@ struct mm_struct * mm_alloc(void)
                mm->map_count = 0;
                mm->def_flags = 0;
                init_MUTEX_LOCKED(&mm->mmap_sem);
-               mm->page_table_lock = RW_LOCK_UNLOCKED;
+               mm->page_table_lock = SPIN_LOCK_UNLOCKED;
                /*
                 * Leave mm->pgd set to the parent's pgd
                 * so that pgd_offset() is always valid.
@@ -315,7 +315,6 @@ struct mm_struct * mm_alloc(void)
                 * cache or tlb.
                 */
                mm->cpu_vm_mask = 0;
-               mm->swappable = 0;
        }
        return mm;
 }
@@ -378,6 +377,9 @@ static inline int copy_mm(int nr, unsigned long clone_flags, struct task_struct
                goto fail_nomem;
 
        tsk->mm = mm;
+       tsk->min_flt = tsk->maj_flt = 0;
+       tsk->cmin_flt = tsk->cmaj_flt = 0;
+       tsk->nswap = tsk->cnswap = 0;
        copy_segments(nr, tsk, mm);
        retval = new_page_tables(tsk);
        if (retval)
@@ -575,6 +577,7 @@ int do_fork(unsigned long clone_flags, unsigned long usp, struct pt_regs *regs)
                __MOD_INC_USE_COUNT(p->binfmt->module);
 
        p->did_exec = 0;
+       p->swappable = 0;
        p->state = TASK_UNINTERRUPTIBLE;
 
        copy_flags(clone_flags, p);
@@ -639,7 +642,7 @@ int do_fork(unsigned long clone_flags, unsigned long usp, struct pt_regs *regs)
        p->semundo = NULL;
 
        /* ok, now we should be set up.. */
-       p->mm->swappable = 1;
+       p->swappable = 1;
        p->exit_signal = clone_flags & CSIGNAL;
        p->pdeath_signal = 0;
 
index 04fd2517aa383e70aee8242d0edcc3501be29584..8a30af4dbda998101de365a469744fe207537ff3 100644 (file)
@@ -116,7 +116,8 @@ EXPORT_SYMBOL(update_atime);
 EXPORT_SYMBOL(get_super);
 EXPORT_SYMBOL(get_fs_type);
 EXPORT_SYMBOL(getname);
-EXPORT_SYMBOL(__fput);
+EXPORT_SYMBOL(__fput); /* goner? */
+EXPORT_SYMBOL(_fput);
 EXPORT_SYMBOL(igrab);
 EXPORT_SYMBOL(iunique);
 EXPORT_SYMBOL(iget);
@@ -141,8 +142,8 @@ EXPORT_SYMBOL(get_empty_filp);
 EXPORT_SYMBOL(init_private_file);
 EXPORT_SYMBOL(filp_open);
 EXPORT_SYMBOL(filp_close);
-EXPORT_SYMBOL(fput);
 EXPORT_SYMBOL(put_filp);
+EXPORT_SYMBOL(files_lock);
 EXPORT_SYMBOL(check_disk_change);
 EXPORT_SYMBOL(invalidate_buffers);
 EXPORT_SYMBOL(invalidate_inodes);
index 0179dc7ea29deb9bb982164a270933d1bcc2de2b..9ab1b2f4ce009393afd386540147001ccf2c74ec 100644 (file)
@@ -944,27 +944,27 @@ int getrusage(struct task_struct *p, int who, struct rusage *ru)
                        r.ru_utime.tv_usec = CT_TO_USECS(p->times.tms_utime);
                        r.ru_stime.tv_sec = CT_TO_SECS(p->times.tms_stime);
                        r.ru_stime.tv_usec = CT_TO_USECS(p->times.tms_stime);
-                       r.ru_minflt = 0;
-                       r.ru_majflt = 0;
-                       r.ru_nswap = 0;
+                       r.ru_minflt = p->min_flt;
+                       r.ru_majflt = p->maj_flt;
+                       r.ru_nswap = p->nswap;
                        break;
                case RUSAGE_CHILDREN:
                        r.ru_utime.tv_sec = CT_TO_SECS(p->times.tms_cutime);
                        r.ru_utime.tv_usec = CT_TO_USECS(p->times.tms_cutime);
                        r.ru_stime.tv_sec = CT_TO_SECS(p->times.tms_cstime);
                        r.ru_stime.tv_usec = CT_TO_USECS(p->times.tms_cstime);
-                       r.ru_minflt = 0;
-                       r.ru_majflt = 0;
-                       r.ru_nswap = 0;
+                       r.ru_minflt = p->cmin_flt;
+                       r.ru_majflt = p->cmaj_flt;
+                       r.ru_nswap = p->cnswap;
                        break;
                default:
                        r.ru_utime.tv_sec = CT_TO_SECS(p->times.tms_utime + p->times.tms_cutime);
                        r.ru_utime.tv_usec = CT_TO_USECS(p->times.tms_utime + p->times.tms_cutime);
                        r.ru_stime.tv_sec = CT_TO_SECS(p->times.tms_stime + p->times.tms_cstime);
                        r.ru_stime.tv_usec = CT_TO_USECS(p->times.tms_stime + p->times.tms_cstime);
-                       r.ru_minflt = 0;
-                       r.ru_majflt = 0;
-                       r.ru_nswap = 0;
+                       r.ru_minflt = p->min_flt + p->cmin_flt;
+                       r.ru_majflt = p->maj_flt + p->cmaj_flt;
+                       r.ru_nswap = p->nswap + p->cnswap;
                        break;
        }
        return copy_to_user(ru, &r, sizeof(r)) ? -EFAULT : 0;
index c6cf211aa268d1d4064af7fccac5a6422b07bcdd..04d0f534d0c286dcf4108f30e8b7fe171bd536eb 100644 (file)
@@ -605,7 +605,7 @@ unsigned long put_dirty_page(struct task_struct * tsk, unsigned long page, unsig
  * We enter with the page table read-lock held, and need to exit without
  * it.
  */
-static int do_wp_page(struct mm_struct * mm, struct vm_area_struct * vma,
+static int do_wp_page(struct task_struct * tsk, struct vm_area_struct * vma,
        unsigned long address, pte_t *page_table, pte_t pte)
 {
        unsigned long old_page, new_page;
@@ -614,7 +614,7 @@ static int do_wp_page(struct mm_struct * mm, struct vm_area_struct * vma,
        old_page = pte_page(pte);
        if (MAP_NR(old_page) >= max_mapnr)
                goto bad_wp_page;
-       mm->min_flt++;
+       tsk->min_flt++;
        page = mem_map + MAP_NR(old_page);
        
        /*
@@ -637,18 +637,18 @@ static int do_wp_page(struct mm_struct * mm, struct vm_area_struct * vma,
                flush_cache_page(vma, address);
                set_pte(page_table, pte_mkyoung(pte_mkdirty(pte_mkwrite(pte))));
                flush_tlb_page(vma, address);
-               read_unlock(&mm->page_table_lock);
+               spin_unlock(&tsk->mm->page_table_lock);
                return 1;
        }
 
        /*
         * Ok, we need to copy. Oh, well..
         */
-       read_unlock(&mm->page_table_lock);
+       spin_unlock(&tsk->mm->page_table_lock);
        new_page = __get_free_page(GFP_USER);
        if (!new_page)
-               return 0;
-       read_lock(&mm->page_table_lock);
+               return -1;
+       spin_lock(&tsk->mm->page_table_lock);
 
        /*
         * Re-check the pte - we dropped the lock
@@ -666,13 +666,13 @@ static int do_wp_page(struct mm_struct * mm, struct vm_area_struct * vma,
                /* Free the old page.. */
                new_page = old_page;
        }
-       read_unlock(&mm->page_table_lock);
+       spin_unlock(&tsk->mm->page_table_lock);
        free_page(new_page);
        return 1;
 
 bad_wp_page:
        printk("do_wp_page: bogus page at address %08lx (%08lx)\n",address,old_page);
-       return 0;
+       return -1;
 }
 
 /*
@@ -800,7 +800,7 @@ static void swapin_readahead(unsigned long entry)
        return;
 }
 
-static int do_swap_page(struct mm_struct * mm, 
+static int do_swap_page(struct task_struct * tsk,
        struct vm_area_struct * vma, unsigned long address,
        pte_t * page_table, unsigned long entry, int write_access)
 {
@@ -813,13 +813,13 @@ static int do_swap_page(struct mm_struct * mm,
                page = read_swap_cache(entry);
                unlock_kernel();
                if (!page)
-                       return 0;
+                       return -1;
 
                flush_page_to_ram(page_address(page));
        }
 
        vma->vm_mm->rss++;
-       mm->min_flt++;
+       tsk->min_flt++;
        swap_free(entry);
 
        pte = mk_pte(page_address(page), vma->vm_page_prot);
@@ -829,27 +829,30 @@ static int do_swap_page(struct mm_struct * mm,
                pte = pte_mkwrite(pte_mkdirty(pte));
        }
        set_pte(page_table, pte);
-               
+       /* No need to invalidate - it was non-present before */
+       update_mmu_cache(vma, address, pte);
        return 1;
 }
 
 /*
  * This only needs the MM semaphore
  */
-static int do_anonymous_page(struct mm_struct * mm, struct vm_area_struct * vma, pte_t *page_table, int write_access, unsigned long addr)
+static int do_anonymous_page(struct task_struct * tsk, struct vm_area_struct * vma, pte_t *page_table, int write_access, unsigned long addr)
 {
        pte_t entry = pte_wrprotect(mk_pte(ZERO_PAGE(addr), vma->vm_page_prot));
        if (write_access) {
                unsigned long page = __get_free_page(GFP_USER);
                if (!page)
-                       return 0;
+                       return -1;
                clear_page(page);
                entry = pte_mkwrite(pte_mkdirty(mk_pte(page, vma->vm_page_prot)));
                vma->vm_mm->rss++;
-               mm->min_flt++;
+               tsk->min_flt++;
                flush_page_to_ram(page);
        }
        set_pte(page_table, entry);
+       /* No need to invalidate - it was non-present before */
+       update_mmu_cache(vma, addr, entry);
        return 1;
 }
 
@@ -865,14 +868,14 @@ static int do_anonymous_page(struct mm_struct * mm, struct vm_area_struct * vma,
  * This is called with the MM semaphore and the kernel lock held.
  * We need to release the kernel lock as soon as possible..
  */
-static int do_no_page(struct mm_struct * mm, struct vm_area_struct * vma,
+static int do_no_page(struct task_struct * tsk, struct vm_area_struct * vma,
        unsigned long address, int write_access, pte_t *page_table)
 {
        unsigned long page;
        pte_t entry;
 
        if (!vma->vm_ops || !vma->vm_ops->nopage)
-               return do_anonymous_page(mm, vma, page_table, write_access, address);
+               return do_anonymous_page(tsk, vma, page_table, write_access, address);
 
        /*
         * The third argument is "no_share", which tells the low-level code
@@ -881,9 +884,9 @@ static int do_no_page(struct mm_struct * mm, struct vm_area_struct * vma,
         */
        page = vma->vm_ops->nopage(vma, address & PAGE_MASK, (vma->vm_flags & VM_SHARED)?0:write_access);
        if (!page)
-               return 0;
+               return 0;       /* SIGBUS - but we _really_ should know whether it is OOM or SIGBUS */
 
-       ++mm->maj_flt;
+       ++tsk->maj_flt;
        ++vma->vm_mm->rss;
        /*
         * This silly early PAGE_DIRTY setting removes a race
@@ -904,6 +907,7 @@ static int do_no_page(struct mm_struct * mm, struct vm_area_struct * vma,
                entry = pte_wrprotect(entry);
        set_pte(page_table, entry);
        /* no need to invalidate: a not-present page shouldn't be cached */
+       update_mmu_cache(vma, address, entry);
        return 1;
 }
 
@@ -925,7 +929,7 @@ static int do_no_page(struct mm_struct * mm, struct vm_area_struct * vma,
  * so we don't need to worry about a page being suddenly been added into
  * our VM.
  */
-static inline int handle_pte_fault(struct mm_struct *mm,
+static inline int handle_pte_fault(struct task_struct *tsk,
        struct vm_area_struct * vma, unsigned long address,
        int write_access, pte_t * pte)
 {
@@ -934,8 +938,8 @@ static inline int handle_pte_fault(struct mm_struct *mm,
        entry = *pte;
        if (!pte_present(entry)) {
                if (pte_none(entry))
-                       return do_no_page(mm, vma, address, write_access, pte);
-               return do_swap_page(mm, vma, address, pte, pte_val(entry), write_access);
+                       return do_no_page(tsk, vma, address, write_access, pte);
+               return do_swap_page(tsk, vma, address, pte, pte_val(entry), write_access);
        }
 
        /*
@@ -943,25 +947,27 @@ static inline int handle_pte_fault(struct mm_struct *mm,
         * lock to synchronize with kswapd, and verify that the entry
         * didn't change from under us..
         */
-       read_lock(&mm->page_table_lock);
+       spin_lock(&tsk->mm->page_table_lock);
        if (pte_val(entry) == pte_val(*pte)) {
                if (write_access) {
                        if (!pte_write(entry))
-                               return do_wp_page(mm, vma, address, pte, entry);
+                               return do_wp_page(tsk, vma, address, pte, entry);
 
                        entry = pte_mkdirty(entry);
                }
-               set_pte(pte, pte_mkyoung(entry));
+               entry = pte_mkyoung(entry);
+               set_pte(pte, entry);
                flush_tlb_page(vma, address);
+               update_mmu_cache(vma, address, entry);
        }
-       read_unlock(&mm->page_table_lock);
+       spin_unlock(&tsk->mm->page_table_lock);
        return 1;
 }
 
 /*
  * By the time we get here, we already hold the mm semaphore
  */
-int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct * vma,
+int handle_mm_fault(struct task_struct *tsk, struct vm_area_struct * vma,
        unsigned long address, int write_access)
 {
        pgd_t *pgd;
@@ -971,29 +977,27 @@ int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct * vma,
        pmd = pmd_alloc(pgd, address);
        if (pmd) {
                pte_t * pte = pte_alloc(pmd, address);
-               if (pte) {
-                       if (handle_pte_fault(mm, vma, address, write_access, pte)) {
-                               update_mmu_cache(vma, address, *pte);
-                               return 1;
-                       }
-               }
+               if (pte)
+                       return handle_pte_fault(tsk, vma, address, write_access, pte);
        }
-       return 0;
+       return -1;
 }
 
 /*
  * Simplistic page force-in..
  */
-void make_pages_present(unsigned long addr, unsigned long end)
+int make_pages_present(unsigned long addr, unsigned long end)
 {
        int write;
-       struct mm_struct *mm = current->mm;
+       struct task_struct *tsk = current;
        struct vm_area_struct * vma;
 
-       vma = find_vma(mm, addr);
+       vma = find_vma(tsk->mm, addr);
        write = (vma->vm_flags & VM_WRITE) != 0;
        while (addr < end) {
-               handle_mm_fault(mm, vma, addr, write);
+               if (handle_mm_fault(tsk, vma, addr, write) < 0)
+                       return -1;
                addr += PAGE_SIZE;
        }
+       return 0;
 }
index 8ed2979d9680faebffad7b6fed87342cc5959e9f..c9d07a2916794b70611c4b5d0c29b03891b651d8 100644 (file)
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -460,13 +460,13 @@ struct vm_area_struct * find_vma_prev(struct mm_struct * mm, unsigned long addr,
        return NULL;
 }
 
-struct vm_area_struct * find_extend_vma(struct mm_struct * mm, unsigned long addr)
+struct vm_area_struct * find_extend_vma(struct task_struct * tsk, unsigned long addr)
 {
        struct vm_area_struct * vma;
        unsigned long start;
 
        addr &= PAGE_MASK;
-       vma = find_vma(mm,addr);
+       vma = find_vma(tsk->mm,addr);
        if (!vma)
                return NULL;
        if (vma->vm_start <= addr)
index 3567098a15a38951a9345ad4c93df8562c58eafb..1ae052b94a7ea00d561b7c1b7e92d94cfa5dfbaa 100644 (file)
@@ -47,7 +47,7 @@ static int try_to_swap_out(struct task_struct * tsk, struct vm_area_struct* vma,
                goto out_failed;
 
        page = mem_map + MAP_NR(page_addr);
-       write_lock(&tsk->mm->page_table_lock);
+       spin_lock(&tsk->mm->page_table_lock);
        if (pte_val(pte) != pte_val(*page_table))
                goto out_failed_unlock;
 
@@ -138,7 +138,7 @@ drop_pte:
        if (vma->vm_ops && vma->vm_ops->swapout) {
                pid_t pid = tsk->pid;
                pte_clear(page_table);
-               write_unlock(&tsk->mm->page_table_lock);
+               spin_unlock(&tsk->mm->page_table_lock);
                flush_tlb_page(vma, address);
                vma->vm_mm->rss--;
                
@@ -158,9 +158,9 @@ drop_pte:
                goto out_failed; /* No swap space left */
                
        vma->vm_mm->rss--;
-       tsk->mm->nswap++;
+       tsk->nswap++;
        set_pte(page_table, __pte(entry));
-       write_unlock(&tsk->mm->page_table_lock);
+       spin_unlock(&tsk->mm->page_table_lock);
 
        flush_tlb_page(vma, address);
        swap_duplicate(entry);  /* One for the process, one for the swap cache */
@@ -175,7 +175,7 @@ out_free_success:
        __free_page(page);
        return 1;
 out_failed_unlock:
-       write_unlock(&tsk->mm->page_table_lock);
+       spin_unlock(&tsk->mm->page_table_lock);
 out_failed:
        return 0;
 }
@@ -352,7 +352,7 @@ static int swap_out(unsigned int priority, int gfp_mask)
                read_lock(&tasklist_lock);
                p = init_task.next_task;
                for (; p != &init_task; p = p->next_task) {
-                       if (!p->mm->swappable)
+                       if (!p->swappable)
                                continue;
                        if (p->mm->rss <= 0)
                                continue;
index 7e9f466cad966765cc29d7788ce91dc0148fd482..e2073166f35f244401292f1a263ea3569407cca0 100644 (file)
@@ -232,8 +232,8 @@ void scm_detach_fds(struct msghdr *msg, struct scm_cookie *scm)
                        break;
                }
                /* Bump the usage count and install the file. */
-               atomic_inc(&fp[i]->f_count);
-               current->files->fd[new_fd] = fp[i];
+               get_file(fp[i]);
+               fd_install(new_fd, fp[i]);
        }
 
        if (i > 0)
@@ -271,10 +271,9 @@ struct scm_fp_list *scm_fp_dup(struct scm_fp_list *fpl)
 
        new_fpl = kmalloc(sizeof(*fpl), GFP_KERNEL);
        if (new_fpl) {
-               memcpy(new_fpl, fpl, sizeof(*fpl));
-
                for (i=fpl->count-1; i>=0; i--)
-                       atomic_inc(&fpl->fp[i]->f_count);
+                       get_file(fpl->fp[i]);
+               memcpy(new_fpl, fpl, sizeof(*fpl));
        }
        return new_fpl;
 }
index 8930109b37a02300026362774aa8bccc6cda000f..d790ae536bcc4acf95691a2b4033848c87f0f19a 100644 (file)
@@ -759,7 +759,8 @@ static struct sock *ec_listening_socket(unsigned char port, unsigned char
                    (opt->station == station || opt->station == 0) &&
                    (opt->net == net || opt->net == 0))
                        return sk;
-               sk = sk->sklist_next;
+
+               sk = sk->next;
        }
 
        return NULL;
index ca0f27d0cb3109b8d1df41e0b2124ece6e01c02c..15b26fa1c688a100a5d981aad0421f8d17613a9a 100644 (file)
@@ -5,7 +5,7 @@
  *
  *             PF_INET protocol family socket handler.
  *
- * Version:    $Id: af_inet.c,v 1.91 1999/06/09 08:28:55 davem Exp $
+ * Version:    $Id: af_inet.c,v 1.93 1999/07/02 11:26:24 davem Exp $
  *
  * Authors:    Ross Biro, <bir7@leland.Stanford.Edu>
  *             Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
@@ -162,9 +162,6 @@ static __inline__ void kill_sk_queues(struct sock *sk)
 
 static __inline__ void kill_sk_now(struct sock *sk)
 {
-       /* No longer exists. */
-       del_from_prot_sklist(sk);
-
        /* Remove from protocol hash chains. */
        sk->prot->unhash(sk);
 
@@ -239,7 +236,7 @@ int inet_setsockopt(struct socket *sock, int level, int optname,
 {
        struct sock *sk=sock->sk;
        if (sk->prot->setsockopt==NULL)
-               return(-EOPNOTSUPP);
+               return -EOPNOTSUPP;
        return sk->prot->setsockopt(sk,level,optname,optval,optlen);
 }
 
@@ -256,7 +253,7 @@ int inet_getsockopt(struct socket *sock, int level, int optname,
 {
        struct sock *sk=sock->sk;
        if (sk->prot->getsockopt==NULL)
-               return(-EOPNOTSUPP);
+               return -EOPNOTSUPP;
        return sk->prot->getsockopt(sk,level,optname,optval,optlen);
 }
 
@@ -268,12 +265,10 @@ static int inet_autobind(struct sock *sk)
 {
        /* We may need to bind the socket. */
        if (sk->num == 0) {
-               sk->num = sk->prot->good_socknum();
-               if (sk->num == 0) 
-                       return(-EAGAIN);
+               if (sk->prot->get_port(sk, 0) != 0)
+                       return -EAGAIN;
                sk->sport = htons(sk->num);
                sk->prot->hash(sk);
-               add_to_prot_sklist(sk);
        }
        return 0;
 }
@@ -293,29 +288,38 @@ static void inet_listen_write_space(struct sock *sk)
 int inet_listen(struct socket *sock, int backlog)
 {
        struct sock *sk = sock->sk;
+       unsigned char old_state;
 
        if (sock->state != SS_UNCONNECTED || sock->type != SOCK_STREAM)
-               return(-EINVAL);
-
-       if (inet_autobind(sk) != 0)
-               return -EAGAIN;
+               return -EINVAL;
 
-       /* We might as well re use these. */ 
        if ((unsigned) backlog == 0)    /* BSDism */
                backlog = 1;
        if ((unsigned) backlog > SOMAXCONN)
                backlog = SOMAXCONN;
        sk->max_ack_backlog = backlog;
-       if (sk->state != TCP_LISTEN) {
-               sk->ack_backlog = 0;
+
+       /* Really, if the socket is already in listen state
+        * we can only allow the backlog to be adjusted.
+        */
+       old_state = sk->state;
+       if (old_state != TCP_LISTEN) {
                sk->state = TCP_LISTEN;
+               sk->ack_backlog = 0;
+               if (sk->num == 0) {
+                       if (sk->prot->get_port(sk, 0) != 0) {
+                               sk->state = old_state;
+                               return -EAGAIN;
+                       }
+                       sk->sport = htons(sk->num);
+               }
+
                dst_release(xchg(&sk->dst_cache, NULL));
-               sk->prot->rehash(sk);
-               add_to_prot_sklist(sk);
+               sk->prot->hash(sk);
+               sk->socket->flags |= SO_ACCEPTCON;
                sk->write_space = inet_listen_write_space;
        }
-       sk->socket->flags |= SO_ACCEPTCON;
-       return(0);
+       return 0;
 }
 
 /*
@@ -427,7 +431,6 @@ static int inet_create(struct socket *sock, int protocol)
 
                /* Add to protocol hash chains. */
                sk->prot->hash(sk);
-               add_to_prot_sklist(sk);
        }
 
        if (sk->prot->init) {
@@ -486,11 +489,9 @@ int inet_release(struct socket *sock, struct socket *peersock)
                 */
                timeout = 0;
                if (sk->linger && !(current->flags & PF_EXITING)) {
-                       timeout = MAX_SCHEDULE_TIMEOUT;
-
-                       /* XXX This makes no sense whatsoever... -DaveM */
-                       if (!sk->lingertime)
-                               timeout = HZ*sk->lingertime;
+                       timeout = HZ * sk->lingertime;
+                       if (!timeout)
+                               timeout = MAX_SCHEDULE_TIMEOUT;
                }
                sock->sk = NULL;
                sk->socket = NULL;
@@ -543,21 +544,17 @@ static int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
        if((snum >= PORT_MASQ_BEGIN) && (snum <= PORT_MASQ_END))
                return -EADDRINUSE;
 #endif          
-       if (snum == 0) 
-               snum = sk->prot->good_socknum();
-       if (snum < PROT_SOCK && !capable(CAP_NET_BIND_SERVICE))
-               return(-EACCES);
+       if (snum && snum < PROT_SOCK && !capable(CAP_NET_BIND_SERVICE))
+               return -EACCES;
        
        /* Make sure we are allowed to bind here. */
-       if(sk->prot->verify_bind(sk, snum))
+       if (sk->prot->get_port(sk, snum) != 0)
                return -EADDRINUSE;
 
-       sk->num = snum;
-       sk->sport = htons(snum);
+       sk->sport = htons(sk->num);
        sk->daddr = 0;
        sk->dport = 0;
-       sk->prot->rehash(sk);
-       add_to_prot_sklist(sk);
+       sk->prot->hash(sk);
        dst_release(sk->dst_cache);
        sk->dst_cache=NULL;
        return(0);
@@ -570,12 +567,12 @@ int inet_dgram_connect(struct socket *sock, struct sockaddr * uaddr,
        int err;
 
        if (inet_autobind(sk) != 0)
-               return(-EAGAIN);
+               return -EAGAIN;
        if (sk->prot->connect == NULL) 
-               return(-EOPNOTSUPP);
+               return -EOPNOTSUPP;
        err = sk->prot->connect(sk, (struct sockaddr *)uaddr, addr_len);
        if (err < 0) 
-               return(err);
+               return err;
        return(0);
 }
 
@@ -626,18 +623,20 @@ int inet_stream_connect(struct socket *sock, struct sockaddr * uaddr,
                if (flags & O_NONBLOCK)
                        return -EALREADY;
        } else {
+               if (sk->prot->connect == NULL) 
+                       return -EOPNOTSUPP;
+
                /* We may need to bind the socket. */
                if (inet_autobind(sk) != 0)
-                       return(-EAGAIN);
-               if (sk->prot->connect == NULL) 
-                       return(-EOPNOTSUPP);
+                       return -EAGAIN;
+
                err = sk->prot->connect(sk, uaddr, addr_len);
                /* Note: there is a theoretical race here when an wake up
                   occurred before inet_wait_for_connect is entered. In 2.3
                   the wait queue setup should be moved before the low level
                   connect call. -AK*/
                if (err < 0)
-                       return(err);
+                       return err;
                sock->state = SS_CONNECTING;
        }
        
@@ -645,7 +644,7 @@ int inet_stream_connect(struct socket *sock, struct sockaddr * uaddr,
                goto sock_error;
 
        if (sk->state != TCP_ESTABLISHED && (flags & O_NONBLOCK)) 
-               return (-EINPROGRESS);
+               return -EINPROGRESS;
 
        if (sk->state == TCP_SYN_SENT || sk->state == TCP_SYN_RECV) {
                inet_wait_for_connect(sk);
@@ -656,7 +655,7 @@ int inet_stream_connect(struct socket *sock, struct sockaddr * uaddr,
        sock->state = SS_CONNECTED;
        if ((sk->state != TCP_ESTABLISHED) && sk->err)
                goto sock_error; 
-       return(0);
+       return 0;
 
 sock_error:    
        /* This is ugly but needed to fix a race in the ICMP error handler */
@@ -750,7 +749,7 @@ static int inet_getname(struct socket *sock, struct sockaddr *uaddr,
        sin->sin_family = AF_INET;
        if (peer) {
                if (!tcp_connected(sk->state)) 
-                       return(-ENOTCONN);
+                       return -ENOTCONN;
                sin->sin_port = sk->dport;
                sin->sin_addr.s_addr = sk->daddr;
        } else {
@@ -774,12 +773,12 @@ int inet_recvmsg(struct socket *sock, struct msghdr *msg, int size,
        int err;
        
        if (sock->flags & SO_ACCEPTCON)
-               return(-EINVAL);
+               return -EINVAL;
        if (sk->prot->recvmsg == NULL) 
-               return(-EOPNOTSUPP);
+               return -EOPNOTSUPP;
        /* We may need to bind the socket. */
        if (inet_autobind(sk) != 0)
-               return(-EAGAIN);
+               return -EAGAIN;
        err = sk->prot->recvmsg(sk, msg, size, flags&MSG_DONTWAIT,
                                flags&~MSG_DONTWAIT, &addr_len);
        if (err >= 0)
@@ -796,15 +795,15 @@ int inet_sendmsg(struct socket *sock, struct msghdr *msg, int size,
        if (sk->shutdown & SEND_SHUTDOWN) {
                if (!(msg->msg_flags&MSG_NOSIGNAL))
                        send_sig(SIGPIPE, current, 1);
-               return(-EPIPE);
+               return -EPIPE;
        }
        if (sk->prot->sendmsg == NULL) 
-               return(-EOPNOTSUPP);
+               return -EOPNOTSUPP;
        if(sk->err)
                return sock_error(sk);
 
        /* We may need to bind the socket. */
-       if(inet_autobind(sk) != 0)
+       if (inet_autobind(sk) != 0)
                return -EAGAIN;
 
        return sk->prot->sendmsg(sk, msg, size);
@@ -822,11 +821,13 @@ int inet_shutdown(struct socket *sock, int how)
                       1->2 bit 2 snds.
                       2->3 */
        if ((how & ~SHUTDOWN_MASK) || how==0)   /* MAXINT->0 */
-               return(-EINVAL);
+               return -EINVAL;
+       if (!sk)
+               return -ENOTCONN;
        if (sock->state == SS_CONNECTING && sk->state == TCP_ESTABLISHED)
                sock->state = SS_CONNECTED;
-       if (!sk || !tcp_connected(sk->state)) 
-               return(-ENOTCONN);
+       if (!tcp_connected(sk->state)) 
+               return -ENOTCONN;
        sk->shutdown |= how;
        if (sk->prot->shutdown)
                sk->prot->shutdown(sk, how);
index 52c5ee5a40b52bbe1364d81dd7b56e6a9575c66a..7057c343aef19d198fdf69be3115eef03435c8d0 100644 (file)
@@ -7,7 +7,7 @@
  *             PROC file system.  It is mainly used for debugging and
  *             statistics.
  *
- * Version:    $Id: proc.c,v 1.35 1999/05/27 00:37:38 davem Exp $
+ * Version:    $Id: proc.c,v 1.36 1999/07/02 11:26:34 davem Exp $
  *
  * Authors:    Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
  *             Gerald J. Heim, <heim@peanuts.informatik.uni-tuebingen.de>
 #include <net/sock.h>
 #include <net/raw.h>
 
-/* Format a single open_request into tmpbuf. */
-static inline void get__openreq(struct sock *sk, struct open_request *req, 
-                               char *tmpbuf, 
-                               int i)
-{
-       sprintf(tmpbuf, "%4d: %08lX:%04X %08lX:%04X"
-               " %02X %08X:%08X %02X:%08lX %08X %5d %8d %u",
-               i,
-               (long unsigned int)req->af.v4_req.loc_addr,
-               ntohs(sk->sport),
-               (long unsigned int)req->af.v4_req.rmt_addr,
-               ntohs(req->rmt_port),
-               TCP_SYN_RECV,
-               0,0, /* could print option size, but that is af dependent. */
-               1,   /* timers active (only the expire timer) */  
-               (unsigned long)(req->expires - jiffies), 
-               req->retrans,
-               sk->socket ? sk->socket->inode->i_uid : 0,
-               0,  /* non standard timer */  
-               0 /* open_requests have no inode */
-               ); 
-}
-
-/* Format a single socket into tmpbuf. */
-static inline void get__sock(struct sock *sp, char *tmpbuf, int i, int format)
-{
-       unsigned long  dest, src;
-       unsigned short destp, srcp;
-       int timer_active, timer_active1, timer_active2;
-       int tw_bucket = 0;
-       unsigned long timer_expires;
-       struct tcp_opt *tp = &sp->tp_pinfo.af_tcp;
-
-       dest  = sp->daddr;
-       src   = sp->rcv_saddr;
-       destp = sp->dport;
-       srcp  = sp->sport;
-       
-       /* FIXME: The fact that retransmit_timer occurs as a field
-        * in two different parts of the socket structure is,
-        * to say the least, confusing. This code now uses the
-        * right retransmit_timer variable, but I'm not sure
-        * the rest of the timer stuff is still correct.
-        * In particular I'm not sure what the timeout value
-        * is suppose to reflect (as opposed to tm->when). -- erics
-        */
-       
-       destp = ntohs(destp);
-       srcp  = ntohs(srcp);
-       if((format == 0) && (sp->state == TCP_TIME_WAIT)) {
-               extern int tcp_tw_death_row_slot;
-               struct tcp_tw_bucket *tw = (struct tcp_tw_bucket *)sp;
-               int slot_dist;
-
-               tw_bucket       = 1;
-               timer_active1   = timer_active2 = 0;
-               timer_active    = 3;
-               slot_dist       = tw->death_slot;
-               if(slot_dist > tcp_tw_death_row_slot)
-                       slot_dist = (TCP_TWKILL_SLOTS - slot_dist) + tcp_tw_death_row_slot;
-               else
-                       slot_dist = tcp_tw_death_row_slot - slot_dist;
-               timer_expires   = jiffies + (slot_dist * TCP_TWKILL_PERIOD);
-       } else {
-               timer_active1 = tp->retransmit_timer.prev != NULL;
-               timer_active2 = sp->timer.prev != NULL;
-               timer_active    = 0;
-               timer_expires   = (unsigned) -1;
-       }
-       if (timer_active1 && tp->retransmit_timer.expires < timer_expires) {
-               timer_active    = 1;
-               timer_expires   = tp->retransmit_timer.expires;
-       }
-       if (timer_active2 && sp->timer.expires < timer_expires) {
-               timer_active    = 2;
-               timer_expires   = sp->timer.expires;
-       }
-       if(timer_active == 0)
-               timer_expires = jiffies;
-       sprintf(tmpbuf, "%4d: %08lX:%04X %08lX:%04X"
-               " %02X %08X:%08X %02X:%08lX %08X %5d %8d %ld",
-               i, src, srcp, dest, destp, sp->state, 
-               (tw_bucket ?
-                0 :
-                (format == 0) ?
-                tp->write_seq-tp->snd_una : atomic_read(&sp->wmem_alloc)),
-               (tw_bucket ?
-                0 :
-                (format == 0) ?
-                tp->rcv_nxt-tp->copied_seq: atomic_read(&sp->rmem_alloc)),
-               timer_active, timer_expires-jiffies,
-               (tw_bucket ? 0 : tp->retransmits),
-               (!tw_bucket && sp->socket) ? sp->socket->inode->i_uid : 0,
-               (!tw_bucket && timer_active) ? sp->timeout : 0,
-               (!tw_bucket && sp->socket) ? sp->socket->inode->i_ino : 0);
-}
-
-/*
- * Get__netinfo returns the length of that string.
- *
- * KNOWN BUGS
- *  As in get_unix_netinfo, the buffer might be too small. If this
- *  happens, get__netinfo returns only part of the available infos.
- *
- *  Assumes that buffer length is a multiply of 128 - if not it will
- *  write past the end.   
- */
-static int
-get__netinfo(struct proto *pro, char *buffer, int format, char **start, off_t offset, int length)
-{
-       struct sock *sp, *next;
-       int len=0, i = 0;
-       off_t pos=0;
-       off_t begin;
-       char tmpbuf[129];
-  
-       if (offset < 128) 
-               len += sprintf(buffer, "%-127s\n",
-                              "  sl  local_address rem_address   st tx_queue "
-                              "rx_queue tr tm->when retrnsmt   uid  timeout inode");
-       pos = 128;
-       SOCKHASH_LOCK_READ();
-       sp = pro->sklist_next;
-       while(sp != (struct sock *)pro) {
-               if (format == 0 && sp->state == TCP_LISTEN) {
-                       struct open_request *req;
-
-                       for (req = sp->tp_pinfo.af_tcp.syn_wait_queue; req;
-                            i++, req = req->dl_next) {
-                               if (req->sk)
-                                       continue;
-                               pos += 128;
-                               if (pos < offset) 
-                                       continue;
-                               get__openreq(sp, req, tmpbuf, i); 
-                               len += sprintf(buffer+len, "%-127s\n", tmpbuf);
-                               if(len >= length) 
-                                       goto out;
-                       }
-               }
-               
-               pos += 128;
-               if (pos < offset)
-                       goto next;
-               
-               get__sock(sp, tmpbuf, i, format);
-               
-               len += sprintf(buffer+len, "%-127s\n", tmpbuf);
-               if(len >= length)
-                       break;
-       next:
-               next = sp->sklist_next;
-               sp = next;
-               i++;
-       }
-out: 
-       SOCKHASH_UNLOCK_READ();
-       
-       begin = len - (pos - offset);
-       *start = buffer + begin;
-       len -= begin;
-       if(len>length)
-               len = length;
-       if (len<0)
-               len = 0; 
-       return len;
-} 
-
-int tcp_get_info(char *buffer, char **start, off_t offset, int length, int dummy)
-{
-       return get__netinfo(&tcp_prot, buffer,0, start, offset, length);
-}
-
-int udp_get_info(char *buffer, char **start, off_t offset, int length, int dummy)
-{
-       return get__netinfo(&udp_prot, buffer,1, start, offset, length);
-}
-
-int raw_get_info(char *buffer, char **start, off_t offset, int length, int dummy)
-{
-       return get__netinfo(&raw_prot, buffer,1, start, offset, length);
-}
-
 /*
  *     Report socket allocation statistics [mea@utu.fi]
  */
index dd2e7555e64a39bfbc179bbb4a03e7d7ee271bef..584fe81fc7abf13e4ce9c95740b32809eb65bea1 100644 (file)
@@ -5,7 +5,7 @@
  *
  *             RAW - implementation of IP "raw" sockets.
  *
- * Version:    $Id: raw.c,v 1.41 1999/05/30 01:16:19 davem Exp $
+ * Version:    $Id: raw.c,v 1.42 1999/07/02 11:26:26 davem Exp $
  *
  * Authors:    Ross Biro, <bir7@leland.Stanford.Edu>
  *             Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
@@ -70,60 +70,32 @@ struct sock *raw_v4_htable[RAWV4_HTABLE_SIZE];
 
 static void raw_v4_hash(struct sock *sk)
 {
-       struct sock **skp;
-       int num = sk->num;
+       struct sock **skp = &raw_v4_htable[sk->num & (RAWV4_HTABLE_SIZE - 1)];
 
-       num &= (RAWV4_HTABLE_SIZE - 1);
-       skp = &raw_v4_htable[num];
        SOCKHASH_LOCK_WRITE();
-       sk->next = *skp;
+       if ((sk->next = *skp) != NULL)
+               (*skp)->pprev = &sk->next;
        *skp = sk;
-       sk->hashent = num;
+       sk->pprev = skp;
+       sk->prot->inuse++;
+       if(sk->prot->highestinuse < sk->prot->inuse)
+               sk->prot->highestinuse = sk->prot->inuse;
        SOCKHASH_UNLOCK_WRITE();
 }
 
 static void raw_v4_unhash(struct sock *sk)
 {
-       struct sock **skp;
-       int num = sk->num;
-
-       num &= (RAWV4_HTABLE_SIZE - 1);
-       skp = &raw_v4_htable[num];
-
        SOCKHASH_LOCK_WRITE();
-       while(*skp != NULL) {
-               if(*skp == sk) {
-                       *skp = sk->next;
-                       break;
-               }
-               skp = &((*skp)->next);
+       if (sk->pprev) {
+               if (sk->next)
+                       sk->next->pprev = sk->pprev;
+               *sk->pprev = sk->next;
+               sk->pprev = NULL;
+               sk->prot->inuse--;
        }
        SOCKHASH_UNLOCK_WRITE();
 }
 
-static void raw_v4_rehash(struct sock *sk)
-{
-       struct sock **skp;
-       int num = sk->num;
-       int oldnum = sk->hashent;
-
-       num &= (RAWV4_HTABLE_SIZE - 1);
-       skp = &raw_v4_htable[oldnum];
-
-       SOCKHASH_LOCK_WRITE();
-       while(*skp != NULL) {
-               if(*skp == sk) {
-                       *skp = sk->next;
-                       break;
-               }
-               skp = &((*skp)->next);
-       }
-       sk->next = raw_v4_htable[num];
-       raw_v4_htable[num] = sk;
-       sk->hashent = num;
-       SOCKHASH_UNLOCK_WRITE();
-}
-
 static __inline__ struct sock *__raw_v4_lookup(struct sock *sk, unsigned short num,
                                               unsigned long raddr, unsigned long laddr,
                                               int dif)
@@ -640,9 +612,69 @@ static int raw_getsockopt(struct sock *sk, int level, int optname,
        return -ENOPROTOOPT;
 }
 
+/* Format one raw socket into tmpbuf as a row matching the column header
+ * that raw_get_info() prints; i is emitted as the leading "sl" field. */
+static void get_raw_sock(struct sock *sp, char *tmpbuf, int i)
+{
+       unsigned int dest = sp->daddr, src = sp->rcv_saddr;
+       __u16 destp = ntohs(sp->dport), srcp = ntohs(sp->sport);
+       /* "tr" column: 2 while sp->timer.prev is non-NULL, otherwise 0. */
+       int timer_active = (sp->timer.prev != NULL) ? 2 : 0;
+       unsigned long timer_expires = timer_active ? sp->timer.expires : jiffies;
+
+       sprintf(tmpbuf, "%4d: %08X:%04X %08X:%04X"
+               " %02X %08X:%08X %02X:%08lX %08X %5d %8d %ld",
+               i, src, srcp, dest, destp, sp->state,
+               atomic_read(&sp->wmem_alloc), atomic_read(&sp->rmem_alloc),
+               timer_active, timer_expires-jiffies, 0,
+               /* sp->socket may be NULL (the inode column already guards
+                * for it), so guard the uid column the same way. */
+               sp->socket ? sp->socket->inode->i_uid : 0,
+               timer_active ? sp->timeout : 0,
+               sp->socket ? sp->socket->inode->i_ino : 0);
+}
+
+/* Emit a header row plus one 128-byte row per PF_INET socket found in
+ * raw_v4_htable; returns the byte count at *start, clamped to [0, length]. */
+int raw_get_info(char *buffer, char **start, off_t offset, int length, int dummy)
+{
+       int len = 0, num = 0, i;
+       off_t pos = 128, begin;	/* pos starts just past the header row */
+       char tmpbuf[129];
+
+       if (offset < 128) 
+               len += sprintf(buffer, "%-127s\n",
+                              "  sl  local_address rem_address   st tx_queue "
+                              "rx_queue tr tm->when retrnsmt   uid  timeout inode");
+       SOCKHASH_LOCK_READ();
+       for (i = 0; i < RAWV4_HTABLE_SIZE; i++) {
+               struct sock *sk;
+               for (sk = raw_v4_htable[i]; sk; sk = sk->next, num++) {
+                       if (sk->family != PF_INET)
+                               continue;
+                       pos += 128;
+                       if (pos < offset)
+                               continue;
+                       /* "sl" is the running socket count, not the bucket index. */
+                       get_raw_sock(sk, tmpbuf, num);
+                       len += sprintf(buffer+len, "%-127s\n", tmpbuf);
+                       if(len >= length)
+                               goto out;
+               }
+       }
+out:
+       SOCKHASH_UNLOCK_READ();
+       begin = len - (pos - offset);
+       *start = buffer + begin;
+       len -= begin;
+       if(len > length)
+               len = length;
+       if (len < 0)
+               len = 0; 
+       return len;
+}
+
 struct proto raw_prot = {
-       (struct sock *)&raw_prot,       /* sklist_next */
-       (struct sock *)&raw_prot,       /* sklist_prev */
        raw_close,                      /* close */
        udp_connect,                    /* connect */
        NULL,                           /* accept */
@@ -666,9 +698,7 @@ struct proto raw_prot = {
        raw_rcv_skb,                    /* backlog_rcv */
        raw_v4_hash,                    /* hash */
        raw_v4_unhash,                  /* unhash */
-       raw_v4_rehash,                  /* rehash */
-       NULL,                           /* good_socknum */
-       NULL,                           /* verify_bind */
+       NULL,                           /* get_port */
        128,                            /* max_header */
        0,                              /* retransmits */
        "RAW",                          /* name */
index af4165fce4c580e24a8829cc741e7d4f30f4f1dc..3080bc201aa24dceb7631ae3c66de8bff34b5df1 100644 (file)
@@ -5,7 +5,7 @@
  *
  *             Implementation of the Transmission Control Protocol(TCP).
  *
- * Version:    $Id: tcp_input.c,v 1.169 1999/06/09 08:29:13 davem Exp $
+ * Version:    $Id: tcp_input.c,v 1.170 1999/07/02 11:26:28 davem Exp $
  *
  * Authors:    Ross Biro, <bir7@leland.Stanford.Edu>
  *             Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
@@ -917,25 +917,26 @@ extern void tcp_tw_deschedule(struct tcp_tw_bucket *tw);
 /* Must be called only from BH context. */
 void tcp_timewait_kill(struct tcp_tw_bucket *tw)
 {
+       struct tcp_bind_bucket *tb = tw->tb;
+
        SOCKHASH_LOCK_WRITE_BH();
 
-       /* Unlink from various places. */
+       /* Disassociate with bind bucket. */
        if(tw->bind_next)
                tw->bind_next->bind_pprev = tw->bind_pprev;
        *(tw->bind_pprev) = tw->bind_next;
-       if(tw->tb->owners == NULL)
-               tcp_inc_slow_timer(TCP_SLT_BUCKETGC);
+       if (tb->owners == NULL) {
+               if (tb->next)
+                       tb->next->pprev = tb->pprev;
+               *(tb->pprev) = tb->next;
+               kmem_cache_free(tcp_bucket_cachep, tb);
+       }
 
+       /* Unlink from established hashes. */
        if(tw->next)
                tw->next->pprev = tw->pprev;
        *tw->pprev = tw->next;
 
-       /* We decremented the prot->inuse count when we entered TIME_WAIT
-        * and the sock from which this came was destroyed.
-        */
-       tw->sklist_next->sklist_prev = tw->sklist_prev;
-       tw->sklist_prev->sklist_next = tw->sklist_next;
-
        SOCKHASH_UNLOCK_WRITE_BH();
 
        /* Ok, now free it up. */
@@ -1040,11 +1041,9 @@ static __inline__ void tcp_tw_hashdance(struct sock *sk, struct tcp_tw_bucket *t
                sk->bind_next->bind_pprev = &tw->bind_next;
        tw->bind_pprev = sk->bind_pprev;
        *sk->bind_pprev = (struct sock *)tw;
+       sk->prev = NULL;
 
-       /* Step 3: Same for the protocol sklist. */
-       (tw->sklist_next = sk->sklist_next)->sklist_prev = (struct sock *)tw;
-       (tw->sklist_prev = sk->sklist_prev)->sklist_next = (struct sock *)tw;
-       sk->sklist_next = NULL;
+       /* Step 3: Un-charge protocol socket in-use count. */
        sk->prot->inuse--;
 
        /* Step 4: Hash TW into TIMEWAIT half of established hash table. */
index 564e859f259c0c5411b5088388a343f51ab90ef4..957ea9d38af530eeadec5c86681f3fd76372f2c5 100644 (file)
@@ -5,7 +5,7 @@
  *
  *             Implementation of the Transmission Control Protocol(TCP).
  *
- * Version:    $Id: tcp_ipv4.c,v 1.180 1999/06/09 08:29:19 davem Exp $
+ * Version:    $Id: tcp_ipv4.c,v 1.181 1999/07/02 11:26:31 davem Exp $
  *
  *             IPv4 specific functions
  *
@@ -132,28 +132,9 @@ static __inline__ int tcp_sk_hashfn(struct sock *sk)
        return tcp_hashfn(laddr, lport, faddr, fport);
 }
 
-/* Invariant, sk->num is non-zero. */
-void tcp_bucket_unlock(struct sock *sk)
-{
-       struct tcp_bind_bucket *tb;
-       unsigned short snum = sk->num;
-
-       SOCKHASH_LOCK_WRITE();
-       for(tb = tcp_bhash[tcp_bhashfn(snum)]; tb; tb = tb->next) {
-               if(tb->port == snum) {
-                       if(tb->owners == NULL &&
-                          (tb->flags & TCPB_FLAG_LOCKED)) {
-                               tb->flags &= ~(TCPB_FLAG_LOCKED |
-                                              TCPB_FLAG_FASTREUSE);
-                               tcp_inc_slow_timer(TCP_SLT_BUCKETGC);
-                       }
-                       break;
-               }
-       }
-       SOCKHASH_UNLOCK_WRITE();
-}
-
-/* The sockhash lock must be held as a writer here. */
+/* Allocate and initialize a new TCP local port bind bucket.
+ * The sockhash lock must be held as a writer here.
+ */
 struct tcp_bind_bucket *tcp_bucket_create(unsigned short snum)
 {
        struct tcp_bind_bucket *tb;
@@ -163,7 +144,7 @@ struct tcp_bind_bucket *tcp_bucket_create(unsigned short snum)
                struct tcp_bind_bucket **head =
                        &tcp_bhash[tcp_bhashfn(snum)];
                tb->port = snum;
-               tb->flags = TCPB_FLAG_LOCKED;
+               tb->fastreuse = 0;
                tb->owners = NULL;
                if((tb->next = *head) != NULL)
                        tb->next->pprev = &tb->next;
@@ -186,133 +167,176 @@ static __inline__ int tcp_bucket_check(unsigned short snum)
        tb = tcp_bhash[tcp_bhashfn(snum)];
        for( ; (tb && (tb->port != snum)); tb = tb->next)
                ;
-       if(tb == NULL && tcp_bucket_create(snum) == NULL)
-               ret = 1;
+       ret = 0;        /* stays 0 unless bucket creation below fails */
+       if (tb == NULL) {
+               if ((tb = tcp_bucket_create(snum)) == NULL)
+                       ret = 1;
+       }
        SOCKHASH_UNLOCK_WRITE();
 
        return ret;
 }
 #endif
 
-static int tcp_v4_verify_bind(struct sock *sk, unsigned short snum)
+static __inline__ void __tcp_inherit_port(struct sock *sk, struct sock *child)
+{
+       struct tcp_bind_bucket *tb = (struct tcp_bind_bucket *)sk->prev;
+
+       if ((child->bind_next = tb->owners) != NULL)
+               tb->owners->bind_pprev = &child->bind_next;
+       tb->owners = child;
+       child->bind_pprev = &tb->owners;
+       child->prev = (struct sock *) tb;
+}
+
+__inline__ void tcp_inherit_port(struct sock *sk, struct sock *child)
+{
+       SOCKHASH_LOCK_WRITE();
+       __tcp_inherit_port(sk, child);
+       SOCKHASH_UNLOCK_WRITE();
+}
+
+/* Obtain a reference to a local port for the given sock,
+ * if snum is zero it means select any available local port.
+ */
+static int tcp_v4_get_port(struct sock *sk, unsigned short snum)
 {
        struct tcp_bind_bucket *tb;
-       int result = 0;
 
        SOCKHASH_LOCK_WRITE();
-       for(tb = tcp_bhash[tcp_bhashfn(snum)];
-           (tb && (tb->port != snum));
-           tb = tb->next)
-               ;
-       if(tb && tb->owners) {
-               /* Fast path for reuse ports, see include/net/tcp.h for a very
-                * detailed description of why this works, and why it is worth
-                * the effort at all. -DaveM
-                */
-               if((tb->flags & TCPB_FLAG_FASTREUSE)    &&
-                  (sk->reuse != 0)) {
-                       goto go_like_smoke;
+       if (snum == 0) {
+               int rover = tcp_port_rover;
+               int low = sysctl_local_port_range[0];
+               int high = sysctl_local_port_range[1];
+               int remaining = (high - low) + 1;
+
+               do {    rover++;
+                       if ((rover < low) || (rover > high))
+                               rover = low;
+                       tb = tcp_bhash[tcp_bhashfn(rover)];
+                       for ( ; tb; tb = tb->next)
+                               if (tb->port == rover)
+                                       goto next;
+                       break;
+               next:
+               } while (--remaining > 0);
+               tcp_port_rover = rover;
+
+               /* Exhausted local port range during search? */
+               if (remaining <= 0)
+                       goto fail;
+
+               /* OK, here is the one we will use. */
+               snum = rover;
+               tb = NULL;
+       } else {
+               for (tb = tcp_bhash[tcp_bhashfn(snum)];
+                    tb != NULL;
+                    tb = tb->next)
+                       if (tb->port == snum)
+                               break;
+       }
+       if (tb != NULL && tb->owners != NULL) {
+               if (tb->fastreuse != 0 && sk->reuse != 0) {
+                       goto success;
                } else {
-                       struct sock *sk2;
+                       struct sock *sk2 = tb->owners;
                        int sk_reuse = sk->reuse;
 
-                       /* We must walk the whole port owner list in this case. -DaveM */
-                       for(sk2 = tb->owners; sk2; sk2 = sk2->bind_next) {
+                       for( ; sk2 != NULL; sk2 = sk2->bind_next) {
                                if (sk->bound_dev_if == sk2->bound_dev_if) {
-                                       if(!sk_reuse || !sk2->reuse || sk2->state == TCP_LISTEN) {
-                                               if(!sk2->rcv_saddr              ||
-                                                  !sk->rcv_saddr               ||
-                                                  (sk2->rcv_saddr == sk->rcv_saddr))
+                                       if (!sk_reuse   ||
+                                           !sk2->reuse ||
+                                           sk2->state == TCP_LISTEN) {
+                                               if (!sk2->rcv_saddr     ||
+                                                   !sk->rcv_saddr      ||
+                                                   (sk2->rcv_saddr == sk->rcv_saddr))
                                                        break;
                                        }
                                }
                        }
-                       if(sk2 != NULL)
-                               result = 1;
+                       /* If we found a conflict, fail. */
+                       if (sk2 != NULL)
+                               goto fail;
                }
        }
-       if(result == 0) {
-               if(tb == NULL) {
-                       if((tb = tcp_bucket_create(snum)) == NULL)
-                               result = 1;
-                       else if (sk->reuse && sk->state != TCP_LISTEN)
-                               tb->flags |= TCPB_FLAG_FASTREUSE;
-               } else {
-                       /* It could be pending garbage collection, this
-                        * kills the race and prevents it from disappearing
-                        * out from under us by the time we use it.  -DaveM
-                        */
-                       if(tb->owners == NULL) {
-                               if (!(tb->flags & TCPB_FLAG_LOCKED)) {
-                                       tb->flags = (TCPB_FLAG_LOCKED |
-                                                    ((sk->reuse &&
-                                                      sk->state != TCP_LISTEN) ?
-                                                     TCPB_FLAG_FASTREUSE : 0));
-                                       tcp_dec_slow_timer(TCP_SLT_BUCKETGC);
-                               } else if (!(tb->flags & TCPB_FLAG_GOODSOCKNUM)) {
-                                       /* Someone is in between the bind
-                                        * and the actual connect or listen.
-                                        * See if it was a legitimate reuse
-                                        * and we are as well, else punt.
-                                        */
-                                       if (sk->reuse == 0 ||
-                                           !(tb->flags & TCPB_FLAG_FASTREUSE))
-                                               result = 1;
-                               } else
-                                       tb->flags &= ~TCPB_FLAG_GOODSOCKNUM;
-                       }
-               }
-       }
-go_like_smoke:
+       if (tb == NULL &&
+           (tb = tcp_bucket_create(snum)) == NULL)
+                       goto fail;
+       if (tb->owners == NULL) {
+               if (sk->reuse && sk->state != TCP_LISTEN)
+                       tb->fastreuse = 1;
+               else
+                       tb->fastreuse = 0;
+       } else if (tb->fastreuse &&
+                  ((sk->reuse == 0) || (sk->state == TCP_LISTEN)))
+               tb->fastreuse = 0;
+success:
+       sk->num = snum;
+       if ((sk->bind_next = tb->owners) != NULL)
+               tb->owners->bind_pprev = &sk->bind_next;
+       tb->owners = sk;
+       sk->bind_pprev = &tb->owners;
+       sk->prev = (struct sock *) tb;
+
        SOCKHASH_UNLOCK_WRITE();
-       return result;
+       return 0;
+
+fail:
+       SOCKHASH_UNLOCK_WRITE();
+       return 1;
 }
 
-unsigned short tcp_good_socknum(void)
+/* Get rid of any references to a local port held by the
+ * given sock.
+ */
+__inline__ void __tcp_put_port(struct sock *sk)
 {
        struct tcp_bind_bucket *tb;
-       int low = sysctl_local_port_range[0];
-       int high = sysctl_local_port_range[1];
-       int remaining = (high - low) + 1;
-       int rover;
 
+       tb = (struct tcp_bind_bucket *) sk->prev;
+       if (sk->bind_next)
+               sk->bind_next->bind_pprev = sk->bind_pprev;
+       *(sk->bind_pprev) = sk->bind_next;
+       sk->prev = NULL;
+       if (tb->owners == NULL) {
+               if (tb->next)
+                       tb->next->pprev = tb->pprev;
+               *(tb->pprev) = tb->next;
+               kmem_cache_free(tcp_bucket_cachep, tb);
+       }
+}
+
+void tcp_put_port(struct sock *sk)
+{
        SOCKHASH_LOCK_WRITE();
-       rover = tcp_port_rover;
-       do {
-               rover += 1;
-               if((rover < low) || (rover > high))
-                       rover = low;
-               tb = tcp_bhash[tcp_bhashfn(rover)];
-               for( ; tb; tb = tb->next) {
-                       if(tb->port == rover)
-                               goto next;
-               }
-               break;
-       next:
-       } while(--remaining > 0);
-       tcp_port_rover = rover;
-       tb = NULL;
-       if((remaining <= 0) || ((tb = tcp_bucket_create(rover)) == NULL))
-               rover = 0;
-       if (tb != NULL)
-               tb->flags |= TCPB_FLAG_GOODSOCKNUM;
+       __tcp_put_port(sk);
        SOCKHASH_UNLOCK_WRITE();
+}
+
+static __inline__ void __tcp_v4_hash(struct sock *sk)
+{
+       struct sock **skp;
 
-       return rover;
+       if(sk->state == TCP_LISTEN)
+               skp = &tcp_listening_hash[tcp_sk_listen_hashfn(sk)];
+       else
+               skp = &tcp_ehash[(sk->hashent = tcp_sk_hashfn(sk))];
+
+       if((sk->next = *skp) != NULL)
+               (*skp)->pprev = &sk->next;
+       *skp = sk;
+       sk->pprev = skp;
+       sk->prot->inuse++;
+       if(sk->prot->highestinuse < sk->prot->inuse)
+               sk->prot->highestinuse = sk->prot->inuse;
 }
 
 static void tcp_v4_hash(struct sock *sk)
 {
        if (sk->state != TCP_CLOSE) {
-               struct sock **skp;
-
                SOCKHASH_LOCK_WRITE();
-               skp = &tcp_ehash[(sk->hashent = tcp_sk_hashfn(sk))];
-               if((sk->next = *skp) != NULL)
-                       (*skp)->pprev = &sk->next;
-               *skp = sk;
-               sk->pprev = skp;
-               tcp_sk_bindify(sk);
+               __tcp_v4_hash(sk);
                SOCKHASH_UNLOCK_WRITE();
        }
 }
@@ -325,39 +349,9 @@ static void tcp_v4_unhash(struct sock *sk)
                        sk->next->pprev = sk->pprev;
                *sk->pprev = sk->next;
                sk->pprev = NULL;
+               sk->prot->inuse--;
                tcp_reg_zap(sk);
-               tcp_sk_unbindify(sk);
-       }
-       SOCKHASH_UNLOCK_WRITE();
-}
-
-static void tcp_v4_rehash(struct sock *sk)
-{
-       unsigned char state;
-
-       SOCKHASH_LOCK_WRITE();
-       state = sk->state;
-       if(sk->pprev != NULL) {
-               if(sk->next)
-                       sk->next->pprev = sk->pprev;
-               *sk->pprev = sk->next;
-               sk->pprev = NULL;
-               tcp_reg_zap(sk);
-       }
-       if(state != TCP_CLOSE) {
-               struct sock **skp;
-
-               if(state == TCP_LISTEN)
-                       skp = &tcp_listening_hash[tcp_sk_listen_hashfn(sk)];
-               else
-                       skp = &tcp_ehash[(sk->hashent = tcp_sk_hashfn(sk))];
-
-               if((sk->next = *skp) != NULL)
-                       (*skp)->pprev = &sk->next;
-               *skp = sk;
-               sk->pprev = skp;
-               if(state == TCP_LISTEN)
-                       tcp_sk_bindify(sk);
+               __tcp_put_port(sk);
        }
        SOCKHASH_UNLOCK_WRITE();
 }
@@ -1344,7 +1338,6 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct open_request *req,
 #endif
 
                memcpy(newsk, sk, sizeof(*newsk));
-               newsk->sklist_next = NULL;
                newsk->state = TCP_SYN_RECV;
 
                /* Clone the TCP header template */
@@ -1536,8 +1529,11 @@ struct sock * tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
        if (newsk->sndbuf < (3 * newtp->pmtu_cookie))
                newsk->sndbuf = min ((3 * newtp->pmtu_cookie), sysctl_wmem_max);
  
-       tcp_v4_hash(newsk);
-       add_to_prot_sklist(newsk);
+       SOCKHASH_LOCK_WRITE();
+       __tcp_v4_hash(newsk);
+       __tcp_inherit_port(sk, newsk);
+       SOCKHASH_UNLOCK_WRITE();
+
        sk->data_ready(sk, 0); /* Deliver SIGIO */ 
 
        return newsk;
@@ -1780,6 +1776,25 @@ do_time_wait:
        goto discard_it;
 }
 
+static void __tcp_v4_rehash(struct sock *sk)
+{
+       struct sock **skp = &tcp_ehash[(sk->hashent = tcp_sk_hashfn(sk))];
+
+       SOCKHASH_LOCK_WRITE();
+       if(sk->pprev) {
+               if(sk->next)
+                       sk->next->pprev = sk->pprev;
+               *sk->pprev = sk->next;
+               sk->pprev = NULL;
+               tcp_reg_zap(sk);
+       }
+       if((sk->next = *skp) != NULL)
+               (*skp)->pprev = &sk->next;
+       *skp = sk;
+       sk->pprev = skp;
+       SOCKHASH_UNLOCK_WRITE();
+}
+
 int tcp_v4_rebuild_header(struct sock *sk)
 {
        struct rtable *rt = (struct rtable *)sk->dst_cache;
@@ -1853,7 +1868,12 @@ do_rewrite:
 
                sk->saddr = new_saddr;
                sk->rcv_saddr = new_saddr;
-               tcp_v4_rehash(sk);
+
+               /* XXX The only one ugly spot where we need to
+                * XXX really change the sockets identity after
+                * XXX it has entered the hashes. -DaveM
+                */
+               __tcp_v4_rehash(sk);
        } 
         
        return 0;
@@ -1948,20 +1968,192 @@ static int tcp_v4_destroy_sock(struct sock *sk)
        while((skb = __skb_dequeue(&tp->out_of_order_queue)) != NULL)
                kfree_skb(skb);
 
-       /* Clean up a locked TCP bind bucket, this only happens if a
+       /* Clean up a referenced TCP bind bucket, this only happens if a
         * port is allocated for a socket, but it never fully connects.
-        * In which case we will find num to be non-zero and daddr to
-        * be zero.
         */
-       if(sk->daddr == 0 && sk->num != 0)
-               tcp_bucket_unlock(sk);
+       if(sk->prev != NULL)
+               tcp_put_port(sk);
 
        return 0;
 }
 
+/* Proc filesystem TCP sock list dumping. */
+static void get_openreq(struct sock *sk, struct open_request *req, char *tmpbuf, int i)
+{
+       sprintf(tmpbuf, "%4d: %08lX:%04X %08lX:%04X"
+               " %02X %08X:%08X %02X:%08lX %08X %5d %8d %u",
+               i,
+               (long unsigned int)req->af.v4_req.loc_addr,
+               ntohs(sk->sport),
+               (long unsigned int)req->af.v4_req.rmt_addr,
+               ntohs(req->rmt_port),
+               TCP_SYN_RECV,
+               0,0, /* could print option size, but that is af dependent. */
+               1,   /* timers active (only the expire timer) */  
+               (unsigned long)(req->expires - jiffies), 
+               req->retrans,
+               sk->socket ? sk->socket->inode->i_uid : 0,
+               0,  /* non standard timer */  
+               0 /* open_requests have no inode */
+               ); 
+}
+
+static void get_tcp_sock(struct sock *sp, char *tmpbuf, int i)
+{
+       unsigned int dest, src;
+       __u16 destp, srcp;
+       int timer_active, timer_active1, timer_active2;
+       unsigned long timer_expires;
+       struct tcp_opt *tp = &sp->tp_pinfo.af_tcp;
+
+       dest  = sp->daddr;
+       src   = sp->rcv_saddr;
+       destp = ntohs(sp->dport);
+       srcp  = ntohs(sp->sport);
+       timer_active1 = tp->retransmit_timer.prev != NULL;
+       timer_active2 = sp->timer.prev != NULL;
+       timer_active    = 0;
+       timer_expires   = (unsigned) -1;
+       if (timer_active1 && tp->retransmit_timer.expires < timer_expires) {
+               timer_active    = 1;
+               timer_expires   = tp->retransmit_timer.expires;
+       }
+       if (timer_active2 && sp->timer.expires < timer_expires) {
+               timer_active    = 2;
+               timer_expires   = sp->timer.expires;
+       }
+       if(timer_active == 0)
+               timer_expires = jiffies;
+
+       sprintf(tmpbuf, "%4d: %08X:%04X %08X:%04X"
+               " %02X %08X:%08X %02X:%08lX %08X %5d %8d %ld",
+               i, src, srcp, dest, destp, sp->state, 
+               tp->write_seq-tp->snd_una, tp->rcv_nxt-tp->copied_seq,
+               timer_active, timer_expires-jiffies,
+               tp->retransmits,
+               sp->socket ? sp->socket->inode->i_uid : 0,
+               timer_active ? sp->timeout : 0,
+               sp->socket ? sp->socket->inode->i_ino : 0);
+}
+
+static void get_timewait_sock(struct tcp_tw_bucket *tw, char *tmpbuf, int i)
+{
+       extern int tcp_tw_death_row_slot;
+       unsigned int dest, src;
+       __u16 destp, srcp;
+       int slot_dist;
+
+       dest  = tw->daddr;
+       src   = tw->rcv_saddr;
+       destp = ntohs(tw->dport);
+       srcp  = ntohs(tw->sport);
+
+       slot_dist = tw->death_slot;
+       if(slot_dist > tcp_tw_death_row_slot)
+               slot_dist = (TCP_TWKILL_SLOTS - slot_dist) + tcp_tw_death_row_slot;
+       else
+               slot_dist = tcp_tw_death_row_slot - slot_dist;
+
+       sprintf(tmpbuf, "%4d: %08X:%04X %08X:%04X"
+               " %02X %08X:%08X %02X:%08X %08X %5d %8d %d",
+               i, src, srcp, dest, destp, TCP_TIME_WAIT, 0, 0,
+               3, slot_dist * TCP_TWKILL_PERIOD, 0, 0, 0, 0);
+}
+
+int tcp_get_info(char *buffer, char **start, off_t offset, int length, int dummy)
+{
+       int len = 0, num = 0, i;
+       off_t begin, pos = 0;
+       char tmpbuf[129];
+
+       if (offset < 128)
+               len += sprintf(buffer, "%-127s\n",
+                              "  sl  local_address rem_address   st tx_queue "
+                              "rx_queue tr tm->when retrnsmt   uid  timeout inode");
+
+       pos = 128;
+       SOCKHASH_LOCK_READ();
+
+       /* First, walk listening socket table. */
+       for(i = 0; i < TCP_LHTABLE_SIZE; i++) {
+               struct sock *sk = tcp_listening_hash[i];
+
+               for (sk = tcp_listening_hash[i]; sk; sk = sk->next, num++) {
+                       struct open_request *req;
+                       struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
+
+                       if (sk->family != PF_INET)
+                               continue;
+                       pos += 128;
+                       if (pos >= offset) {
+                               get_tcp_sock(sk, tmpbuf, num);
+                               len += sprintf(buffer+len, "%-127s\n", tmpbuf);
+                               if (len >= length)
+                                       goto out;
+                       }
+                       for (req = tp->syn_wait_queue; req; req = req->dl_next, num++) {
+                               if (req->sk)
+                                       continue;
+                               pos += 128;
+                               if (pos < offset)
+                                       continue;
+                               get_openreq(sk, req, tmpbuf, num);
+                               len += sprintf(buffer+len, "%-127s\n", tmpbuf);
+                               if(len >= length) 
+                                       goto out;
+                       }
+               }
+       }
+
+       /* Next, walk established hash chain. */
+       for (i = 0; i < (tcp_ehash_size >> 1); i++) {
+               struct sock *sk;
+
+               for(sk = tcp_ehash[i]; sk; sk = sk->next, num++) {
+                       if (sk->family != PF_INET)
+                               continue;
+                       pos += 128;
+                       if (pos < offset)
+                               continue;
+                       get_tcp_sock(sk, tmpbuf, num);
+                       len += sprintf(buffer+len, "%-127s\n", tmpbuf);
+                       if(len >= length)
+                               goto out;
+               }
+       }
+
+       /* Finally, walk time wait buckets. */
+       for (i = (tcp_ehash_size>>1); i < tcp_ehash_size; i++) {
+               struct tcp_tw_bucket *tw;
+               for (tw = (struct tcp_tw_bucket *)tcp_ehash[i];
+                    tw != NULL;
+                    tw = (struct tcp_tw_bucket *)tw->next, num++) {
+                       if (tw->family != PF_INET)
+                               continue;
+                       pos += 128;
+                       if (pos < offset)
+                               continue;
+                       get_timewait_sock(tw, tmpbuf, num);
+                       len += sprintf(buffer+len, "%-127s\n", tmpbuf);
+                       if(len >= length)
+                               goto out;
+               }
+       }
+
+out:
+       SOCKHASH_UNLOCK_READ();
+
+       begin = len - (pos - offset);
+       *start = buffer + begin;
+       len -= begin;
+       if(len > length)
+               len = length;
+       if (len < 0)
+               len = 0; 
+       return len;
+}
+
 struct proto tcp_prot = {
-       (struct sock *)&tcp_prot,       /* sklist_next */
-       (struct sock *)&tcp_prot,       /* sklist_prev */
        tcp_close,                      /* close */
        tcp_v4_connect,                 /* connect */
        tcp_accept,                     /* accept */
@@ -1981,9 +2173,7 @@ struct proto tcp_prot = {
        tcp_v4_do_rcv,                  /* backlog_rcv */
        tcp_v4_hash,                    /* hash */
        tcp_v4_unhash,                  /* unhash */
-       tcp_v4_rehash,                  /* rehash */
-       tcp_good_socknum,               /* good_socknum */
-       tcp_v4_verify_bind,             /* verify_bind */
+       tcp_v4_get_port,                /* get_port */
        128,                            /* max_header */
        0,                              /* retransmits */
        "TCP",                          /* name */
index d23eef1431778278b376f22d1386c425b13350a1..05a92f7f772c65c91a23249959076a84eac1fcac 100644 (file)
@@ -5,7 +5,7 @@
  *
  *             Implementation of the Transmission Control Protocol(TCP).
  *
- * Version:    $Id: tcp_timer.c,v 1.64 1999/05/27 00:37:31 davem Exp $
+ * Version:    $Id: tcp_timer.c,v 1.65 1999/07/02 11:26:35 davem Exp $
  *
  * Authors:    Ross Biro, <bir7@leland.Stanford.Edu>
  *             Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
@@ -31,7 +31,6 @@ int sysctl_tcp_retries2 = TCP_RETR2;
 static void tcp_sltimer_handler(unsigned long);
 static void tcp_syn_recv_timer(unsigned long);
 static void tcp_keepalive(unsigned long data);
-static void tcp_bucketgc(unsigned long);
 static void tcp_twkill(unsigned long);
 
 struct timer_list      tcp_slow_timer = {
@@ -44,8 +43,7 @@ struct timer_list     tcp_slow_timer = {
 struct tcp_sl_timer tcp_slt_array[TCP_SLT_MAX] = {
        {ATOMIC_INIT(0), TCP_SYNACK_PERIOD, 0, tcp_syn_recv_timer},/* SYNACK    */
        {ATOMIC_INIT(0), TCP_KEEPALIVE_PERIOD, 0, tcp_keepalive},  /* KEEPALIVE */
-       {ATOMIC_INIT(0), TCP_TWKILL_PERIOD, 0, tcp_twkill},        /* TWKILL    */
-       {ATOMIC_INIT(0), TCP_BUCKETGC_PERIOD, 0, tcp_bucketgc}     /* BUCKETGC  */
+       {ATOMIC_INIT(0), TCP_TWKILL_PERIOD, 0, tcp_twkill}         /* TWKILL    */
 };
 
 const char timer_bug_msg[] = KERN_DEBUG "tcpbug: unknown timer value\n";
@@ -252,43 +250,6 @@ static __inline__ int tcp_keepopen_proc(struct sock *sk)
        return res;
 }
 
-/* Garbage collect TCP bind buckets. */
-static void tcp_bucketgc(unsigned long data)
-{
-       int i, reaped = 0;;
-
-       SOCKHASH_LOCK_WRITE_BH();
-       for(i = 0; i < tcp_bhash_size; i++) {
-               struct tcp_bind_bucket *tb = tcp_bhash[i];
-
-               while(tb) {
-                       struct tcp_bind_bucket *next = tb->next;
-
-                       if((tb->owners == NULL) &&
-                          !(tb->flags & TCPB_FLAG_LOCKED)) {
-                               reaped++;
-
-                               /* Unlink bucket. */
-                               if(tb->next)
-                                       tb->next->pprev = tb->pprev;
-                               *tb->pprev = tb->next;
-
-                               /* Finally, free it up. */
-                               kmem_cache_free(tcp_bucket_cachep, tb);
-                       }
-                       tb = next;
-               }
-       }
-       SOCKHASH_UNLOCK_WRITE_BH();
-
-       if(reaped != 0) {
-               struct tcp_sl_timer *slt = (struct tcp_sl_timer *)data;
-
-               /* Eat timer references. */
-               atomic_sub(reaped, &slt->count);
-       }
-}
-
 /* Kill off TIME_WAIT sockets once their lifetime has expired. */
 int tcp_tw_death_row_slot = 0;
 static struct tcp_tw_bucket *tcp_tw_death_row[TCP_TWKILL_SLOTS] =
index 516304d4bd8f31c4d11c7e5d89b7440142a81608..2696a05cdde1d665720bb3e1c2650ff3a039fcc2 100644 (file)
@@ -5,7 +5,7 @@
  *
  *             The User Datagram Protocol (UDP).
  *
- * Version:    $Id: udp.c,v 1.70 1999/06/13 05:55:16 davem Exp $
+ * Version:    $Id: udp.c,v 1.71 1999/07/02 11:26:33 davem Exp $
  *
  * Authors:    Ross Biro, <bir7@leland.Stanford.Edu>
  *             Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
@@ -123,164 +123,102 @@ struct udp_mib          udp_statistics;
 
 struct sock *udp_hash[UDP_HTABLE_SIZE];
 
-static int udp_v4_verify_bind(struct sock *sk, unsigned short snum)
-{
-       struct sock *sk2;
-       int retval = 0, sk_reuse = sk->reuse;
-
-       SOCKHASH_LOCK_READ();
-       for(sk2 = udp_hash[snum & (UDP_HTABLE_SIZE - 1)]; sk2 != NULL; sk2 = sk2->next) {
-               if((sk2->num == snum) && (sk2 != sk)) {
-                       unsigned char state = sk2->state;
-                       int sk2_reuse = sk2->reuse;
-
-                       /* Two sockets can be bound to the same port if they're
-                        * bound to different interfaces.
-                        */
-
-                       if(sk2->bound_dev_if != sk->bound_dev_if)
-                               continue;
+/* Shared by v4/v6 udp. */
+int udp_port_rover = 0;
 
-                       if(!sk2->rcv_saddr || !sk->rcv_saddr) {
-                               if((!sk2_reuse)                 ||
-                                  (!sk_reuse)                  ||
-                                  (state == TCP_LISTEN)) {
-                                       retval = 1;
-                                       break;
-                               }
-                       } else if(sk2->rcv_saddr == sk->rcv_saddr) {
-                               if((!sk_reuse)                  ||
-                                  (!sk2_reuse)                 ||
-                                  (state == TCP_LISTEN)) {
-                                       retval = 1;
-                                       break;
-                               }
+static int udp_v4_get_port(struct sock *sk, unsigned short snum)
+{
+       SOCKHASH_LOCK_WRITE();
+       if (snum == 0) {
+               int best_size_so_far, best, result, i;
+
+               if (udp_port_rover > sysctl_local_port_range[1] ||
+                   udp_port_rover < sysctl_local_port_range[0])
+                       udp_port_rover = sysctl_local_port_range[0];
+               best_size_so_far = 32767;
+               best = result = udp_port_rover;
+               for (i = 0; i < UDP_HTABLE_SIZE; i++, result++) {
+                       struct sock *sk;
+                       int size;
+
+                       sk = udp_hash[result & (UDP_HTABLE_SIZE - 1)];
+                       if (!sk) {
+                               if (result > sysctl_local_port_range[1])
+                                       result = sysctl_local_port_range[0] +
+                                               ((result - sysctl_local_port_range[0]) &
+                                                (UDP_HTABLE_SIZE - 1));
+                               goto gotit;
                        }
+                       size = 0;
+                       do {
+                               if (++size >= best_size_so_far)
+                                       goto next;
+                       } while ((sk = sk->next) != NULL);
+                       best_size_so_far = size;
+                       best = result;
+               next:
+               }
+               result = best;
+               for(;; result += UDP_HTABLE_SIZE) {
+                       if (result > sysctl_local_port_range[1])
+                               result = sysctl_local_port_range[0]
+                                       + ((result - sysctl_local_port_range[0]) &
+                                          (UDP_HTABLE_SIZE - 1));
+                       if (!udp_lport_inuse(result))
+                               break;
+               }
+gotit:
+               udp_port_rover = snum = result;
+       } else {
+               struct sock *sk2;
+
+               for (sk2 = udp_hash[snum & (UDP_HTABLE_SIZE - 1)];
+                    sk2 != NULL;
+                    sk2 = sk2->next) {
+                       if (sk2->num == snum &&
+                           sk2 != sk &&
+                           sk2->bound_dev_if == sk->bound_dev_if &&
+                           (!sk2->rcv_saddr ||
+                            !sk->rcv_saddr ||
+                            sk2->rcv_saddr == sk->rcv_saddr) &&
+                           (!sk2->reuse || !sk->reuse))
+                               goto fail;
                }
        }
-       SOCKHASH_UNLOCK_READ();
-       return retval;
-}
-
-static inline int udp_lport_inuse(u16 num)
-{
-       struct sock *sk = udp_hash[num & (UDP_HTABLE_SIZE - 1)];
-
-       for(; sk != NULL; sk = sk->next) {
-               if(sk->num == num)
-                       return 1;
-       }
+       sk->num = snum;
+       SOCKHASH_UNLOCK_WRITE();
        return 0;
-}
-
-/* Shared by v4/v6 udp. */
-unsigned short udp_good_socknum(void)
-{
-       int result;
-       static int start = 0;
-       int i, best, best_size_so_far;
-
-       SOCKHASH_LOCK_READ();
-        if (start > sysctl_local_port_range[1] || start < sysctl_local_port_range[0])
-                start = sysctl_local_port_range[0];
-
-       best_size_so_far = 32767;       /* "big" num */
-        best = result = start;
-
-        for(i = 0; i < UDP_HTABLE_SIZE; i++, result++) {
-               struct sock *sk;
-               int size;
-
-               sk = udp_hash[result & (UDP_HTABLE_SIZE - 1)];
-
-                if(!sk) {
-                        if (result > sysctl_local_port_range[1])
-                                result = sysctl_local_port_range[0]
-                                        + ((result - sysctl_local_port_range[0]) & (UDP_HTABLE_SIZE - 1));
-                       goto out;
-                }
-
-               /* Is this one better than our best so far? */
-               size = 0;
-               do {
-                       if(++size >= best_size_so_far)
-                               goto next;
-               } while((sk = sk->next) != NULL);
-               best_size_so_far = size;
-               best = result;
-        next:
-       }
 
-       result = best;
-
-        for(;; result += UDP_HTABLE_SIZE) {
-                /* Get into range (but preserve hash bin)... */
-                if (result > sysctl_local_port_range[1])
-                        result = sysctl_local_port_range[0]
-                                + ((result - sysctl_local_port_range[0]) & (UDP_HTABLE_SIZE - 1));
-                if (!udp_lport_inuse(result))
-                        break;
-        }
-out:
-       start = result;
-       SOCKHASH_UNLOCK_READ();
-       return result;
+fail:
+       SOCKHASH_UNLOCK_WRITE();
+       return 1;
 }
 
 static void udp_v4_hash(struct sock *sk)
 {
-       struct sock **skp;
-       int num = sk->num;
-
-       num &= (UDP_HTABLE_SIZE - 1);
-       skp = &udp_hash[num];
+       struct sock **skp = &udp_hash[sk->num & (UDP_HTABLE_SIZE - 1)];
 
        SOCKHASH_LOCK_WRITE();
-       sk->next = *skp;
+       if ((sk->next = *skp) != NULL)
+               (*skp)->pprev = &sk->next;
        *skp = sk;
-       sk->hashent = num;
+       sk->pprev = skp;
+       sk->prot->inuse++;
+       if(sk->prot->highestinuse < sk->prot->inuse)
+               sk->prot->highestinuse = sk->prot->inuse;
        SOCKHASH_UNLOCK_WRITE();
 }
 
 static void udp_v4_unhash(struct sock *sk)
 {
-       struct sock **skp;
-       int num = sk->num;
-
-       num &= (UDP_HTABLE_SIZE - 1);
-       skp = &udp_hash[num];
-
-       SOCKHASH_LOCK_WRITE();
-       while(*skp != NULL) {
-               if(*skp == sk) {
-                       *skp = sk->next;
-                       break;
-               }
-               skp = &((*skp)->next);
-       }
-       SOCKHASH_UNLOCK_WRITE();
-}
-
-static void udp_v4_rehash(struct sock *sk)
-{
-       struct sock **skp;
-       int num = sk->num;
-       int oldnum = sk->hashent;
-
-       num &= (UDP_HTABLE_SIZE - 1);
-       skp = &udp_hash[oldnum];
-
        SOCKHASH_LOCK_WRITE();
-       while(*skp != NULL) {
-               if(*skp == sk) {
-                       *skp = sk->next;
-                       break;
-               }
-               skp = &((*skp)->next);
+       if (sk->pprev) {
+               if (sk->next)
+                       sk->next->pprev = sk->pprev;
+               *sk->pprev = sk->next;
+               sk->pprev = NULL;
+               sk->prot->inuse--;
        }
-       sk->next = udp_hash[num];
-       udp_hash[num] = sk;
-       sk->hashent = num;
        SOCKHASH_UNLOCK_WRITE();
 }
 
@@ -653,7 +591,7 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, int len)
        if (msg->msg_name) {
                struct sockaddr_in * usin = (struct sockaddr_in*)msg->msg_name;
                if (msg->msg_namelen < sizeof(*usin))
-                       return(-EINVAL);
+                       return -EINVAL;
                if (usin->sin_family != AF_INET)
                        return -EINVAL;
 
@@ -788,7 +726,6 @@ int udp_ioctl(struct sock *sk, int cmd, unsigned long arg)
                {
                        unsigned long amount;
 
-                       if (sk->state == TCP_LISTEN) return(-EINVAL);
                        amount = sock_wspace(sk);
                        return put_user(amount, (int *)arg);
                }
@@ -798,8 +735,6 @@ int udp_ioctl(struct sock *sk, int cmd, unsigned long arg)
                        struct sk_buff *skb;
                        unsigned long amount;
 
-                       if (sk->state == TCP_LISTEN)
-                               return(-EINVAL);
                        amount = 0;
                        /* N.B. Is this interrupt safe??
                           -> Yes. Interrupts do not remove skbs. --ANK (980725)
@@ -817,7 +752,7 @@ int udp_ioctl(struct sock *sk, int cmd, unsigned long arg)
                }
 
                default:
-                       return(-ENOIOCTLCMD);
+                       return -ENOIOCTLCMD;
        }
        return(0);
 }
@@ -945,7 +880,7 @@ int udp_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 
        
        if (addr_len < sizeof(*usin)) 
-               return(-EINVAL);
+               return -EINVAL;
 
        /*
         *      1003.1g - break association.
@@ -961,7 +896,7 @@ int udp_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
        }
 
        if (usin->sin_family && usin->sin_family != AF_INET) 
-               return(-EAFNOSUPPORT);
+               return -EAFNOSUPPORT;
 
        dst_release(xchg(&sk->dst_cache, NULL));
 
@@ -1226,9 +1161,69 @@ csum_error:
        return(0);
 }
 
+/* Render one UDP socket as a fixed-format text line in tmpbuf; i is printed first ("sl"). */
+static void get_udp_sock(struct sock *sp, char *tmpbuf, int i)
+{
+       unsigned int dest = sp->daddr, src = sp->rcv_saddr;
+       __u16 destp = ntohs(sp->dport), srcp = ntohs(sp->sport);
+       int timer_active = (sp->timer.prev != NULL) ? 2 : 0;
+       unsigned long timer_expires;
+
+       /* No linked timer: use jiffies so the printed time delta is zero. */
+       timer_expires = (timer_active == 2 ? sp->timer.expires : jiffies);
+       /* sp->socket may be NULL; the i_ino argument already tests it, so the
+        * i_uid argument must be guarded the same way. */
+       sprintf(tmpbuf, "%4d: %08X:%04X %08X:%04X"
+               " %02X %08X:%08X %02X:%08lX %08X %5d %8d %ld",
+               i, src, srcp, dest, destp, sp->state, 
+               atomic_read(&sp->wmem_alloc), atomic_read(&sp->rmem_alloc),
+               timer_active, timer_expires-jiffies, 0,
+               sp->socket ? sp->socket->inode->i_uid : 0,
+               timer_active ? sp->timeout : 0,
+               sp->socket ? sp->socket->inode->i_ino : 0);
+}
+
+/* Emit the PF_INET sockets in udp_hash as fixed 128-byte text lines for a
+ * read of up to length bytes at offset; *start is set to the first byte. */
+int udp_get_info(char *buffer, char **start, off_t offset, int length, int dummy)
+{
+       int len = 0, i;
+       off_t pos = 128, begin;         /* pos: file offset just past the last line seen */
+       char tmpbuf[129];
+
+       if (offset < 128) 
+               len += sprintf(buffer, "%-127s\n",
+                              "  sl  local_address rem_address   st tx_queue "
+                              "rx_queue tr tm->when retrnsmt   uid  timeout inode");
+       SOCKHASH_LOCK_READ();
+       for (i = 0; i < UDP_HTABLE_SIZE; i++) {
+               struct sock *sk;
+               for (sk = udp_hash[i]; sk; sk = sk->next) {
+                       if (sk->family != PF_INET)
+                               continue;
+                       pos += 128;
+                       /* A line ending at or before offset was already consumed: skip it. */
+                       if (pos <= offset)
+                               continue;
+                       get_udp_sock(sk, tmpbuf, i);
+                       len += sprintf(buffer+len, "%-127s\n", tmpbuf);
+                       if(len >= length)
+                               goto out;
+               }
+       }
+out:
+       SOCKHASH_UNLOCK_READ();
+       begin = len - (pos - offset);
+       *start = buffer + begin;
+       len -= begin;
+       if(len > length)
+               len = length;
+       if (len < 0)
+               len = 0; 
+       return len;
+}
+
 struct proto udp_prot = {
-       (struct sock *)&udp_prot,       /* sklist_next */
-       (struct sock *)&udp_prot,       /* sklist_prev */
        udp_close,                      /* close */
        udp_connect,                    /* connect */
        NULL,                           /* accept */
@@ -1248,9 +1243,7 @@ struct proto udp_prot = {
        udp_queue_rcv_skb,              /* backlog_rcv */
        udp_v4_hash,                    /* hash */
        udp_v4_unhash,                  /* unhash */
-       udp_v4_rehash,                  /* rehash */
-       udp_good_socknum,               /* good_socknum */
-       udp_v4_verify_bind,             /* verify_bind */
+       udp_v4_get_port,                /* get_port */
        128,                            /* max_header */
        0,                              /* retransmits */
        "UDP",                          /* name */
index f7f50df869ae80c11491016d0eb927622134ff05..f565921d31a3cacab955f16806365e27dcd15193 100644 (file)
@@ -7,7 +7,7 @@
  *
  *     Adapted from linux/net/ipv4/af_inet.c
  *
- *     $Id: af_inet6.c,v 1.44 1999/06/09 08:29:29 davem Exp $
+ *     $Id: af_inet6.c,v 1.45 1999/07/02 11:26:38 davem Exp $
  *
  *     This program is free software; you can redistribute it and/or
  *      modify it under the terms of the GNU General Public License
@@ -157,7 +157,6 @@ static int inet6_create(struct socket *sock, int protocol)
                 */
                sk->sport = ntohs(sk->num);
                sk->prot->hash(sk);
-               add_to_prot_sklist(sk);
        }
 
        if (sk->prot->init) {
@@ -205,13 +204,13 @@ static int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
                
        addr_type = ipv6_addr_type(&addr->sin6_addr);
        if ((addr_type & IPV6_ADDR_MULTICAST) && sock->type == SOCK_STREAM)
-               return(-EINVAL);
+               return -EINVAL;
 
        /* Check if the address belongs to the host. */
        if (addr_type == IPV6_ADDR_MAPPED) {
                v4addr = addr->sin6_addr.s6_addr32[3];
                if (inet_addr_type(v4addr) != RTN_LOCAL)
-                       return(-EADDRNOTAVAIL);
+                       return -EADDRNOTAVAIL;
        } else {
                if (addr_type != IPV6_ADDR_ANY) {
                        /* ipv4 addr of the socket is invalid.  Only the
@@ -220,7 +219,7 @@ static int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
                        v4addr = LOOPBACK4_IPV6;
                        if (!(addr_type & IPV6_ADDR_MULTICAST)) {
                                if (ipv6_chk_addr(&addr->sin6_addr, NULL, 0) == NULL)
-                                       return(-EADDRNOTAVAIL);
+                                       return -EADDRNOTAVAIL;
                        }
                }
        }
@@ -236,21 +235,17 @@ static int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
                       sizeof(struct in6_addr));
 
        snum = ntohs(addr->sin6_port);
-       if (snum == 0) 
-               snum = sk->prot->good_socknum();
-       if (snum < PROT_SOCK && !capable(CAP_NET_BIND_SERVICE))
-               return(-EACCES);
+       if (snum && snum < PROT_SOCK && !capable(CAP_NET_BIND_SERVICE))
+               return -EACCES;
 
        /* Make sure we are allowed to bind here. */
-       if(sk->prot->verify_bind(sk, snum))
+       if(sk->prot->get_port(sk, snum) != 0)
                return -EADDRINUSE;
 
-       sk->num = snum;
        sk->sport = ntohs(sk->num);
        sk->dport = 0;
        sk->daddr = 0;
-       sk->prot->rehash(sk);
-       add_to_prot_sklist(sk);
+       sk->prot->hash(sk);
 
        return(0);
 }
@@ -318,7 +313,7 @@ static int inet6_getname(struct socket *sock, struct sockaddr *uaddr,
        sk = sock->sk;
        if (peer) {
                if (!tcp_connected(sk->state))
-                       return(-ENOTCONN);
+                       return -ENOTCONN;
                sin->sin6_port = sk->dport;
                memcpy(&sin->sin6_addr, &sk->net_pinfo.af_inet6.daddr,
                       sizeof(struct in6_addr));
index b83bdc34b02fbc64bd3f67550c26b790fe80e964..09845703b4edf4fb963a588fd6d2c63f7c2cb592 100644 (file)
@@ -7,7 +7,7 @@
  *             PROC file system.  This is very similar to the IPv4 version,
  *             except it reports the sockets in the INET6 address family.
  *
- * Version:    $Id: proc.c,v 1.10 1999/05/27 00:38:14 davem Exp $
+ * Version:    $Id: proc.c,v 1.11 1999/07/02 11:26:45 davem Exp $
  *
  * Authors:    David S. Miller (davem@caip.rutgers.edu)
  *
 #include <net/transp_v6.h>
 #include <net/ipv6.h>
 
-/* This is the main implementation workhorse of all these routines. */
-static int get__netinfo6(struct proto *pro, char *buffer, int format, char **start,
-                        off_t offset, int length)
-{
-       struct sock *sp;
-       struct tcp_opt *tp;
-       int timer_active, timer_active1, timer_active2;
-       unsigned long timer_expires;
-       struct in6_addr *dest, *src;
-       unsigned short destp, srcp;
-       int len = 0, i = 0;
-       off_t pos = 0;
-       off_t begin;
-       char tmpbuf[150];
-
-       if(offset < 149)
-               len += sprintf(buffer, "%-148s\n",
-                              "  sl  "                                         /* 6 */
-                              "local_address                         "         /* 38 */
-                              "remote_address                        "         /* 38 */
-                              "st tx_queue rx_queue tr tm->when retrnsmt"      /* 41 */
-                              "   uid  timeout inode");                        /* 21 */
-                                                                               /*----*/
-                                                                               /*144 */
-
-       pos = 149;
-       SOCKHASH_LOCK_READ();
-       sp = pro->sklist_next;
-       while(sp != (struct sock *)pro) {
-               struct tcp_tw_bucket *tw = (struct tcp_tw_bucket *)sp;
-               int tw_bucket = 0;
-
-               pos += 149;
-               if(pos < offset)
-                       goto next;
-               tp = &(sp->tp_pinfo.af_tcp);
-               if((format == 0) && (sp->state == TCP_TIME_WAIT)) {
-                       tw_bucket = 1;
-                       dest  = &tw->v6_daddr;
-                       src   = &tw->v6_rcv_saddr;
-               } else {
-                       dest  = &sp->net_pinfo.af_inet6.daddr;
-                       src   = &sp->net_pinfo.af_inet6.rcv_saddr;
-               }
-               destp = ntohs(sp->dport);
-               srcp  = ntohs(sp->sport);
-
-               if((format == 0) && (sp->state == TCP_TIME_WAIT)) {
-                       extern int tcp_tw_death_row_slot;
-                       int slot_dist;
-
-                       timer_active1   = timer_active2 = 0;
-                       timer_active    = 3;
-                       slot_dist       = tw->death_slot;
-                       if(slot_dist > tcp_tw_death_row_slot)
-                               slot_dist = (TCP_TWKILL_SLOTS - slot_dist) + tcp_tw_death_row_slot;
-                       else
-                               slot_dist = tcp_tw_death_row_slot - slot_dist;
-                       timer_expires   = jiffies + (slot_dist * TCP_TWKILL_PERIOD);
-               } else {
-                       timer_active1 = tp->retransmit_timer.prev != NULL;
-                       timer_active2 = sp->timer.prev != NULL;
-                       timer_active = 0;
-                       timer_expires = (unsigned) -1;
-               }
-               if(timer_active1 && tp->retransmit_timer.expires < timer_expires) {
-                       timer_active = timer_active1;
-                       timer_expires = tp->retransmit_timer.expires;
-               }
-               if(timer_active2 && sp->timer.expires < timer_expires) {
-                       timer_active = timer_active2;
-                       timer_expires = sp->timer.expires;
-               }
-               if(timer_active == 0)
-                       timer_expires = jiffies;
-               sprintf(tmpbuf, "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
-                       "%02X %08X:%08X %02X:%08lX %08X %5d %8d %ld",
-                       i,
-                       src->s6_addr32[0], src->s6_addr32[1],
-                       src->s6_addr32[2], src->s6_addr32[3], srcp,
-                       dest->s6_addr32[0], dest->s6_addr32[1],
-                       dest->s6_addr32[2], dest->s6_addr32[3], destp,
-                       sp->state,
-                       (tw_bucket ?
-                        0 :
-                        (format == 0) ?
-                        tp->write_seq-tp->snd_una :
-                        atomic_read(&sp->wmem_alloc)),
-                       (tw_bucket ?
-                        0 :
-                        (format == 0) ?
-                        tp->rcv_nxt-tp->copied_seq :
-                        atomic_read(&sp->rmem_alloc)),
-                       timer_active, timer_expires-jiffies,
-                       (tw_bucket ? 0 : tp->retransmits),
-                       ((!tw_bucket && sp->socket) ?
-                        sp->socket->inode->i_uid : 0),
-                       (!tw_bucket && timer_active) ? sp->timeout : 0,
-                       ((!tw_bucket && sp->socket) ?
-                        sp->socket->inode->i_ino : 0));
-
-               len += sprintf(buffer+len, "%-148s\n", tmpbuf);
-               if(len >= length)
-                       break;
-       next:
-               sp = sp->sklist_next;
-               i++;
-       }
-       SOCKHASH_UNLOCK_READ();
-
-       begin = len - (pos - offset);
-       *start = buffer + begin;
-       len -= begin;
-       if(len > length)
-               len = length;
-       return len;
-}
-
-/* These get exported and registered with procfs in af_inet6.c at init time. */
-int tcp6_get_info(char *buffer, char **start, off_t offset, int length, int dummy)
-{
-       return get__netinfo6(&tcpv6_prot, buffer, 0, start, offset, length);
-}
-
-int udp6_get_info(char *buffer, char **start, off_t offset, int length, int dummy)
-{
-       return get__netinfo6(&udpv6_prot, buffer, 1, start, offset, length);
-}
-
-int raw6_get_info(char *buffer, char **start, off_t offset, int length, int dummy)
-{
-       return get__netinfo6(&rawv6_prot, buffer, 1, start, offset, length);
-}
-
 int afinet6_get_info(char *buffer, char **start, off_t offset, int length, int dummy)
 {
        int len = 0;
index 70394dc039b200b86210f91716eb6380bdfdee77..e0c78772382fe34f121f044ee1abfee7a8f8238f 100644 (file)
@@ -7,7 +7,7 @@
  *
  *     Adapted from linux/net/ipv4/raw.c
  *
- *     $Id: raw.c,v 1.26 1999/06/09 10:11:18 davem Exp $
+ *     $Id: raw.c,v 1.27 1999/07/02 11:26:40 davem Exp $
  *
  *     This program is free software; you can redistribute it and/or
  *      modify it under the terms of the GNU General Public License
@@ -45,57 +45,29 @@ struct sock *raw_v6_htable[RAWV6_HTABLE_SIZE];
 
 static void raw_v6_hash(struct sock *sk)
 {
-       struct sock **skp;
-       int num = sk->num;
+       struct sock **skp = &raw_v6_htable[sk->num & (RAWV6_HTABLE_SIZE - 1)];
 
-       num &= (RAWV6_HTABLE_SIZE - 1);
-       skp = &raw_v6_htable[num];
        SOCKHASH_LOCK_WRITE();
-       sk->next = *skp;
+       if ((sk->next = *skp) != NULL)
+               (*skp)->pprev = &sk->next;
        *skp = sk;
-       sk->hashent = num;
+       sk->pprev = skp;
+       sk->prot->inuse++;
+       if(sk->prot->highestinuse < sk->prot->inuse)
+               sk->prot->highestinuse = sk->prot->inuse;
        SOCKHASH_UNLOCK_WRITE();
 }
 
 static void raw_v6_unhash(struct sock *sk)
 {
-       struct sock **skp;
-       int num = sk->num;
-
-       num &= (RAWV6_HTABLE_SIZE - 1);
-       skp = &raw_v6_htable[num];
-
-       SOCKHASH_LOCK_WRITE();
-       while(*skp != NULL) {
-               if(*skp == sk) {
-                       *skp = sk->next;
-                       break;
-               }
-               skp = &((*skp)->next);
-       }
-       SOCKHASH_UNLOCK_WRITE();
-}
-
-static void raw_v6_rehash(struct sock *sk)
-{
-       struct sock **skp;
-       int num = sk->num;
-       int oldnum = sk->hashent;
-
-       num &= (RAWV6_HTABLE_SIZE - 1);
-       skp = &raw_v6_htable[oldnum];
-
        SOCKHASH_LOCK_WRITE();
-       while(*skp != NULL) {
-               if(*skp == sk) {
-                       *skp = sk->next;
-                       break;
-               }
-               skp = &((*skp)->next);
+       if (sk->pprev) {
+               if (sk->next)
+                       sk->next->pprev = sk->pprev;
+               *sk->pprev = sk->next;
+               sk->pprev = NULL;
+               sk->prot->inuse--;
        }
-       sk->next = raw_v6_htable[num];
-       raw_v6_htable[num] = sk;
-       sk->hashent = num;
        SOCKHASH_UNLOCK_WRITE();
 }
 
@@ -636,9 +608,80 @@ static int rawv6_init_sk(struct sock *sk)
        return(0);
 }
 
+/* Render one raw IPv6 socket as a fixed-format text line in tmpbuf; i is printed first ("sl"). */
+static void get_raw6_sock(struct sock *sp, char *tmpbuf, int i)
+{
+       struct in6_addr *dest = &sp->net_pinfo.af_inet6.daddr;
+       struct in6_addr *src = &sp->net_pinfo.af_inet6.rcv_saddr;
+       __u16 destp = ntohs(sp->dport), srcp = ntohs(sp->sport);
+       int timer_active = (sp->timer.prev != NULL) ? 2 : 0;
+       unsigned long timer_expires;
+
+       /* No linked timer: use jiffies so the printed time delta is zero. */
+       timer_expires = (timer_active == 2 ? sp->timer.expires : jiffies);
+       /* sp->socket may be NULL (i_ino already tests it); guard i_uid likewise. */
+       sprintf(tmpbuf,
+               "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
+               "%02X %08X:%08X %02X:%08lX %08X %5d %8d %ld",
+               i,
+               src->s6_addr32[0], src->s6_addr32[1],
+               src->s6_addr32[2], src->s6_addr32[3], srcp,
+               dest->s6_addr32[0], dest->s6_addr32[1],
+               dest->s6_addr32[2], dest->s6_addr32[3], destp,
+               sp->state, 
+               atomic_read(&sp->wmem_alloc), atomic_read(&sp->rmem_alloc),
+               timer_active, timer_expires-jiffies, 0,
+               sp->socket ? sp->socket->inode->i_uid : 0,
+               timer_active ? sp->timeout : 0,
+               sp->socket ? sp->socket->inode->i_ino : 0);
+}
+
+/* Emit the PF_INET6 sockets in raw_v6_htable as fixed 149-byte text lines for a
+ * read of up to length bytes at offset; *start is set to the first byte. */
+int raw6_get_info(char *buffer, char **start, off_t offset, int length, int dummy)
+{
+       int len = 0, i;
+       off_t pos = 149, begin;         /* pos: file offset just past the last line seen */
+       char tmpbuf[150];
+
+       if (offset < 149)
+               len += sprintf(buffer, "%-148s\n",
+                              "  sl  "                                         /* 6 */
+                              "local_address                         "         /* 38 */
+                              "remote_address                        "         /* 38 */
+                              "st tx_queue rx_queue tr tm->when retrnsmt"      /* 41 */
+                              "   uid  timeout inode");                        /* 21 */
+                                                                               /*----*/
+                                                                               /*144 */
+       SOCKHASH_LOCK_READ();
+       for (i = 0; i < RAWV6_HTABLE_SIZE; i++) {
+               struct sock *sk;
+               for (sk = raw_v6_htable[i]; sk; sk = sk->next) {
+                       if (sk->family != PF_INET6)
+                               continue;
+                       pos += 149;
+                       /* A line ending at or before offset was already consumed: skip it. */
+                       if (pos <= offset)
+                               continue;
+                       get_raw6_sock(sk, tmpbuf, i);
+                       len += sprintf(buffer+len, "%-148s\n", tmpbuf);
+                       if(len >= length)
+                               goto out;
+               }
+       }
+out:
+       SOCKHASH_UNLOCK_READ();
+       begin = len - (pos - offset);
+       *start = buffer + begin;
+       len -= begin;
+       if(len > length)
+               len = length;
+       if (len < 0)
+               len = 0; 
+       return len;
+}
+
 struct proto rawv6_prot = {
-       (struct sock *)&rawv6_prot,     /* sklist_next */
-       (struct sock *)&rawv6_prot,     /* sklist_prev */
        rawv6_close,                    /* close */
        udpv6_connect,                  /* connect */
        NULL,                           /* accept */
@@ -658,9 +701,7 @@ struct proto rawv6_prot = {
        rawv6_rcv_skb,                  /* backlog_rcv */
        raw_v6_hash,                    /* hash */
        raw_v6_unhash,                  /* unhash */
-       raw_v6_rehash,                  /* rehash */
-       NULL,                           /* good_socknum */
-       NULL,                           /* verify_bind */
+       NULL,                           /* get_port */
        128,                            /* max_header */
        0,                              /* retransmits */
        "RAW",                          /* name */
index 2164e245e547d8090acd503249812ed57601d81b..4cb6a56e92d13cc200d5212aa0a129dbd25b6058 100644 (file)
@@ -5,7 +5,7 @@
  *     Authors:
  *     Pedro Roque             <roque@di.fc.ul.pt>     
  *
- *     $Id: tcp_ipv6.c,v 1.108 1999/06/09 08:29:43 davem Exp $
+ *     $Id: tcp_ipv6.c,v 1.109 1999/07/02 11:26:41 davem Exp $
  *
  *     Based on: 
  *     linux/net/ipv4/tcp.c
@@ -84,101 +84,124 @@ static __inline__ int tcp_v6_sk_hashfn(struct sock *sk)
  * But it doesn't matter, the recalculation is in the rarest path
  * this function ever takes.
  */
-static int tcp_v6_verify_bind(struct sock *sk, unsigned short snum)
+static int tcp_v6_get_port(struct sock *sk, unsigned short snum)
 {
        struct tcp_bind_bucket *tb;
-       int result = 0;
 
        SOCKHASH_LOCK_WRITE();
-       for(tb = tcp_bhash[tcp_bhashfn(snum)];
-           (tb && (tb->port != snum));
-           tb = tb->next)
-               ;
-       if(tb && tb->owners) {
-               /* Fast path for reuse ports, see include/net/tcp.h for a very
-                * detailed description of why this works, and why it is worth
-                * the effort at all. -DaveM
-                */
-               if((tb->flags & TCPB_FLAG_FASTREUSE)    &&
-                  (sk->reuse != 0)) {
-                       goto go_like_smoke;
+       if (snum == 0) {
+               int rover = tcp_port_rover;
+               int low = sysctl_local_port_range[0];
+               int high = sysctl_local_port_range[1];
+               int remaining = (high - low) + 1;
+
+               do {    rover++;
+                       if ((rover < low) || (rover > high))
+                               rover = low;
+                       tb = tcp_bhash[tcp_bhashfn(rover)];
+                       for ( ; tb; tb = tb->next)
+                               if (tb->port == rover)
+                                       goto next;
+                       break;
+               next:
+               } while (--remaining > 0);
+               tcp_port_rover = rover;
+
+               /* Exhausted local port range during search? */
+               if (remaining <= 0)
+                       goto fail;
+
+               /* OK, here is the one we will use. */
+               snum = rover;
+               tb = NULL;
+       } else {
+               for (tb = tcp_bhash[tcp_bhashfn(snum)];
+                    tb != NULL;
+                    tb = tb->next)
+                       if (tb->port == snum)
+                               break;
+       }
+       if (tb != NULL && tb->owners != NULL) {
+               if (tb->fastreuse != 0 && sk->reuse != 0) {
+                       goto success;
                } else {
-                       struct sock *sk2;
+                       struct sock *sk2 = tb->owners;
                        int sk_reuse = sk->reuse;
                        int addr_type = ipv6_addr_type(&sk->net_pinfo.af_inet6.rcv_saddr);
 
                        /* We must walk the whole port owner list in this case. -DaveM */
-                       for(sk2 = tb->owners; sk2; sk2 = sk2->bind_next) {
-                               if(sk->bound_dev_if == sk2->bound_dev_if) {
-                                       if(!sk_reuse || !sk2->reuse || sk2->state == TCP_LISTEN) {
-                                               if(addr_type == IPV6_ADDR_ANY   ||
-                                                  !sk2->rcv_saddr              ||
-                                                  !ipv6_addr_cmp(&sk->net_pinfo.af_inet6.rcv_saddr,
-                                                                 &sk2->net_pinfo.af_inet6.rcv_saddr))
+                       for( ; sk2 != NULL; sk2 = sk2->bind_next) {     /* each socket already on this port */
+                               if (sk->bound_dev_if == sk2->bound_dev_if) {
+                                       if (!sk_reuse   ||
+                                           !sk2->reuse ||
+                                           sk2->state == TCP_LISTEN) {
+                                               if (!sk2->rcv_saddr     ||
+                                                   addr_type == IPV6_ADDR_ANY ||       /* was "!addr_type == ...", which inverted the wildcard test */
+                                                   !ipv6_addr_cmp(&sk->net_pinfo.af_inet6.rcv_saddr,
+                                                                  &sk2->net_pinfo.af_inet6.rcv_saddr))
+                                                       break;  /* leave sk2 non-NULL: it is the conflicting owner */
+                                       }
+                               }
+                       }
-                       if(sk2 != NULL)
-                               result = 1;
+                       /* If we found a conflict, fail. */
+                       if (sk2 != NULL)
+                               goto fail;
                }
        }
-       if(result == 0) {
-               if(tb == NULL) {
-                       if((tb = tcp_bucket_create(snum)) == NULL)
-                               result = 1;
-                       else if (sk->reuse && sk->state != TCP_LISTEN)
-                               tb->flags |= TCPB_FLAG_FASTREUSE;
-               } else {
-                       /* It could be pending garbage collection, this
-                        * kills the race and prevents it from disappearing
-                        * out from under us by the time we use it.  -DaveM
-                        */
-                       if(tb->owners == NULL) {
-                               if (!(tb->flags & TCPB_FLAG_LOCKED)) {
-                                       tb->flags = (TCPB_FLAG_LOCKED |
-                                                    ((sk->reuse &&
-                                                      sk->state != TCP_LISTEN) ?
-                                                     TCPB_FLAG_FASTREUSE : 0));
-                                       tcp_dec_slow_timer(TCP_SLT_BUCKETGC);
-                               } else if (!(tb->flags & TCPB_FLAG_GOODSOCKNUM)) {
-                                       /* Someone is in between the bind
-                                        * and the actual connect or listen.
-                                        * See if it was a legitimate reuse
-                                        * and we are as well, else punt.
-                                        */
-                                       if (sk->reuse == 0 ||
-                                           !(tb->flags & TCPB_FLAG_FASTREUSE))
-                                               result = 1;
-                               } else
-                                       tb->flags &= ~TCPB_FLAG_GOODSOCKNUM;
-                       }
-               }
-       }
-go_like_smoke:
+       if (tb == NULL &&
+           (tb = tcp_bucket_create(snum)) == NULL)
+                       goto fail;
+       if (tb->owners == NULL) {
+               if (sk->reuse && sk->state != TCP_LISTEN)
+                       tb->fastreuse = 1;
+               else
+                       tb->fastreuse = 0;
+       } else if (tb->fastreuse &&
+                  ((sk->reuse == 0) || (sk->state == TCP_LISTEN)))
+               tb->fastreuse = 0;
+
+success:
+       sk->num = snum;
+       if ((sk->bind_next = tb->owners) != NULL)
+               tb->owners->bind_pprev = &sk->bind_next;
+       tb->owners = sk;
+       sk->bind_pprev = &tb->owners;
+       sk->prev = (struct sock *) tb;
+
        SOCKHASH_UNLOCK_WRITE();
-       return result;
+       return 0;
+
+fail:
+       SOCKHASH_UNLOCK_WRITE();
+       return 1;
 }
 
 static void tcp_v6_hash(struct sock *sk)
 {
-       /* Well, I know that it is ugly...
-          All this ->prot, ->af_specific etc. need LARGE cleanup --ANK
-        */
-       if (sk->tp_pinfo.af_tcp.af_specific == &ipv6_mapped) {
-               tcp_prot.hash(sk);
-               return;
-       }
        if(sk->state != TCP_CLOSE) {
                struct sock **skp;
 
+               /* Well, I know that it is ugly...
+                * All this ->prot, ->af_specific etc. need LARGE cleanup --ANK
+                */
+               if (sk->tp_pinfo.af_tcp.af_specific == &ipv6_mapped) {
+                       tcp_prot.hash(sk);
+                       return;
+               }
+
+               if(sk->state == TCP_LISTEN)
+                       skp = &tcp_listening_hash[tcp_sk_listen_hashfn(sk)];
+               else
+                       skp = &tcp_ehash[(sk->hashent = tcp_v6_sk_hashfn(sk))];
+
                SOCKHASH_LOCK_WRITE();
-               skp = &tcp_ehash[(sk->hashent = tcp_v6_sk_hashfn(sk))];
                if((sk->next = *skp) != NULL)
                        (*skp)->pprev = &sk->next;
                *skp = sk;
                sk->pprev = skp;
-               tcp_sk_bindify(sk);
+               sk->prot->inuse++;
+               if(sk->prot->highestinuse < sk->prot->inuse)
+                       sk->prot->highestinuse = sk->prot->inuse;
                SOCKHASH_UNLOCK_WRITE();
        }
 }
@@ -191,39 +214,9 @@ static void tcp_v6_unhash(struct sock *sk)
                        sk->next->pprev = sk->pprev;
                *sk->pprev = sk->next;
                sk->pprev = NULL;
-               tcp_sk_unbindify(sk);
-               tcp_reg_zap(sk);
-       }
-       SOCKHASH_UNLOCK_WRITE();
-}
-
-static void tcp_v6_rehash(struct sock *sk)
-{
-       unsigned char state;
-
-       SOCKHASH_LOCK_WRITE();
-       state = sk->state;
-       if(sk->pprev != NULL) {
-               if(sk->next)
-                       sk->next->pprev = sk->pprev;
-               *sk->pprev = sk->next;
-               sk->pprev = NULL;
+               sk->prot->inuse--;
                tcp_reg_zap(sk);
-       }
-       if(state != TCP_CLOSE) {
-               struct sock **skp;
-
-               if(state == TCP_LISTEN)
-                       skp = &tcp_listening_hash[tcp_sk_listen_hashfn(sk)];
-               else
-                       skp = &tcp_ehash[(sk->hashent = tcp_v6_sk_hashfn(sk))];
-
-               if((sk->next = *skp) != NULL)
-                       (*skp)->pprev = &sk->next;
-               *skp = sk;
-               sk->pprev = skp;
-               if(state == TCP_LISTEN)
-                       tcp_sk_bindify(sk);
+               __tcp_put_port(sk);
        }
        SOCKHASH_UNLOCK_WRITE();
 }
@@ -1063,8 +1056,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
        newsk->rcv_saddr= LOOPBACK4_IPV6;
 
        newsk->prot->hash(newsk);
-       add_to_prot_sklist(newsk);
-
+       tcp_inherit_port(sk, newsk);
        sk->data_ready(sk, 0); /* Deliver SIGIO */ 
 
        return newsk;
@@ -1666,18 +1658,214 @@ static int tcp_v6_destroy_sock(struct sock *sk)
 
        /* Clean up a locked TCP bind bucket, this only happens if a
         * port is allocated for a socket, but it never fully connects.
-        * In which case we will find num to be non-zero and daddr to
-        * be zero.
         */
-       if(ipv6_addr_any(&(sk->net_pinfo.af_inet6.daddr)) && sk->num != 0)
-               tcp_bucket_unlock(sk);
+       if(sk->prev != NULL)
+               tcp_put_port(sk);
 
        return inet6_destroy_sock(sk);
 }
 
+/* Proc filesystem TCPv6 sock list dumping. */
+static void get_openreq6(struct sock *sk, struct open_request *req, char *tmpbuf, int i)
+{      /* Format open request req, queued on socket sk, as one text record in tmpbuf; i is the slot number. */
+       struct in6_addr *dest, *src;
+
+       src = &req->af.v6_req.loc_addr;
+       dest = &req->af.v6_req.rmt_addr;
+       sprintf(tmpbuf,
+               "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
+               "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d",
+               i,
+               src->s6_addr32[0], src->s6_addr32[1],
+               src->s6_addr32[2], src->s6_addr32[3],
+               ntohs(sk->sport),       /* local port is taken from sk, not from the request */
+               dest->s6_addr32[0], dest->s6_addr32[1],
+               dest->s6_addr32[2], dest->s6_addr32[3],
+               ntohs(req->rmt_port),
+               TCP_SYN_RECV,           /* state column is always printed as TCP_SYN_RECV */
+               0,0, /* could print option size, but that is af dependent. */
+               1,   /* timers active (only the expire timer) */  
+               (unsigned long)(req->expires - jiffies), /* ticks from now until req->expires */
+               req->retrans,
+               sk->socket ? sk->socket->inode->i_uid : 0,      /* uid from sk's inode; 0 if sk has no struct socket */
+               0,  /* non standard timer */  
+               0 /* open_requests have no inode */
+               ); 
+}
+
+static void get_tcp6_sock(struct sock *sp, char *tmpbuf, int i)
+{      /* Format TCPv6 socket sp as one text record in tmpbuf; i is printed in the leading "%4d:" slot column. */
+       struct in6_addr *dest, *src;
+       __u16 destp, srcp;
+       int timer_active, timer_active1, timer_active2;
+       unsigned long timer_expires;
+       struct tcp_opt *tp = &sp->tp_pinfo.af_tcp;
+
+       dest  = &sp->net_pinfo.af_inet6.daddr;
+       src   = &sp->net_pinfo.af_inet6.rcv_saddr;
+       destp = ntohs(sp->dport);
+       srcp  = ntohs(sp->sport);
+       timer_active1 = tp->retransmit_timer.prev != NULL;
+       timer_active2 = sp->timer.prev != NULL;
+       timer_active    = 0;
+       timer_expires   = ~0UL;         /* full-width maximum; "(unsigned) -1" was only 0xFFFFFFFF on 64-bit */
+       if (timer_active1 && tp->retransmit_timer.expires < timer_expires) {
+               timer_active    = 1;    /* code 1: retransmit timer */
+               timer_expires   = tp->retransmit_timer.expires;
+       }
+       if (timer_active2 && sp->timer.expires < timer_expires) {
+               timer_active    = 2;    /* code 2: sp->timer expires earlier than anything found so far */
+               timer_expires   = sp->timer.expires;
+       }
+       if(timer_active == 0)
+               timer_expires = jiffies;        /* nothing selected: "timer_expires-jiffies" below prints 0 */
+
+       sprintf(tmpbuf,
+               "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
+               "%02X %08X:%08X %02X:%08lX %08X %5d %8d %ld",
+               i,
+               src->s6_addr32[0], src->s6_addr32[1],
+               src->s6_addr32[2], src->s6_addr32[3], srcp,
+               dest->s6_addr32[0], dest->s6_addr32[1],
+               dest->s6_addr32[2], dest->s6_addr32[3], destp,
+               sp->state,
+               tp->write_seq-tp->snd_una, tp->rcv_nxt-tp->copied_seq,
+               timer_active, timer_expires-jiffies,
+               tp->retransmits,
+               sp->socket ? sp->socket->inode->i_uid : 0,      /* uid and inode print as 0 without a struct socket */
+               timer_active ? sp->timeout : 0,
+               sp->socket ? sp->socket->inode->i_ino : 0);
+}
+
+static void get_timewait6_sock(struct tcp_tw_bucket *tw, char *tmpbuf, int i)
+{      /* Format time-wait bucket tw as one text record in tmpbuf; i is the slot number. */
+       extern int tcp_tw_death_row_slot;
+       struct in6_addr *dest, *src;
+       __u16 destp, srcp;
+       int slot_dist;
+
+       dest  = &tw->v6_daddr;
+       src   = &tw->v6_rcv_saddr;
+       destp = ntohs(tw->dport);
+       srcp  = ntohs(tw->sport);
+
+       slot_dist = tw->death_slot;
+       if(slot_dist > tcp_tw_death_row_slot)   /* bucket's slot index is above the current one */
+               slot_dist = (TCP_TWKILL_SLOTS - slot_dist) + tcp_tw_death_row_slot;
+       else
+               slot_dist = tcp_tw_death_row_slot - slot_dist;
+
+       sprintf(tmpbuf,
+               "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
+               "%02X %08X:%08X %02X:%08X %08X %5d %8d %d",
+               i,
+               src->s6_addr32[0], src->s6_addr32[1],
+               src->s6_addr32[2], src->s6_addr32[3], srcp,
+               dest->s6_addr32[0], dest->s6_addr32[1],
+               dest->s6_addr32[2], dest->s6_addr32[3], destp,
+               TCP_TIME_WAIT, 0, 0,    /* fixed state, both queue columns printed as 0 */
+               3, slot_dist * TCP_TWKILL_PERIOD, 0, 0, 0, 0);  /* timer code 3, then slot distance times the kill period */
+}
+
+int tcp6_get_info(char *buffer, char **start, off_t offset, int length, int dummy)
+{      /* Write the TCPv6 socket list into buffer as 149-byte text records; sets *start and returns the usable byte count. */
+       int len = 0, num = 0, i;
+       off_t begin, pos = 0;
+       char tmpbuf[150];
+
+       if(offset < 149)        /* header record is written only when the request starts inside it */
+               len += sprintf(buffer, "%-148s\n",
+                              "  sl  "                                         /* 6 */
+                              "local_address                         "         /* 38 */
+                              "remote_address                        "         /* 38 */
+                              "st tx_queue rx_queue tr tm->when retrnsmt"      /* 41 */
+                              "   uid  timeout inode");                        /* 21 */
+                                                                               /*----*/
+                                                                               /*144 */
+
+       pos = 149;              /* pos = offset just past the last record considered (header included) */
+       SOCKHASH_LOCK_READ();
+
+       /* First, walk listening socket table. */
+       for(i = 0; i < TCP_LHTABLE_SIZE; i++) {
+               struct sock *sk = tcp_listening_hash[i];
+
+               for (sk = tcp_listening_hash[i]; sk; sk = sk->next, num++) {
+                       struct open_request *req;
+                       struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
+
+                       if (sk->family != PF_INET6)
+                               continue;
+                       pos += 149;
+                       if (pos >= offset) {    /* record ends at or after the requested offset: emit it */
+                               get_tcp6_sock(sk, tmpbuf, num);
+                               len += sprintf(buffer+len, "%-148s\n", tmpbuf);
+                               if (len >= length)
+                                       goto out;
+                       }
+                       for (req = tp->syn_wait_queue; req; req = req->dl_next, num++) {
+                               if (req->sk)    /* requests that already have a socket attached are not listed here */
+                                       continue;
+                               pos += 149;
+                               if (pos < offset)
+                                       continue;
+                               get_openreq6(sk, req, tmpbuf, num);
+                               len += sprintf(buffer+len, "%-148s\n", tmpbuf);
+                               if(len >= length) 
+                                       goto out;
+                       }
+               }
+       }
+
+       /* Next, walk established hash chain. */
+       for (i = 0; i < (tcp_ehash_size >> 1); i++) {   /* first half of tcp_ehash */
+               struct sock *sk;
+
+               for(sk = tcp_ehash[i]; sk; sk = sk->next, num++) {
+                       if (sk->family != PF_INET6)
+                               continue;
+                       pos += 149;
+                       if (pos < offset)       /* record lies wholly before the requested offset: skip */
+                               continue;
+                       get_tcp6_sock(sk, tmpbuf, num);
+                       len += sprintf(buffer+len, "%-148s\n", tmpbuf);
+                       if(len >= length)
+                               goto out;
+               }
+       }
+
+       /* Finally, walk time wait buckets. */
+       for (i = (tcp_ehash_size>>1); i < tcp_ehash_size; i++) {        /* second half of tcp_ehash */
+               struct tcp_tw_bucket *tw;
+               for (tw = (struct tcp_tw_bucket *)tcp_ehash[i];
+                    tw != NULL;
+                    tw = (struct tcp_tw_bucket *)tw->next, num++) {
+                       if (tw->family != PF_INET6)
+                               continue;
+                       pos += 149;
+                       if (pos < offset)
+                               continue;
+                       get_timewait6_sock(tw, tmpbuf, num);
+                       len += sprintf(buffer+len, "%-148s\n", tmpbuf);
+                       if(len >= length)
+                               goto out;
+               }
+       }
+
+out:
+       SOCKHASH_UNLOCK_READ();
+
+       begin = len - (pos - offset);   /* index in buffer that corresponds to position "offset" */
+       *start = buffer + begin;
+       len -= begin;
+       if(len > length)
+               len = length;
+       if (len < 0)            /* offset lies beyond everything generated */
+               len = 0; 
+       return len;
+}
+
 struct proto tcpv6_prot = {
-       (struct sock *)&tcpv6_prot,     /* sklist_next */
-       (struct sock *)&tcpv6_prot,     /* sklist_prev */
        tcp_close,                      /* close */
        tcp_v6_connect,                 /* connect */
        tcp_accept,                     /* accept */
@@ -1697,9 +1885,7 @@ struct proto tcpv6_prot = {
        tcp_v6_do_rcv,                  /* backlog_rcv */
        tcp_v6_hash,                    /* hash */
        tcp_v6_unhash,                  /* unhash */
-       tcp_v6_rehash,                  /* rehash */
-       tcp_good_socknum,               /* good_socknum */
-       tcp_v6_verify_bind,             /* verify_bind */
+       tcp_v6_get_port,                /* get_port */
        128,                            /* max_header */
        0,                              /* retransmits */
        "TCPv6",                        /* name */
index da020d8fbee6f85cf23e34a3b58476a4643b98b4..b3045c694d6293a0e53742b735386add89c61155 100644 (file)
@@ -7,7 +7,7 @@
  *
  *     Based on linux/ipv4/udp.c
  *
- *     $Id: udp.c,v 1.42 1999/06/09 10:11:24 davem Exp $
+ *     $Id: udp.c,v 1.43 1999/07/02 11:26:44 davem Exp $
  *
  *     This program is free software; you can redistribute it and/or
  *      modify it under the terms of the GNU General Public License
@@ -49,101 +49,102 @@ struct udp_mib udp_stats_in6;
 /* Grrr, addr_type already calculated by caller, but I don't want
  * to add some silly "cookie" argument to this method just for that.
  */
-static int udp_v6_verify_bind(struct sock *sk, unsigned short snum)
+static int udp_v6_get_port(struct sock *sk, unsigned short snum)
 {
-       struct sock *sk2;
-       int addr_type = ipv6_addr_type(&sk->net_pinfo.af_inet6.rcv_saddr);
-       int retval = 0, sk_reuse = sk->reuse;
-
-       SOCKHASH_LOCK_READ();
-       for(sk2 = udp_hash[snum & (UDP_HTABLE_SIZE - 1)]; sk2 != NULL; sk2 = sk2->next) {
-               if((sk2->num == snum) && (sk2 != sk)) {
-                       unsigned char state = sk2->state;
-                       int sk2_reuse = sk2->reuse;
-
-                       /* Two sockets can be bound to the same port if they're
-                        * bound to different interfaces.
-                        */
-
-                       if(sk2->bound_dev_if != sk->bound_dev_if)
-                               continue;
-
-                       if(addr_type == IPV6_ADDR_ANY || (!sk2->rcv_saddr)) {
-                               if((!sk2_reuse)                 ||
-                                  (!sk_reuse)                  ||
-                                  (state == TCP_LISTEN)) {
-                                       retval = 1;
-                                       break;
-                               }
-                       } else if(!ipv6_addr_cmp(&sk->net_pinfo.af_inet6.rcv_saddr,
-                                                &sk2->net_pinfo.af_inet6.rcv_saddr)) {
-                               if((!sk_reuse)                  ||
-                                  (!sk2_reuse)                 ||
-                                  (state == TCP_LISTEN)) {
-                                       retval = 1;
-                                       break;
-                               }
+       SOCKHASH_LOCK_WRITE();
+       if (snum == 0) {
+               int best_size_so_far, best, result, i;
+
+               if (udp_port_rover > sysctl_local_port_range[1] ||
+                   udp_port_rover < sysctl_local_port_range[0])
+                       udp_port_rover = sysctl_local_port_range[0];
+               best_size_so_far = 32767;
+               best = result = udp_port_rover;
+               for (i = 0; i < UDP_HTABLE_SIZE; i++, result++) {       /* one candidate port per hash chain */
+                       struct sock *sk;
+                       int size;
+
+                       sk = udp_hash[result & (UDP_HTABLE_SIZE - 1)];
+                       if (!sk) {      /* empty chain: take this port after folding it back into the range */
+                               if (result > sysctl_local_port_range[1])
+                                       result = sysctl_local_port_range[0] +
+                                               ((result - sysctl_local_port_range[0]) &
+                                                (UDP_HTABLE_SIZE - 1));
+                               goto gotit;
                         }
+                       size = 0;
+                       do {
+                               if (++size >= best_size_so_far)
+                                       goto next;      /* chain is not shorter than the best seen so far */
+                       } while ((sk = sk->next) != NULL);
+                       best_size_so_far = size;
+                       best = result;
+               next:   ;       /* null statement: before C23 a label may not directly precede '}' */
+               }
+               result = best;
+               for(;; result += UDP_HTABLE_SIZE) {
+                       if (result > sysctl_local_port_range[1])
+                               result = sysctl_local_port_range[0]
+                                       + ((result - sysctl_local_port_range[0]) &
+                                          (UDP_HTABLE_SIZE - 1));
+                       if (!udp_lport_inuse(result))
+                               break;
+               }
+gotit:
+               udp_port_rover = snum = result;
+       } else {
+               struct sock *sk2;
+               int addr_type = ipv6_addr_type(&sk->net_pinfo.af_inet6.rcv_saddr);
+
+               for (sk2 = udp_hash[snum & (UDP_HTABLE_SIZE - 1)];
+                    sk2 != NULL;
+                    sk2 = sk2->next) {
+                       if (sk2->num == snum &&
+                           sk2 != sk &&
+                           sk2->bound_dev_if == sk->bound_dev_if &&
+                           (!sk2->rcv_saddr ||
+                            addr_type == IPV6_ADDR_ANY ||
+                            !ipv6_addr_cmp(&sk->net_pinfo.af_inet6.rcv_saddr,
+                                           &sk2->net_pinfo.af_inet6.rcv_saddr)) &&
+                           (!sk2->reuse || !sk->reuse))
+                               goto fail;
                }
        }
-       SOCKHASH_UNLOCK_READ();
-       return retval;
-}
-
-static void udp_v6_hash(struct sock *sk)
-{
-       struct sock **skp;
-       int num = sk->num;
 
-       num &= (UDP_HTABLE_SIZE - 1);
-       skp = &udp_hash[num];
+       sk->num = snum;
+       SOCKHASH_UNLOCK_WRITE();
+       return 0;
 
-       SOCKHASH_LOCK_WRITE();
-       sk->next = *skp;
-       *skp = sk;
-       sk->hashent = num;
+fail:
        SOCKHASH_UNLOCK_WRITE();
+       return 1;
 }
 
-static void udp_v6_unhash(struct sock *sk)
+static void udp_v6_hash(struct sock *sk)
 {
-       struct sock **skp;
-       int num = sk->num;
-
-       num &= (UDP_HTABLE_SIZE - 1);
-       skp = &udp_hash[num];
+       struct sock **skp = &udp_hash[sk->num & (UDP_HTABLE_SIZE - 1)];
 
        SOCKHASH_LOCK_WRITE();
-       while(*skp != NULL) {
-               if(*skp == sk) {
-                       *skp = sk->next;
-                       break;
-               }
-               skp = &((*skp)->next);
-       }
+       if ((sk->next = *skp) != NULL)
+               (*skp)->pprev = &sk->next;
+       *skp = sk;
+       sk->pprev = skp;
+       sk->prot->inuse++;
+       if(sk->prot->highestinuse < sk->prot->inuse)
+               sk->prot->highestinuse = sk->prot->inuse;
        SOCKHASH_UNLOCK_WRITE();
 }
 
-static void udp_v6_rehash(struct sock *sk)
+static void udp_v6_unhash(struct sock *sk)
 {
-       struct sock **skp;
-       int num = sk->num;
-       int oldnum = sk->hashent;
-
-       num &= (UDP_HTABLE_SIZE - 1);
-       skp = &udp_hash[oldnum];
-
        SOCKHASH_LOCK_WRITE();
-       while(*skp != NULL) {
-               if(*skp == sk) {
-                       *skp = sk->next;
-                       break;
-               }
-               skp = &((*skp)->next);
+       if (sk->pprev) {
+               if (sk->next)
+                       sk->next->pprev = sk->pprev;
+               *sk->pprev = sk->next;
+               sk->pprev = NULL;
+               sk->prot->inuse--;
        }
-       sk->next = udp_hash[num];
-       udp_hash[num] = sk;
-       sk->hashent = num;
        SOCKHASH_UNLOCK_WRITE();
 }
 
@@ -216,10 +217,10 @@ int udpv6_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
        }
 
        if (addr_len < sizeof(*usin)) 
-               return(-EINVAL);
+               return -EINVAL;
 
        if (usin->sin6_family && usin->sin6_family != AF_INET6) 
-               return(-EAFNOSUPPORT);
+               return -EAFNOSUPPORT;
 
        fl.fl6_flowlabel = 0;
        if (np->sndflow) {
@@ -764,7 +765,7 @@ static int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, int ulen)
                return -EMSGSIZE;
        
        if (msg->msg_flags & ~(MSG_DONTROUTE|MSG_DONTWAIT))
-               return(-EINVAL);
+               return -EINVAL;
 
        fl.fl6_flowlabel = 0;
 
@@ -773,13 +774,13 @@ static int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, int ulen)
                        return udp_sendmsg(sk, msg, ulen);
 
                if (addr_len < sizeof(*sin6))
-                       return(-EINVAL);
+                       return -EINVAL;
 
                if (sin6->sin6_family && sin6->sin6_family != AF_INET6)
-                       return(-EINVAL);
+                       return -EINVAL;
 
                if (sin6->sin6_port == 0)
-                       return(-EINVAL);
+                       return -EINVAL;
 
                udh.uh.dest = sin6->sin6_port;
                daddr = &sin6->sin6_addr;
@@ -800,7 +801,7 @@ static int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, int ulen)
                        daddr = &sk->net_pinfo.af_inet6.daddr;
        } else {
                if (sk->state != TCP_ESTABLISHED)
-                       return(-ENOTCONN);
+                       return -ENOTCONN;
 
                udh.uh.dest = sk->dport;
                daddr = &sk->net_pinfo.af_inet6.daddr;
@@ -885,10 +886,80 @@ static struct inet6_protocol udpv6_protocol =
        "UDPv6"                 /* name                 */
 };
 
+static void get_udp6_sock(struct sock *sp, char *tmpbuf, int i)
+{      /* Format UDPv6 socket sp as one text record in tmpbuf; i is printed in the leading "%4d:" slot column. */
+       struct in6_addr *dest, *src;
+       __u16 destp, srcp;
+       int timer_active;
+       unsigned long timer_expires;
+
+       dest  = &sp->net_pinfo.af_inet6.daddr;
+       src   = &sp->net_pinfo.af_inet6.rcv_saddr;
+       destp = ntohs(sp->dport);
+       srcp  = ntohs(sp->sport);
+       timer_active = (sp->timer.prev != NULL) ? 2 : 0;
+       timer_expires = (timer_active == 2 ? sp->timer.expires : jiffies);      /* jiffies makes the printed delta 0 */
+       sprintf(tmpbuf,
+               "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
+               "%02X %08X:%08X %02X:%08lX %08X %5d %8d %ld",
+               i,
+               src->s6_addr32[0], src->s6_addr32[1],
+               src->s6_addr32[2], src->s6_addr32[3], srcp,
+               dest->s6_addr32[0], dest->s6_addr32[1],
+               dest->s6_addr32[2], dest->s6_addr32[3], destp,
+               sp->state,
+               atomic_read(&sp->wmem_alloc), atomic_read(&sp->rmem_alloc),
+               timer_active, timer_expires-jiffies, 0,
+               sp->socket ? sp->socket->inode->i_uid : 0, timer_active ? sp->timeout : 0,
+               sp->socket ? sp->socket->inode->i_ino : 0);     /* uid and inode both guard against sp->socket == NULL */
+}
+
+int udp6_get_info(char *buffer, char **start, off_t offset, int length, int dummy)
+{      /* Write the UDPv6 socket list into buffer as 149-byte text records; sets *start and returns the usable byte count. */
+       int len = 0, num = 0, i;
+       off_t pos = 0;
+       off_t begin;
+       char tmpbuf[150];
+
+       if (offset < 149)       /* header record is written only when the request starts inside it */
+               len += sprintf(buffer, "%-148s\n",
+                              "  sl  "                                         /* 6 */
+                              "local_address                         "         /* 38 */
+                              "remote_address                        "         /* 38 */
+                              "st tx_queue rx_queue tr tm->when retrnsmt"      /* 41 */
+                              "   uid  timeout inode");                        /* 21 */
+                                                                               /*----*/
+                                                                               /*144 */
+       pos = 149;              /* pos = offset just past the last record considered (header included) */
+       SOCKHASH_LOCK_READ();
+       for (i = 0; i < UDP_HTABLE_SIZE; i++) {
+               struct sock *sk;
+
+               for (sk = udp_hash[i]; sk; sk = sk->next, num++) {
+                       if (sk->family != PF_INET6)
+                               continue;
+                       pos += 149;
+                       if (pos < offset)       /* record lies wholly before the requested offset: skip */
+                               continue;
+                       get_udp6_sock(sk, tmpbuf, i);   /* NOTE(review): passes bucket index i, not the running count num - confirm intended */
+                       len += sprintf(buffer+len, "%-148s\n", tmpbuf);
+                       if(len >= length)
+                               goto out;
+               }
+       }
+out:
+       SOCKHASH_UNLOCK_READ();
+       begin = len - (pos - offset);   /* index in buffer that corresponds to position "offset" */
+       *start = buffer + begin;
+       len -= begin;
+       if(len > length)
+               len = length;
+       if (len < 0)            /* offset lies beyond everything generated */
+               len = 0; 
+       return len;
+}
 
 struct proto udpv6_prot = {
-       (struct sock *)&udpv6_prot,     /* sklist_next */
-       (struct sock *)&udpv6_prot,     /* sklist_prev */
        udpv6_close,                    /* close */
        udpv6_connect,                  /* connect */
        NULL,                           /* accept */
@@ -908,9 +979,7 @@ struct proto udpv6_prot = {
        udpv6_queue_rcv_skb,            /* backlog_rcv */
        udp_v6_hash,                    /* hash */
        udp_v6_unhash,                  /* unhash */
-       udp_v6_rehash,                  /* rehash */
-       udp_good_socknum,               /* good_socknum */
-       udp_v6_verify_bind,             /* verify_bind */
+       udp_v6_get_port,                /* get_port */
        128,                            /* max_header */
        0,                              /* retransmits */
        "UDP",                          /* name */
index ff92b23f11dc441dea4c5b41fddd878c10f6ccbe..b67fe251d03bf000703af11b0a1a5dca525f3091 100644 (file)
@@ -60,6 +60,9 @@ extern __u32 sysctl_rmem_max;
 #include <net/transp_v6.h>
 
 extern int tcp_tw_death_row_slot;
+extern int sysctl_local_port_range[2];
+extern int tcp_port_rover;
+extern int udp_port_rover;
 #endif
 
 #endif
@@ -281,13 +284,11 @@ EXPORT_SYMBOL(inet_sendmsg);
 EXPORT_SYMBOL(inet_recvmsg);
 
 /* Socket demultiplexing. */
-EXPORT_SYMBOL(tcp_good_socknum);
 EXPORT_SYMBOL(tcp_ehash);
 EXPORT_SYMBOL(tcp_ehash_size);
 EXPORT_SYMBOL(tcp_listening_hash);
 EXPORT_SYMBOL(tcp_bhash);
 EXPORT_SYMBOL(tcp_bhash_size);
-EXPORT_SYMBOL(udp_good_socknum);
 EXPORT_SYMBOL(udp_hash);
 
 EXPORT_SYMBOL(destroy_sock);
@@ -328,7 +329,9 @@ EXPORT_SYMBOL(tcp_v4_send_check);
 EXPORT_SYMBOL(tcp_v4_conn_request);
 EXPORT_SYMBOL(tcp_create_openreq_child);
 EXPORT_SYMBOL(tcp_bucket_create);
-EXPORT_SYMBOL(tcp_bucket_unlock);
+EXPORT_SYMBOL(__tcp_put_port);
+EXPORT_SYMBOL(tcp_put_port);
+EXPORT_SYMBOL(tcp_inherit_port);
 EXPORT_SYMBOL(tcp_v4_syn_recv_sock);
 EXPORT_SYMBOL(tcp_v4_do_rcv);
 EXPORT_SYMBOL(tcp_v4_connect);
@@ -344,6 +347,9 @@ EXPORT_SYMBOL(tcp_transmit_skb);
 EXPORT_SYMBOL(tcp_connect);
 EXPORT_SYMBOL(tcp_make_synack);
 EXPORT_SYMBOL(tcp_tw_death_row_slot);
+EXPORT_SYMBOL(sysctl_local_port_range);
+EXPORT_SYMBOL(tcp_port_rover);
+EXPORT_SYMBOL(udp_port_rover);
 EXPORT_SYMBOL(tcp_sync_mss);
 EXPORT_SYMBOL(net_statistics); 
 
index d0de24eff8ad54dca3cda8aeeb2d3e7aef7af89b..87c0aedc1d01eca5a5e4deb9bcf36814b1b2068d 100644 (file)
@@ -42,7 +42,6 @@
 #define __KERNEL_SYSCALLS__
 
 #include <linux/version.h>
-#include <linux/config.h>
 #include <linux/types.h>
 #include <linux/malloc.h>
 #include <linux/sched.h>