* and that it is in the task area before calling this: this routine does
* no checking.
*/
-static unsigned long get_long(struct mm_struct * mm, struct vm_area_struct * vma, unsigned long addr)
+static unsigned long get_long(struct task_struct * tsk,
+ struct vm_area_struct * vma, unsigned long addr)
{
pgd_t * pgdir;
pmd_t * pgmiddle;
repeat:
pgdir = pgd_offset(vma->vm_mm, addr);
if (pgd_none(*pgdir)) {
- handle_mm_fault(mm, vma, addr, 0);
+ handle_mm_fault(tsk, vma, addr, 0);
goto repeat;
}
if (pgd_bad(*pgdir)) {
}
pgmiddle = pmd_offset(pgdir, addr);
if (pmd_none(*pgmiddle)) {
- handle_mm_fault(mm, vma, addr, 0);
+ handle_mm_fault(tsk, vma, addr, 0);
goto repeat;
}
if (pmd_bad(*pgmiddle)) {
}
pgtable = pte_offset(pgmiddle, addr);
if (!pte_present(*pgtable)) {
- handle_mm_fault(mm, vma, addr, 0);
+ handle_mm_fault(tsk, vma, addr, 0);
goto repeat;
}
page = pte_page(*pgtable);
* Now keeps R/W state of page so that a text page stays readonly
* even if a debugger scribbles breakpoints into it. -M.U-
*/
-static void put_long(struct mm_struct * mm, struct vm_area_struct * vma, unsigned long addr,
+static void put_long(struct task_struct * tsk, struct vm_area_struct * vma, unsigned long addr,
unsigned long data)
{
pgd_t *pgdir;
repeat:
pgdir = pgd_offset(vma->vm_mm, addr);
if (!pgd_present(*pgdir)) {
- handle_mm_fault(mm, vma, addr, 1);
+ handle_mm_fault(tsk, vma, addr, 1);
goto repeat;
}
if (pgd_bad(*pgdir)) {
}
pgmiddle = pmd_offset(pgdir, addr);
if (pmd_none(*pgmiddle)) {
- handle_mm_fault(mm, vma, addr, 1);
+ handle_mm_fault(tsk, vma, addr, 1);
goto repeat;
}
if (pmd_bad(*pgmiddle)) {
}
pgtable = pte_offset(pgmiddle, addr);
if (!pte_present(*pgtable)) {
- handle_mm_fault(mm, vma, addr, 1);
+ handle_mm_fault(tsk, vma, addr, 1);
goto repeat;
}
page = pte_page(*pgtable);
if (!pte_write(*pgtable)) {
- handle_mm_fault(mm, vma, addr, 1);
+ handle_mm_fault(tsk, vma, addr, 1);
goto repeat;
}
/* this is a hack for non-kernel-mapped video buffers and similar */
* This routine checks the page boundaries, and that the offset is
* within the task area. It then calls get_long() to read a long.
*/
-static int read_long(struct mm_struct * mm, unsigned long addr,
+static int read_long(struct task_struct * tsk, unsigned long addr,
unsigned long * result)
{
- struct vm_area_struct * vma = find_extend_vma(mm, addr);
+ struct vm_area_struct * vma = find_extend_vma(tsk, addr);
if (!vma)
return -EIO;
if (!vma_high || vma_high->vm_start != vma->vm_end)
return -EIO;
}
- low = get_long(mm, vma, addr & ~(sizeof(long)-1));
- high = get_long(mm, vma_high, (addr+sizeof(long)) & ~(sizeof(long)-1));
+ low = get_long(tsk, vma, addr & ~(sizeof(long)-1));
+ high = get_long(tsk, vma_high, (addr+sizeof(long)) & ~(sizeof(long)-1));
switch (addr & (sizeof(long)-1)) {
case 1:
low >>= 8;
}
*result = low;
} else
- *result = get_long(mm, vma, addr);
+ *result = get_long(tsk, vma, addr);
return 0;
}
* This routine checks the page boundaries, and that the offset is
* within the task area. It then calls put_long() to write a long.
*/
-static int write_long(struct mm_struct * mm, unsigned long addr,
+static int write_long(struct task_struct * tsk, unsigned long addr,
unsigned long data)
{
- struct vm_area_struct * vma = find_extend_vma(mm, addr);
+ struct vm_area_struct * vma = find_extend_vma(tsk, addr);
if (!vma)
return -EIO;
if (!vma_high || vma_high->vm_start != vma->vm_end)
return -EIO;
}
- low = get_long(mm, vma, addr & ~(sizeof(long)-1));
- high = get_long(mm, vma_high, (addr+sizeof(long)) & ~(sizeof(long)-1));
+ low = get_long(tsk, vma, addr & ~(sizeof(long)-1));
+ high = get_long(tsk, vma_high, (addr+sizeof(long)) & ~(sizeof(long)-1));
switch (addr & (sizeof(long)-1)) {
case 0: /* shouldn't happen, but safety first */
low = data;
high |= data >> 8;
break;
}
- put_long(mm, vma, addr & ~(sizeof(long)-1),low);
- put_long(mm, vma_high, (addr+sizeof(long)) & ~(sizeof(long)-1),high);
+ put_long(tsk, vma, addr & ~(sizeof(long)-1),low);
+ put_long(tsk, vma_high, (addr+sizeof(long)) & ~(sizeof(long)-1),high);
} else
- put_long(mm, vma, addr, data);
+ put_long(tsk, vma, addr, data);
return 0;
}
unsigned long tmp;
down(&child->mm->mmap_sem);
- ret = read_long(child->mm, addr, &tmp);
+ ret = read_long(child, addr, &tmp);
up(&child->mm->mmap_sem);
if (ret >= 0)
ret = put_user(tmp,(unsigned long *) data);
case PTRACE_POKETEXT: /* write the word at location addr. */
case PTRACE_POKEDATA:
down(&child->mm->mmap_sem);
- ret = write_long(child->mm,addr,data);
+ ret = write_long(child,addr,data);
up(&child->mm->mmap_sem);
goto out;
start &= PAGE_MASK;
for (;;) {
- handle_mm_fault(current->mm, vma, start, 1);
+ if (handle_mm_fault(current, vma, start, 1) <= 0)
+ goto bad_area;
if (!size)
break;
size--;
* make sure we exit gracefully rather than endlessly redo
* the fault.
*/
- if (!handle_mm_fault(mm, vma, address, write))
- goto do_sigbus;
+ {
+ int fault = handle_mm_fault(tsk, vma, address, write);
+ if (fault < 0)
+ goto out_of_memory;
+ if (!fault)
+ goto do_sigbus;
+ }
/*
* Did it hit the DOS screen memory VA from vm86 mode?
* We ran out of memory, or some other thing happened to us that made
* us unable to handle the page fault gracefully.
*/
+out_of_memory:
+ up(&mm->mmap_sem);
+ printk("VM: killing process %s\n", tsk->comm);
+ if (error_code & 4)
+ do_exit(SIGKILL);
+ goto no_context;
+
do_sigbus:
up(&mm->mmap_sem);
* = ((hwif->channel ? 2 : 0) + (drive->select.b.unit & 0x01));
*/
+#include <linux/config.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/delay.h>
lo->lo_backing_file->f_dentry = file->f_dentry;
lo->lo_backing_file->f_op = file->f_op;
lo->lo_backing_file->private_data = file->private_data;
+ file_moveto(lo->lo_backing_file, file);
error = get_write_access(inode);
if (error) {
* #endif
*/
+#include <linux/config.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/ioport.h>
/* Aux routines for the syncer */
-static void all_files_read_only(void) /* Kill write permissions of all files */
-{
- struct file *file;
-
- for (file = inuse_filps; file; file = file->f_next)
- if (file->f_dentry && atomic_read(&file->f_count) && S_ISREG(file->f_dentry->d_inode->i_mode))
- file->f_mode &= ~2;
-}
-
static int is_local_disk(kdev_t dev) /* Guess if the device is a local hard drive */
{
unsigned int major = MAJOR(dev);
struct super_block *sb = get_super(dev);
struct vfsmount *vfsmnt;
int ret, flags;
+ struct list_head *p;
if (!sb) {
printk("Superblock not found\n");
printk("R/O\n");
return;
}
+
+ file_list_lock();
+ for (p = sb->s_files.next; p != &sb->s_files; p = p->next) {
+ struct file *file = list_entry(p, struct file, f_list);
+ if (file->f_dentry && file_count(file)
+ && S_ISREG(file->f_dentry->d_inode->i_mode))
+ file->f_mode &= ~2;
+ }
+ file_list_unlock();
DQUOT_OFF(dev);
fsync_dev(dev);
flags = MS_RDONLY;
remount_flag = (emergency_sync_scheduled == EMERG_REMOUNT);
emergency_sync_scheduled = 0;
- if (remount_flag)
- all_files_read_only();
-
for (mnt = vfsmntlist; mnt; mnt = mnt->mnt_next)
if (is_local_disk(mnt->mnt_dev))
go_sync(mnt->mnt_dev, remount_flag);
static int check_tty_count(struct tty_struct *tty, const char *routine)
{
#ifdef CHECK_TTY_COUNT
- struct file *f;
+ struct list_head *p;
int count = 0;
- for(f = inuse_filps; f; f = f->f_next) {
- if(f->private_data == tty)
+ file_list_lock();
+ for(p = tty->tty_files.next; p != &tty->tty_files; p = p->next) {
+ if(list_entry(p, struct file, f_list)->private_data == tty)
count++;
}
+ file_list_unlock();
if (tty->driver.type == TTY_DRIVER_TYPE_PTY &&
tty->driver.subtype == PTY_TYPE_SLAVE &&
tty->link && tty->link->count)
void do_tty_hangup(void *data)
{
struct tty_struct *tty = (struct tty_struct *) data;
- struct file * filp;
struct file * cons_filp = NULL;
struct task_struct *p;
+ struct list_head *l;
int closecount = 0, n;
if (!tty)
lock_kernel();
check_tty_count(tty, "do_tty_hangup");
- for (filp = inuse_filps; filp; filp = filp->f_next) {
- if (filp->private_data != tty)
- continue;
+ file_list_lock();
+ for (l = tty->tty_files.next; l != &tty->tty_files; l = l->next) {
+ struct file * filp = list_entry(l, struct file, f_list);
if (!filp->f_dentry)
continue;
- if (!filp->f_dentry->d_inode)
- continue;
if (filp->f_dentry->d_inode->i_rdev == CONSOLE_DEV ||
filp->f_dentry->d_inode->i_rdev == SYSCONS_DEV) {
cons_filp = filp;
if (filp->f_op != &tty_fops)
continue;
closecount++;
- tty_fasync(-1, filp, 0);
+ tty_fasync(-1, filp, 0); /* can't block */
filp->f_op = &hung_up_tty_fops;
}
+ file_list_unlock();
/* FIXME! What are the locking issues here? This may me overdoing things.. */
{
init_dev_done:
#endif
filp->private_data = tty;
+ file_move(filp, &tty->tty_files);
check_tty_count(tty, "tty_open");
if (tty->driver.type == TTY_DRIVER_TYPE_PTY &&
tty->driver.subtype == PTY_TYPE_MASTER)
tty->tq_hangup.routine = do_tty_hangup;
tty->tq_hangup.data = tty;
sema_init(&tty->atomic_read, 1);
+ INIT_LIST_HEAD(&tty->tty_files);
}
/*
*
*/
-#include <linux/config.h>
#include <linux/parport.h>
#include <linux/delay.h>
#include <asm/uaccess.h>
kdev_t devt = inode->i_rdev;
int dev;
- if (atomic_read(&filp->f_count) > 1)
+ if (file_count(filp) > 1)
return 0;
dev = TAPE_NR(devt);
if (newfd < 0)
return newfd;
- current->files->fd [newfd] = usema->filp;
- atomic_inc(&usema->filp->f_count);
+ get_file(usema);
+ fd_install(newfd, usema->filp);
/* Is that it? */
printk("UIOCATTACHSEMA: new usema fd is %d", newfd);
return newfd;
bh = bh->b_this_page;
} while (bh != head);
if (rw == READ)
- ++current->mm->maj_flt;
+ ++current->maj_flt;
if ((rw == READ) && nr) {
if (Page_Uptodate(page))
BUG();
nr++;
} while (iblock++, (bh = bh->b_this_page) != head);
- ++current->mm->maj_flt;
+ ++current->maj_flt;
if (nr) {
if (Page_Uptodate(page))
BUG();
static void add_dquot_ref(kdev_t dev, short type)
{
struct super_block *sb = get_super(dev);
- struct file *filp;
+ struct list_head *p;
struct inode *inode;
if (!sb || !sb->dq_op)
return; /* nothing to do */
- for (filp = inuse_filps; filp; filp = filp->f_next) {
+ file_list_lock();
+ for (p = sb->s_files.next; p != &sb->s_files; p = p->next) {
+ struct file *filp = list_entry(p, struct file, f_list);
if (!filp->f_dentry)
continue;
- if (filp->f_dentry->d_sb != sb)
- continue;
inode = filp->f_dentry->d_inode;
if (!inode)
continue;
/* N.B. race problem -- filp could become unused */
if (filp->f_mode & FMODE_WRITE) {
+ file_list_unlock();
sb->dq_op->initialize(inode, type);
inode->i_flags |= S_QUOTA;
+ file_list_lock();
}
}
+ file_list_unlock();
}
static void reset_dquot_ptrs(kdev_t dev, short type)
{
struct super_block *sb = get_super(dev);
- struct file *filp;
+ struct list_head *p;
struct inode *inode;
struct dquot *dquot;
int cnt;
/* free any quota for unused dentries */
shrink_dcache_sb(sb);
- for (filp = inuse_filps; filp; filp = filp->f_next) {
+ file_list_lock();
+ for (p = sb->s_files.next; p != &sb->s_files; p = p->next) {
+ struct file *filp = list_entry(p, struct file, f_list);
if (!filp->f_dentry)
continue;
- if (filp->f_dentry->d_sb != sb)
- continue;
inode = filp->f_dentry->d_inode;
if (!inode)
continue;
inode->i_flags &= ~S_QUOTA;
put_it:
if (dquot != NODQUOT) {
+ file_list_unlock();
dqput(dquot);
/* we may have blocked ... */
goto restart;
}
}
}
+ file_list_unlock();
}
static inline void dquot_incr_inodes(struct dquot *dquot, unsigned long number)
{
struct inode * inode = dentry->d_inode;
struct file * f;
+ struct list_head * l = NULL;
int fd, error;
+ if (inode->i_sb)
+ l = &inode->i_sb->s_files;
+
error = -EINVAL;
if (!inode->i_op || !inode->i_op->default_file_ops)
goto out;
if (error)
goto out_filp;
}
+ file_move(f, l);
fd_install(fd, f);
dget(dentry);
}
#include <linux/slab.h>
#include <linux/file.h>
#include <linux/init.h>
+#include <linux/smp_lock.h>
/* SLAB cache for filp's. */
static kmem_cache_t *filp_cache;
int nr_free_files = 0; /* read only */
int max_files = NR_FILE;/* tunable */
-/* Free list management, if you are here you must have f_count == 0 */
-static struct file * free_filps = NULL;
-
-static void insert_file_free(struct file *file)
-{
- if((file->f_next = free_filps) != NULL)
- free_filps->f_pprev = &file->f_next;
- free_filps = file;
- file->f_pprev = &free_filps;
- nr_free_files++;
-}
-
-/* The list of in-use filp's must be exported (ugh...) */
-struct file *inuse_filps = NULL;
-
-static inline void put_inuse(struct file *file)
-{
- if((file->f_next = inuse_filps) != NULL)
- inuse_filps->f_pprev = &file->f_next;
- inuse_filps = file;
- file->f_pprev = &inuse_filps;
-}
-
-/* It does not matter which list it is on. */
-static inline void remove_filp(struct file *file)
-{
- if(file->f_next)
- file->f_next->f_pprev = file->f_pprev;
- *file->f_pprev = file->f_next;
-}
-
+/* Here the new files go */
+static LIST_HEAD(anon_list);
+/* And here the free ones sit */
+static LIST_HEAD(free_list);
+/* public *and* exported. Not pretty! */
+spinlock_t files_lock = SPIN_LOCK_UNLOCKED;
void __init file_table_init(void)
{
/* Find an unused file structure and return a pointer to it.
* Returns NULL, if there are no more free file structures or
* we run out of memory.
+ *
+ * SMP-safe.
*/
struct file * get_empty_filp(void)
{
static int old_max = 0;
struct file * f;
+ file_list_lock();
if (nr_free_files > NR_RESERVED_FILES) {
used_one:
- f = free_filps;
- remove_filp(f);
+ f = list_entry(free_list.next, struct file, f_list);
+ list_del(&f->f_list);
nr_free_files--;
new_one:
+ file_list_unlock();
memset(f, 0, sizeof(*f));
- atomic_set(&f->f_count, 1);
+ atomic_set(&f->f_count,1);
f->f_version = ++event;
f->f_uid = current->fsuid;
f->f_gid = current->fsgid;
- put_inuse(f);
+ file_list_lock();
+ list_add(&f->f_list, &anon_list);
+ file_list_unlock();
return f;
}
/*
* Allocate a new one if we're below the limit.
*/
if (nr_files < max_files) {
+ file_list_unlock();
f = kmem_cache_alloc(filp_cache, SLAB_KERNEL);
+ file_list_lock();
if (f) {
nr_files++;
goto new_one;
printk("VFS: file-max limit %d reached\n", max_files);
old_max = max_files;
}
+ file_list_unlock();
return NULL;
}
return 0;
}
-void fput(struct file *file)
+void _fput(struct file *file)
{
- if (atomic_dec_and_test(&file->f_count)) {
- locks_remove_flock(file);
- __fput(file);
- remove_filp(file);
- insert_file_free(file);
- }
+ atomic_inc(&file->f_count);
+
+ lock_kernel();
+ locks_remove_flock(file); /* Still need the */
+ __fput(file); /* big lock here. */
+ unlock_kernel();
+
+ atomic_set(&file->f_count, 0);
+ file_list_lock();
+ list_del(&file->f_list);
+ list_add(&file->f_list, &free_list);
+ nr_free_files++;
+ file_list_unlock();
}
+/* Here. put_filp() is SMP-safe now. */
+
void put_filp(struct file *file)
{
- if (atomic_dec_and_test(&file->f_count)) {
- remove_filp(file);
- insert_file_free(file);
+ if(atomic_dec_and_test(&file->f_count)) {
+ file_list_lock();
+ list_del(&file->f_list);
+ list_add(&file->f_list, &free_list);
+ nr_free_files++;
+ file_list_unlock();
+ }
+}
+
+void file_move(struct file *file, struct list_head *list)
+{
+ if (!list)
+ return;
+ file_list_lock();
+ list_del(&file->f_list);
+ list_add(&file->f_list, list);
+ file_list_unlock();
+}
+
+void file_moveto(struct file *new, struct file *old)
+{
+ file_list_lock();
+ list_del(&new->f_list);
+ list_add(&new->f_list, &old->f_list);
+ file_list_unlock();
+}
+
+int fs_may_remount_ro(struct super_block *sb)
+{
+	struct list_head *p;
+
+	/* Check that no files are currently opened for writing. */
+	file_list_lock();
+	for (p = sb->s_files.next; p != &sb->s_files; p = p->next) {
+		struct file *file = list_entry(p, struct file, f_list);
+		struct inode *inode;
+
+		/* Guard against half-set-up files, as the other s_files
+		 * walkers in this patch do. */
+		if (!file->f_dentry)
+			continue;
+		inode = file->f_dentry->d_inode;
+		if (!inode)
+			continue;
+
+		/* File with pending delete? */
+		if (inode->i_nlink == 0)
+			goto too_bad;
+
+		/* Writable file?  Must drop files_lock before returning,
+		 * so take the common unlock path. */
+		if (S_ISREG(inode->i_mode) && (file->f_mode & FMODE_WRITE))
+			goto too_bad;
	}
+	file_list_unlock();
+	return 1;	/* Tis' cool bro. */
+too_bad:
+	file_list_unlock();
+	return 0;
max_inodes = max;
}
-/* This belongs in file_table.c, not here... */
-int fs_may_remount_ro(struct super_block *sb)
-{
- struct file *file;
-
- /* Check that no files are currently opened for writing. */
- for (file = inuse_filps; file; file = file->f_next) {
- struct inode *inode;
- if (!file->f_dentry)
- continue;
- inode = file->f_dentry->d_inode;
- if (!inode || inode->i_sb != sb)
- continue;
-
- /* File with pending delete? */
- if (inode->i_nlink == 0)
- return 0;
-
- /* Writable file? */
- if (S_ISREG(inode->i_mode) && (file->f_mode & FMODE_WRITE))
- return 0;
- }
- return 1; /* Tis' cool bro. */
-}
-
void update_atime (struct inode *inode)
{
if ( IS_NOATIME (inode) ) return;
goto out_req;
/* Put the task on inode's writeback request list. */
+ get_file(file);
wreq->wb_file = file;
wreq->wb_pid = current->pid;
wreq->wb_page = page;
* The IO completion will then free the page and the dentry.
*/
get_page(page);
- atomic_inc(&file->f_count);
/* Schedule request */
synchronous = schedule_write_request(req, synchronous);
f->f_op = NULL;
if (inode->i_op)
f->f_op = inode->i_op->default_file_ops;
+ if (inode->i_sb)
+ file_move(f, &inode->i_sb->s_files);
if (f->f_op && f->f_op->open) {
error = f->f_op->open(inode,f);
if (error)
tsk->tty ? kdev_t_to_nr(tsk->tty->device) : 0,
tty_pgrp,
tsk->flags,
- tsk->mm ? tsk->mm->min_flt : 0,
- tsk->mm ? tsk->mm->cmin_flt : 0,
- tsk->mm ? tsk->mm->maj_flt : 0,
- tsk->mm ? tsk->mm->cmaj_flt : 0,
+ tsk->min_flt,
+ tsk->cmin_flt,
+ tsk->maj_flt,
+ tsk->cmaj_flt,
tsk->times.tms_utime,
tsk->times.tms_stime,
tsk->times.tms_cutime,
sigign .sig[0] & 0x7fffffffUL,
sigcatch .sig[0] & 0x7fffffffUL,
wchan,
- tsk->mm ? tsk->mm->nswap : 0,
- tsk->mm ? tsk->mm->cnswap : 0,
+ tsk->nswap,
+ tsk->cnswap,
tsk->exit_signal,
tsk->processor);
}
}
}
+struct super_block *proc_super_blocks = NULL;
+
+static void proc_put_super(struct super_block *sb)
+{
+ struct super_block **p = &proc_super_blocks;
+ while (*p != sb) {
+ if (!*p) /* should never happen */
+ return;
+ p = (struct super_block **)&(*p)->u.generic_sbp;
+ }
+ *p = (struct super_block *)(*p)->u.generic_sbp;
+}
+
static struct super_operations proc_sops = {
proc_read_inode,
proc_write_inode,
proc_put_inode,
proc_delete_inode, /* delete_inode(struct inode *) */
NULL,
- NULL,
+ proc_put_super,
NULL,
proc_statfs,
NULL
if (!s->s_root)
goto out_no_root;
parse_options(data, &root_inode->i_uid, &root_inode->i_gid);
+ s->u.generic_sbp = (void*) proc_super_blocks;
+ proc_super_blocks = s;
unlock_super(s);
return s;
return -ENOMEM;
if (!pte_present(*src_table))
- handle_mm_fault(tsk->mm, src_vma, stmp, 1);
+ handle_mm_fault(tsk, src_vma, stmp, 1);
if ((vma->vm_flags & VM_WRITE) && !pte_write(*src_table))
- handle_mm_fault(tsk->mm, src_vma, stmp, 1);
+ handle_mm_fault(tsk, src_vma, stmp, 1);
set_pte(src_table, pte_mkdirty(*src_table));
set_pte(dest_table, *src_table);
*/
static void proc_kill_inodes(int ino)
{
- struct file *filp;
-
- /* inuse_filps is protected by the single kernel lock */
- for (filp = inuse_filps; filp; filp = filp->f_next) {
- struct dentry * dentry;
- struct inode * inode;
-
- dentry = filp->f_dentry;
- if (!dentry)
- continue;
- if (dentry->d_op != &proc_dentry_operations)
- continue;
- inode = dentry->d_inode;
- if (!inode)
- continue;
- if (inode->i_ino != ino)
- continue;
- filp->f_op = NULL;
+ struct list_head *p;
+ struct super_block *sb;
+
+ /*
+ * Actually it's a partial revoke(). We have to go through all
+ * copies of procfs. proc_super_blocks is protected by the big
+ * lock for the time being.
+ */
+ for (sb = proc_super_blocks;
+ sb;
+ sb = (struct super_block*)sb->u.generic_sbp) {
+ file_list_lock();
+ for (p = sb->s_files.next; p != &sb->s_files; p = p->next) {
+ struct file * filp = list_entry(p, struct file, f_list);
+ struct dentry * dentry;
+ struct inode * inode;
+
+ dentry = filp->f_dentry;
+ if (!dentry)
+ continue;
+ if (dentry->d_op != &proc_dentry_operations)
+ continue;
+ inode = dentry->d_inode;
+ if (!inode)
+ continue;
+ if (inode->i_ino != ino)
+ continue;
+ filp->f_op = NULL;
+ }
+ file_list_unlock();
}
}
INIT_LIST_HEAD(&s->s_dirty);
list_add (&s->s_list, super_blocks.prev);
init_waitqueue_head(&s->s_wait);
+ INIT_LIST_HEAD(&s->s_files);
}
return s;
}
#ifndef __LINUX_FILE_H
#define __LINUX_FILE_H
-extern void __fput(struct file *);
+extern void __fput(struct file *); /* goner? */
+extern void _fput(struct file *);
/*
* Check whether the specified task has the fd open. Since the task
* I suspect there are many other similar "optimizations" across the
* kernel...
*/
-extern void fput(struct file *);
+extern inline void fput(struct file * file)
+{
+ if (atomic_dec_and_test(&file->f_count))
+ _fput(file);
+}
extern void put_filp(struct file *);
#endif /* __LINUX_FILE_H */
};
struct file {
- struct file *f_next, **f_pprev;
+ struct list_head f_list;
struct dentry *f_dentry;
struct file_operations *f_op;
atomic_t f_count;
/* needed for tty driver, and maybe others */
void *private_data;
};
+extern spinlock_t files_lock;
+#define file_list_lock() spin_lock(&files_lock);
+#define file_list_unlock() spin_unlock(&files_lock);
#define get_file(x) atomic_inc(&(x)->f_count)
#define file_count(x) atomic_read(&(x)->f_count)
short int s_ibasket_count;
short int s_ibasket_max;
struct list_head s_dirty; /* dirty inodes */
+ struct list_head s_files;
union {
struct minix_sb_info minix_sb;
extern int fs_may_remount_ro(struct super_block *);
extern int fs_may_mount(kdev_t);
-extern struct file *inuse_filps;
-
extern int try_to_free_buffers(struct page *);
extern void refile_buffer(struct buffer_head * buf);
extern void insert_inode_hash(struct inode *);
extern void remove_inode_hash(struct inode *);
extern struct file * get_empty_filp(void);
+extern void file_move(struct file *f, struct list_head *list);
+extern void file_moveto(struct file *new, struct file *old);
extern struct buffer_head * get_hash_table(kdev_t, int, int);
extern struct buffer_head * getblk(kdev_t, int, int);
extern void ll_rw_block(int, int, struct buffer_head * bh[]);
extern int zeromap_page_range(unsigned long from, unsigned long size, pgprot_t prot);
extern void vmtruncate(struct inode * inode, unsigned long offset);
-extern int handle_mm_fault(struct mm_struct *mm,struct vm_area_struct *vma, unsigned long address, int write_access);
-extern void make_pages_present(unsigned long addr, unsigned long end);
+extern int handle_mm_fault(struct task_struct *tsk,struct vm_area_struct *vma, unsigned long address, int write_access);
+extern int make_pages_present(unsigned long addr, unsigned long end);
extern int pgt_cache_water[2];
extern int check_pgt_cache(void);
return vma;
}
-extern struct vm_area_struct *find_extend_vma(struct mm_struct *mm, unsigned long addr);
+extern struct vm_area_struct *find_extend_vma(struct task_struct *tsk, unsigned long addr);
#define buffer_under_min() ((atomic_read(&buffermem) >> PAGE_SHIFT) * 100 < \
buffer_mem.min_percent * num_physpages)
}
}
+extern struct super_block *proc_super_blocks;
extern struct dentry_operations proc_dentry_operations;
extern struct super_block *proc_read_super(struct super_block *,void *,int);
extern int init_proc_fs(void);
atomic_t count;
int map_count; /* number of VMAs */
struct semaphore mmap_sem;
- rwlock_t page_table_lock;
+ spinlock_t page_table_lock;
unsigned long context;
- unsigned long min_flt, maj_flt, nswap, cmin_flt, cmaj_flt, cnswap;
- int swappable:1;
unsigned long start_code, end_code, start_data, end_data;
unsigned long start_brk, brk, start_stack;
unsigned long arg_start, arg_end, env_start, env_end;
swapper_pg_dir, \
ATOMIC_INIT(1), 1, \
__MUTEX_INITIALIZER(name.mmap_sem), \
- RW_LOCK_UNLOCKED, \
- 0, \
- 0, 0, 0, 0, 0, 0, \
+ SPIN_LOCK_UNLOCKED, \
0, \
0, 0, 0, 0, \
0, 0, 0, \
struct tms times;
unsigned long start_time;
long per_cpu_utime[NR_CPUS], per_cpu_stime[NR_CPUS];
+/* mm fault and swap info: this can arguably be seen as either mm-specific or thread-specific */
+ unsigned long min_flt, maj_flt, nswap, cmin_flt, cmaj_flt, cnswap;
+ int swappable:1;
/* process credentials */
uid_t uid,euid,suid,fsuid;
gid_t gid,egid,sgid,fsgid;
/* timer */ { NULL, NULL, 0, 0, it_real_fn }, \
/* utime */ {0,0,0,0},0, \
/* per CPU times */ {0, }, {0, }, \
+/* flt */ 0,0,0,0,0,0, \
+/* swp */ 0, \
/* process credentials */ \
/* uid etc */ 0,0,0,0,0,0,0,0, \
/* suppl grps*/ 0, {0,}, \
struct tq_struct tq_hangup;
void *disc_data;
void *driver_data;
+ struct list_head tty_files;
#define N_TTY_BUF_SIZE 4096
} while(0);
struct sock {
- /* This must be first. */
- struct sock *sklist_next;
- struct sock *sklist_prev;
-
/* Local port binding hash linkage. */
struct sock *bind_next;
struct sock **bind_pprev;
* transport -> network interface is defined by struct inet_proto
*/
struct proto {
- /* These must be first. */
- struct sock *sklist_next;
- struct sock *sklist_prev;
-
void (*close)(struct sock *sk,
long timeout);
int (*connect)(struct sock *sk,
/* Keeping track of sk's, looking them up, and port selection methods. */
void (*hash)(struct sock *sk);
void (*unhash)(struct sock *sk);
- void (*rehash)(struct sock *sk);
- unsigned short (*good_socknum)(void);
- int (*verify_bind)(struct sock *sk, unsigned short snum);
+ int (*get_port)(struct sock *sk, unsigned short snum);
unsigned short max_header;
unsigned long retransmits;
#define SOCKHASH_LOCK_WRITE_BH() write_lock(&sockhash_lock)
#define SOCKHASH_UNLOCK_WRITE_BH() write_unlock(&sockhash_lock)
-/* Some things in the kernel just want to get at a protocols
- * entire socket list commensurate, thus...
- */
-static __inline__ void add_to_prot_sklist(struct sock *sk)
-{
- SOCKHASH_LOCK_WRITE();
- if(!sk->sklist_next) {
- struct proto *p = sk->prot;
-
- sk->sklist_prev = (struct sock *) p;
- sk->sklist_next = p->sklist_next;
- p->sklist_next->sklist_prev = sk;
- p->sklist_next = sk;
-
- /* Charge the protocol. */
- sk->prot->inuse += 1;
- if(sk->prot->highestinuse < sk->prot->inuse)
- sk->prot->highestinuse = sk->prot->inuse;
- }
- SOCKHASH_UNLOCK_WRITE();
-}
-
-static __inline__ void del_from_prot_sklist(struct sock *sk)
-{
- SOCKHASH_LOCK_WRITE();
- if(sk->sklist_next) {
- sk->sklist_next->sklist_prev = sk->sklist_prev;
- sk->sklist_prev->sklist_next = sk->sklist_next;
- sk->sklist_next = NULL;
- sk->prot->inuse--;
- }
- SOCKHASH_UNLOCK_WRITE();
-}
-
/* Used by processes to "lock" a socket state, so that
* interrupts and bottom half handlers won't change it
* from under us. It essentially blocks any incoming
*/
struct tcp_bind_bucket {
unsigned short port;
- unsigned short flags;
-#define TCPB_FLAG_LOCKED 0x0001
-#define TCPB_FLAG_FASTREUSE 0x0002
-#define TCPB_FLAG_GOODSOCKNUM 0x0004
-
+ unsigned short fastreuse;
struct tcp_bind_bucket *next;
struct sock *owners;
struct tcp_bind_bucket **pprev;
return (lport & (tcp_bhash_size - 1));
}
-static __inline__ void tcp_sk_bindify(struct sock *sk)
-{
- struct tcp_bind_bucket *tb;
- unsigned short snum = sk->num;
-
- for(tb = tcp_bhash[tcp_bhashfn(snum)]; tb->port != snum; tb = tb->next)
- ;
- /* Update bucket flags. */
- if(tb->owners == NULL) {
- /* We're the first. */
- if(sk->reuse && sk->state != TCP_LISTEN)
- tb->flags = TCPB_FLAG_FASTREUSE;
- else
- tb->flags = 0;
- } else {
- if((tb->flags & TCPB_FLAG_FASTREUSE) &&
- ((sk->reuse == 0) || (sk->state == TCP_LISTEN)))
- tb->flags &= ~TCPB_FLAG_FASTREUSE;
- }
- if((sk->bind_next = tb->owners) != NULL)
- tb->owners->bind_pprev = &sk->bind_next;
- tb->owners = sk;
- sk->bind_pprev = &tb->owners;
- sk->prev = (struct sock *) tb;
-}
-
/* This is a TIME_WAIT bucket. It works around the memory consumption
* problems of sockets in such a state on heavily loaded servers, but
* without violating the protocol specification.
* XXX Yes I know this is gross, but I'd have to edit every single
* XXX networking file if I created a "struct sock_header". -DaveM
*/
- struct sock *sklist_next;
- struct sock *sklist_prev;
struct sock *bind_next;
struct sock **bind_pprev;
__u32 daddr;
extern struct proto tcp_prot;
extern struct tcp_mib tcp_statistics;
-extern unsigned short tcp_good_socknum(void);
+extern void tcp_put_port(struct sock *sk);
+extern void __tcp_put_port(struct sock *sk);
+extern void tcp_inherit_port(struct sock *sk, struct sock *child);
extern void tcp_v4_err(struct sk_buff *skb,
unsigned char *, int);
#define TCP_SLT_SYNACK 0
#define TCP_SLT_KEEPALIVE 1
#define TCP_SLT_TWKILL 2
-#define TCP_SLT_BUCKETGC 3
-#define TCP_SLT_MAX 4
+#define TCP_SLT_MAX 3
extern struct tcp_sl_timer tcp_slt_array[TCP_SLT_MAX];
atomic_dec(&slt->count);
}
-/* This needs to use a slow timer, so it is here. */
-static __inline__ void tcp_sk_unbindify(struct sock *sk)
-{
- struct tcp_bind_bucket *tb = (struct tcp_bind_bucket *) sk->prev;
- if(sk->bind_next)
- sk->bind_next->bind_pprev = sk->bind_pprev;
- *sk->bind_pprev = sk->bind_next;
- if(tb->owners == NULL)
- tcp_inc_slow_timer(TCP_SLT_BUCKETGC);
-}
-
extern const char timer_bug_msg[];
static inline void tcp_clear_xmit_timer(struct sock *sk, int what)
#define _UDP_H
#include <linux/udp.h>
+#include <net/sock.h>
#define UDP_HTABLE_SIZE 128
*/
extern struct sock *udp_hash[UDP_HTABLE_SIZE];
-extern unsigned short udp_good_socknum(void);
+extern int udp_port_rover;
+
+static inline int udp_lport_inuse(u16 num)
+{
+ struct sock *sk = udp_hash[num & (UDP_HTABLE_SIZE - 1)];
+
+ for(; sk != NULL; sk = sk->next) {
+ if(sk->num == num)
+ return 1;
+ }
+ return 0;
+}
/* Note: this must match 'valbool' in sock_setsockopt */
#define UDP_CSUM_NOXMIT 1
pte = pte_mkdirty(mk_pte(page, PAGE_SHARED));
shp->shm_pages[idx] = pte_val(pte);
} else
- --current->mm->maj_flt; /* was incremented in do_no_page */
+ --current->maj_flt; /* was incremented in do_no_page */
done: /* pte_val(pte) == shp->shm_pages[idx] */
- current->mm->min_flt++;
+ current->min_flt++;
get_page(mem_map + MAP_NR(pte_page(pte)));
return pte_page(pte);
}
*/
if (!file)
return 0;
- atomic_inc(&file->f_count);
+ get_file(file);
if (!check_free_space(file)) {
fput(file);
return 0;
write_unlock_irq(&tasklist_lock);
release_thread(p);
-#if 0 /* FIXME! How do we do this right for threads? */
current->cmin_flt += p->min_flt + p->cmin_flt;
current->cmaj_flt += p->maj_flt + p->cmaj_flt;
current->cnswap += p->nswap + p->cnswap;
-#endif
free_task_struct(p);
} else {
printk("task releasing itself\n");
flush_tlb_mm(mm);
destroy_context(mm);
tsk->mm = &init_mm;
+ tsk->swappable = 0;
SET_PAGE_DIR(tsk, swapper_pg_dir);
mm_release();
mmput(mm);
mm->map_count = 0;
mm->def_flags = 0;
init_MUTEX_LOCKED(&mm->mmap_sem);
- mm->page_table_lock = RW_LOCK_UNLOCKED;
+ mm->page_table_lock = SPIN_LOCK_UNLOCKED;
/*
* Leave mm->pgd set to the parent's pgd
* so that pgd_offset() is always valid.
* cache or tlb.
*/
mm->cpu_vm_mask = 0;
- mm->swappable = 0;
}
return mm;
}
goto fail_nomem;
tsk->mm = mm;
+ tsk->min_flt = tsk->maj_flt = 0;
+ tsk->cmin_flt = tsk->cmaj_flt = 0;
+ tsk->nswap = tsk->cnswap = 0;
copy_segments(nr, tsk, mm);
retval = new_page_tables(tsk);
if (retval)
__MOD_INC_USE_COUNT(p->binfmt->module);
p->did_exec = 0;
+ p->swappable = 0;
p->state = TASK_UNINTERRUPTIBLE;
copy_flags(clone_flags, p);
p->semundo = NULL;
/* ok, now we should be set up.. */
- p->mm->swappable = 1;
+ p->swappable = 1;
p->exit_signal = clone_flags & CSIGNAL;
p->pdeath_signal = 0;
EXPORT_SYMBOL(get_super);
EXPORT_SYMBOL(get_fs_type);
EXPORT_SYMBOL(getname);
-EXPORT_SYMBOL(__fput);
+EXPORT_SYMBOL(__fput); /* goner? */
+EXPORT_SYMBOL(_fput);
EXPORT_SYMBOL(igrab);
EXPORT_SYMBOL(iunique);
EXPORT_SYMBOL(iget);
EXPORT_SYMBOL(init_private_file);
EXPORT_SYMBOL(filp_open);
EXPORT_SYMBOL(filp_close);
-EXPORT_SYMBOL(fput);
EXPORT_SYMBOL(put_filp);
+EXPORT_SYMBOL(files_lock);
EXPORT_SYMBOL(check_disk_change);
EXPORT_SYMBOL(invalidate_buffers);
EXPORT_SYMBOL(invalidate_inodes);
r.ru_utime.tv_usec = CT_TO_USECS(p->times.tms_utime);
r.ru_stime.tv_sec = CT_TO_SECS(p->times.tms_stime);
r.ru_stime.tv_usec = CT_TO_USECS(p->times.tms_stime);
- r.ru_minflt = 0;
- r.ru_majflt = 0;
- r.ru_nswap = 0;
+ r.ru_minflt = p->min_flt;
+ r.ru_majflt = p->maj_flt;
+ r.ru_nswap = p->nswap;
break;
case RUSAGE_CHILDREN:
r.ru_utime.tv_sec = CT_TO_SECS(p->times.tms_cutime);
r.ru_utime.tv_usec = CT_TO_USECS(p->times.tms_cutime);
r.ru_stime.tv_sec = CT_TO_SECS(p->times.tms_cstime);
r.ru_stime.tv_usec = CT_TO_USECS(p->times.tms_cstime);
- r.ru_minflt = 0;
- r.ru_majflt = 0;
- r.ru_nswap = 0;
+ r.ru_minflt = p->cmin_flt;
+ r.ru_majflt = p->cmaj_flt;
+ r.ru_nswap = p->cnswap;
break;
default:
r.ru_utime.tv_sec = CT_TO_SECS(p->times.tms_utime + p->times.tms_cutime);
r.ru_utime.tv_usec = CT_TO_USECS(p->times.tms_utime + p->times.tms_cutime);
r.ru_stime.tv_sec = CT_TO_SECS(p->times.tms_stime + p->times.tms_cstime);
r.ru_stime.tv_usec = CT_TO_USECS(p->times.tms_stime + p->times.tms_cstime);
- r.ru_minflt = 0;
- r.ru_majflt = 0;
- r.ru_nswap = 0;
+ r.ru_minflt = p->min_flt + p->cmin_flt;
+ r.ru_majflt = p->maj_flt + p->cmaj_flt;
+ r.ru_nswap = p->nswap + p->cnswap;
break;
}
return copy_to_user(ru, &r, sizeof(r)) ? -EFAULT : 0;
* We enter with the page table read-lock held, and need to exit without
* it.
*/
-static int do_wp_page(struct mm_struct * mm, struct vm_area_struct * vma,
+static int do_wp_page(struct task_struct * tsk, struct vm_area_struct * vma,
unsigned long address, pte_t *page_table, pte_t pte)
{
unsigned long old_page, new_page;
old_page = pte_page(pte);
if (MAP_NR(old_page) >= max_mapnr)
goto bad_wp_page;
- mm->min_flt++;
+ tsk->min_flt++;
page = mem_map + MAP_NR(old_page);
/*
flush_cache_page(vma, address);
set_pte(page_table, pte_mkyoung(pte_mkdirty(pte_mkwrite(pte))));
flush_tlb_page(vma, address);
- read_unlock(&mm->page_table_lock);
+ spin_unlock(&tsk->mm->page_table_lock);
return 1;
}
/*
* Ok, we need to copy. Oh, well..
*/
- read_unlock(&mm->page_table_lock);
+ spin_unlock(&tsk->mm->page_table_lock);
new_page = __get_free_page(GFP_USER);
if (!new_page)
- return 0;
- read_lock(&mm->page_table_lock);
+ return -1;
+ spin_lock(&tsk->mm->page_table_lock);
/*
* Re-check the pte - we dropped the lock
/* Free the old page.. */
new_page = old_page;
}
- read_unlock(&mm->page_table_lock);
+ spin_unlock(&tsk->mm->page_table_lock);
free_page(new_page);
return 1;
bad_wp_page:
printk("do_wp_page: bogus page at address %08lx (%08lx)\n",address,old_page);
- return 0;
+ return -1;
}
/*
return;
}
-static int do_swap_page(struct mm_struct * mm,
+static int do_swap_page(struct task_struct * tsk,
struct vm_area_struct * vma, unsigned long address,
pte_t * page_table, unsigned long entry, int write_access)
{
page = read_swap_cache(entry);
unlock_kernel();
if (!page)
- return 0;
+ return -1;
flush_page_to_ram(page_address(page));
}
vma->vm_mm->rss++;
- mm->min_flt++;
+ tsk->min_flt++;
swap_free(entry);
pte = mk_pte(page_address(page), vma->vm_page_prot);
pte = pte_mkwrite(pte_mkdirty(pte));
}
set_pte(page_table, pte);
-
+ /* No need to invalidate - it was non-present before */
+ update_mmu_cache(vma, address, pte);
return 1;
}
/*
* This only needs the MM semaphore
*/
-static int do_anonymous_page(struct mm_struct * mm, struct vm_area_struct * vma, pte_t *page_table, int write_access, unsigned long addr)
+static int do_anonymous_page(struct task_struct * tsk, struct vm_area_struct * vma, pte_t *page_table, int write_access, unsigned long addr)
{
pte_t entry = pte_wrprotect(mk_pte(ZERO_PAGE(addr), vma->vm_page_prot));
if (write_access) {
unsigned long page = __get_free_page(GFP_USER);
if (!page)
- return 0;
+ return -1;
clear_page(page);
entry = pte_mkwrite(pte_mkdirty(mk_pte(page, vma->vm_page_prot)));
vma->vm_mm->rss++;
- mm->min_flt++;
+ tsk->min_flt++;
flush_page_to_ram(page);
}
set_pte(page_table, entry);
+ /* No need to invalidate - it was non-present before */
+ update_mmu_cache(vma, addr, entry);
return 1;
}
* This is called with the MM semaphore and the kernel lock held.
* We need to release the kernel lock as soon as possible..
*/
-static int do_no_page(struct mm_struct * mm, struct vm_area_struct * vma,
+static int do_no_page(struct task_struct * tsk, struct vm_area_struct * vma,
unsigned long address, int write_access, pte_t *page_table)
{
unsigned long page;
pte_t entry;
if (!vma->vm_ops || !vma->vm_ops->nopage)
- return do_anonymous_page(mm, vma, page_table, write_access, address);
+ return do_anonymous_page(tsk, vma, page_table, write_access, address);
/*
* The third argument is "no_share", which tells the low-level code
*/
page = vma->vm_ops->nopage(vma, address & PAGE_MASK, (vma->vm_flags & VM_SHARED)?0:write_access);
if (!page)
- return 0;
+ return 0; /* SIGBUS - but we _really_ should know whether it is OOM or SIGBUS */
- ++mm->maj_flt;
+ ++tsk->maj_flt;
++vma->vm_mm->rss;
/*
* This silly early PAGE_DIRTY setting removes a race
entry = pte_wrprotect(entry);
set_pte(page_table, entry);
/* no need to invalidate: a not-present page shouldn't be cached */
+ update_mmu_cache(vma, address, entry);
return 1;
}
* so we don't need to worry about a page being suddenly been added into
* our VM.
*/
-static inline int handle_pte_fault(struct mm_struct *mm,
+static inline int handle_pte_fault(struct task_struct *tsk,
struct vm_area_struct * vma, unsigned long address,
int write_access, pte_t * pte)
{
entry = *pte;
if (!pte_present(entry)) {
if (pte_none(entry))
- return do_no_page(mm, vma, address, write_access, pte);
- return do_swap_page(mm, vma, address, pte, pte_val(entry), write_access);
+ return do_no_page(tsk, vma, address, write_access, pte);
+ return do_swap_page(tsk, vma, address, pte, pte_val(entry), write_access);
}
/*
* lock to synchronize with kswapd, and verify that the entry
* didn't change from under us..
*/
- read_lock(&mm->page_table_lock);
+ spin_lock(&tsk->mm->page_table_lock);
if (pte_val(entry) == pte_val(*pte)) {
if (write_access) {
if (!pte_write(entry))
- return do_wp_page(mm, vma, address, pte, entry);
+ return do_wp_page(tsk, vma, address, pte, entry);
entry = pte_mkdirty(entry);
}
- set_pte(pte, pte_mkyoung(entry));
+ entry = pte_mkyoung(entry);
+ set_pte(pte, entry);
flush_tlb_page(vma, address);
+ update_mmu_cache(vma, address, entry);
}
- read_unlock(&mm->page_table_lock);
+ spin_unlock(&tsk->mm->page_table_lock);
return 1;
}
/*
* By the time we get here, we already hold the mm semaphore
*/
-int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct * vma,
+int handle_mm_fault(struct task_struct *tsk, struct vm_area_struct * vma,
unsigned long address, int write_access)
{
pgd_t *pgd;
pmd = pmd_alloc(pgd, address);
if (pmd) {
pte_t * pte = pte_alloc(pmd, address);
- if (pte) {
- if (handle_pte_fault(mm, vma, address, write_access, pte)) {
- update_mmu_cache(vma, address, *pte);
- return 1;
- }
- }
+ if (pte)
+ return handle_pte_fault(tsk, vma, address, write_access, pte);
}
- return 0;
+ return -1;
}
/*
* Simplistic page force-in..
*/
-void make_pages_present(unsigned long addr, unsigned long end)
+int make_pages_present(unsigned long addr, unsigned long end)
{
int write;
- struct mm_struct *mm = current->mm;
+ struct task_struct *tsk = current;
struct vm_area_struct * vma;
- vma = find_vma(mm, addr);
+ vma = find_vma(tsk->mm, addr);
write = (vma->vm_flags & VM_WRITE) != 0;
while (addr < end) {
- handle_mm_fault(mm, vma, addr, write);
+ if (handle_mm_fault(tsk, vma, addr, write) < 0)
+ return -1;
addr += PAGE_SIZE;
}
+ return 0;
}
return NULL;
}
-struct vm_area_struct * find_extend_vma(struct mm_struct * mm, unsigned long addr)
+struct vm_area_struct * find_extend_vma(struct task_struct * tsk, unsigned long addr)
{
struct vm_area_struct * vma;
unsigned long start;
addr &= PAGE_MASK;
- vma = find_vma(mm,addr);
+ vma = find_vma(tsk->mm,addr);
if (!vma)
return NULL;
if (vma->vm_start <= addr)
goto out_failed;
page = mem_map + MAP_NR(page_addr);
- write_lock(&tsk->mm->page_table_lock);
+ spin_lock(&tsk->mm->page_table_lock);
if (pte_val(pte) != pte_val(*page_table))
goto out_failed_unlock;
if (vma->vm_ops && vma->vm_ops->swapout) {
pid_t pid = tsk->pid;
pte_clear(page_table);
- write_unlock(&tsk->mm->page_table_lock);
+ spin_unlock(&tsk->mm->page_table_lock);
flush_tlb_page(vma, address);
vma->vm_mm->rss--;
goto out_failed; /* No swap space left */
vma->vm_mm->rss--;
- tsk->mm->nswap++;
+ tsk->nswap++;
set_pte(page_table, __pte(entry));
- write_unlock(&tsk->mm->page_table_lock);
+ spin_unlock(&tsk->mm->page_table_lock);
flush_tlb_page(vma, address);
swap_duplicate(entry); /* One for the process, one for the swap cache */
__free_page(page);
return 1;
out_failed_unlock:
- write_unlock(&tsk->mm->page_table_lock);
+ spin_unlock(&tsk->mm->page_table_lock);
out_failed:
return 0;
}
read_lock(&tasklist_lock);
p = init_task.next_task;
for (; p != &init_task; p = p->next_task) {
- if (!p->mm->swappable)
+ if (!p->swappable)
continue;
if (p->mm->rss <= 0)
continue;
break;
}
/* Bump the usage count and install the file. */
- atomic_inc(&fp[i]->f_count);
- current->files->fd[new_fd] = fp[i];
+ get_file(fp[i]);
+ fd_install(new_fd, fp[i]);
}
if (i > 0)
new_fpl = kmalloc(sizeof(*fpl), GFP_KERNEL);
if (new_fpl) {
- memcpy(new_fpl, fpl, sizeof(*fpl));
-
for (i=fpl->count-1; i>=0; i--)
- atomic_inc(&fpl->fp[i]->f_count);
+ get_file(fpl->fp[i]);
+ memcpy(new_fpl, fpl, sizeof(*fpl));
}
return new_fpl;
}
(opt->station == station || opt->station == 0) &&
(opt->net == net || opt->net == 0))
return sk;
- sk = sk->sklist_next;
+
+ sk = sk->next;
}
return NULL;
*
* PF_INET protocol family socket handler.
*
- * Version: $Id: af_inet.c,v 1.91 1999/06/09 08:28:55 davem Exp $
+ * Version: $Id: af_inet.c,v 1.93 1999/07/02 11:26:24 davem Exp $
*
* Authors: Ross Biro, <bir7@leland.Stanford.Edu>
* Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
static __inline__ void kill_sk_now(struct sock *sk)
{
- /* No longer exists. */
- del_from_prot_sklist(sk);
-
/* Remove from protocol hash chains. */
sk->prot->unhash(sk);
{
struct sock *sk=sock->sk;
if (sk->prot->setsockopt==NULL)
- return(-EOPNOTSUPP);
+ return -EOPNOTSUPP;
return sk->prot->setsockopt(sk,level,optname,optval,optlen);
}
{
struct sock *sk=sock->sk;
if (sk->prot->getsockopt==NULL)
- return(-EOPNOTSUPP);
+ return -EOPNOTSUPP;
return sk->prot->getsockopt(sk,level,optname,optval,optlen);
}
{
/* We may need to bind the socket. */
if (sk->num == 0) {
- sk->num = sk->prot->good_socknum();
- if (sk->num == 0)
- return(-EAGAIN);
+ if (sk->prot->get_port(sk, 0) != 0)
+ return -EAGAIN;
sk->sport = htons(sk->num);
sk->prot->hash(sk);
- add_to_prot_sklist(sk);
}
return 0;
}
int inet_listen(struct socket *sock, int backlog)
{
struct sock *sk = sock->sk;
+ unsigned char old_state;
if (sock->state != SS_UNCONNECTED || sock->type != SOCK_STREAM)
- return(-EINVAL);
-
- if (inet_autobind(sk) != 0)
- return -EAGAIN;
+ return -EINVAL;
- /* We might as well re use these. */
if ((unsigned) backlog == 0) /* BSDism */
backlog = 1;
if ((unsigned) backlog > SOMAXCONN)
backlog = SOMAXCONN;
sk->max_ack_backlog = backlog;
- if (sk->state != TCP_LISTEN) {
- sk->ack_backlog = 0;
+
+ /* Really, if the socket is already in listen state
+ * we can only allow the backlog to be adjusted.
+ */
+ old_state = sk->state;
+ if (old_state != TCP_LISTEN) {
sk->state = TCP_LISTEN;
+ sk->ack_backlog = 0;
+ if (sk->num == 0) {
+ if (sk->prot->get_port(sk, 0) != 0) {
+ sk->state = old_state;
+ return -EAGAIN;
+ }
+ sk->sport = htons(sk->num);
+ }
+
dst_release(xchg(&sk->dst_cache, NULL));
- sk->prot->rehash(sk);
- add_to_prot_sklist(sk);
+ sk->prot->hash(sk);
+ sk->socket->flags |= SO_ACCEPTCON;
sk->write_space = inet_listen_write_space;
}
- sk->socket->flags |= SO_ACCEPTCON;
- return(0);
+ return 0;
}
/*
/* Add to protocol hash chains. */
sk->prot->hash(sk);
- add_to_prot_sklist(sk);
}
if (sk->prot->init) {
*/
timeout = 0;
if (sk->linger && !(current->flags & PF_EXITING)) {
- timeout = MAX_SCHEDULE_TIMEOUT;
-
- /* XXX This makes no sense whatsoever... -DaveM */
- if (!sk->lingertime)
- timeout = HZ*sk->lingertime;
+ timeout = HZ * sk->lingertime;
+ if (!timeout)
+ timeout = MAX_SCHEDULE_TIMEOUT;
}
sock->sk = NULL;
sk->socket = NULL;
if((snum >= PORT_MASQ_BEGIN) && (snum <= PORT_MASQ_END))
return -EADDRINUSE;
#endif
- if (snum == 0)
- snum = sk->prot->good_socknum();
- if (snum < PROT_SOCK && !capable(CAP_NET_BIND_SERVICE))
- return(-EACCES);
+ if (snum && snum < PROT_SOCK && !capable(CAP_NET_BIND_SERVICE))
+ return -EACCES;
/* Make sure we are allowed to bind here. */
- if(sk->prot->verify_bind(sk, snum))
+ if (sk->prot->get_port(sk, snum) != 0)
return -EADDRINUSE;
- sk->num = snum;
- sk->sport = htons(snum);
+ sk->sport = htons(sk->num);
sk->daddr = 0;
sk->dport = 0;
- sk->prot->rehash(sk);
- add_to_prot_sklist(sk);
+ sk->prot->hash(sk);
dst_release(sk->dst_cache);
sk->dst_cache=NULL;
return(0);
int err;
if (inet_autobind(sk) != 0)
- return(-EAGAIN);
+ return -EAGAIN;
if (sk->prot->connect == NULL)
- return(-EOPNOTSUPP);
+ return -EOPNOTSUPP;
err = sk->prot->connect(sk, (struct sockaddr *)uaddr, addr_len);
if (err < 0)
- return(err);
+ return err;
return(0);
}
if (flags & O_NONBLOCK)
return -EALREADY;
} else {
+ if (sk->prot->connect == NULL)
+ return -EOPNOTSUPP;
+
/* We may need to bind the socket. */
if (inet_autobind(sk) != 0)
- return(-EAGAIN);
- if (sk->prot->connect == NULL)
- return(-EOPNOTSUPP);
+ return -EAGAIN;
+
err = sk->prot->connect(sk, uaddr, addr_len);
/* Note: there is a theoretical race here when an wake up
occurred before inet_wait_for_connect is entered. In 2.3
the wait queue setup should be moved before the low level
connect call. -AK*/
if (err < 0)
- return(err);
+ return err;
sock->state = SS_CONNECTING;
}
goto sock_error;
if (sk->state != TCP_ESTABLISHED && (flags & O_NONBLOCK))
- return (-EINPROGRESS);
+ return -EINPROGRESS;
if (sk->state == TCP_SYN_SENT || sk->state == TCP_SYN_RECV) {
inet_wait_for_connect(sk);
sock->state = SS_CONNECTED;
if ((sk->state != TCP_ESTABLISHED) && sk->err)
goto sock_error;
- return(0);
+ return 0;
sock_error:
/* This is ugly but needed to fix a race in the ICMP error handler */
sin->sin_family = AF_INET;
if (peer) {
if (!tcp_connected(sk->state))
- return(-ENOTCONN);
+ return -ENOTCONN;
sin->sin_port = sk->dport;
sin->sin_addr.s_addr = sk->daddr;
} else {
int err;
if (sock->flags & SO_ACCEPTCON)
- return(-EINVAL);
+ return -EINVAL;
if (sk->prot->recvmsg == NULL)
- return(-EOPNOTSUPP);
+ return -EOPNOTSUPP;
/* We may need to bind the socket. */
if (inet_autobind(sk) != 0)
- return(-EAGAIN);
+ return -EAGAIN;
err = sk->prot->recvmsg(sk, msg, size, flags&MSG_DONTWAIT,
flags&~MSG_DONTWAIT, &addr_len);
if (err >= 0)
if (sk->shutdown & SEND_SHUTDOWN) {
if (!(msg->msg_flags&MSG_NOSIGNAL))
send_sig(SIGPIPE, current, 1);
- return(-EPIPE);
+ return -EPIPE;
}
if (sk->prot->sendmsg == NULL)
- return(-EOPNOTSUPP);
+ return -EOPNOTSUPP;
if(sk->err)
return sock_error(sk);
/* We may need to bind the socket. */
- if(inet_autobind(sk) != 0)
+ if (inet_autobind(sk) != 0)
return -EAGAIN;
return sk->prot->sendmsg(sk, msg, size);
1->2 bit 2 snds.
2->3 */
if ((how & ~SHUTDOWN_MASK) || how==0) /* MAXINT->0 */
- return(-EINVAL);
+ return -EINVAL;
+ if (!sk)
+ return -ENOTCONN;
if (sock->state == SS_CONNECTING && sk->state == TCP_ESTABLISHED)
sock->state = SS_CONNECTED;
- if (!sk || !tcp_connected(sk->state))
- return(-ENOTCONN);
+ if (!tcp_connected(sk->state))
+ return -ENOTCONN;
sk->shutdown |= how;
if (sk->prot->shutdown)
sk->prot->shutdown(sk, how);
* PROC file system. It is mainly used for debugging and
* statistics.
*
- * Version: $Id: proc.c,v 1.35 1999/05/27 00:37:38 davem Exp $
+ * Version: $Id: proc.c,v 1.36 1999/07/02 11:26:34 davem Exp $
*
* Authors: Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
* Gerald J. Heim, <heim@peanuts.informatik.uni-tuebingen.de>
#include <net/sock.h>
#include <net/raw.h>
-/* Format a single open_request into tmpbuf. */
-static inline void get__openreq(struct sock *sk, struct open_request *req,
- char *tmpbuf,
- int i)
-{
- sprintf(tmpbuf, "%4d: %08lX:%04X %08lX:%04X"
- " %02X %08X:%08X %02X:%08lX %08X %5d %8d %u",
- i,
- (long unsigned int)req->af.v4_req.loc_addr,
- ntohs(sk->sport),
- (long unsigned int)req->af.v4_req.rmt_addr,
- ntohs(req->rmt_port),
- TCP_SYN_RECV,
- 0,0, /* could print option size, but that is af dependent. */
- 1, /* timers active (only the expire timer) */
- (unsigned long)(req->expires - jiffies),
- req->retrans,
- sk->socket ? sk->socket->inode->i_uid : 0,
- 0, /* non standard timer */
- 0 /* open_requests have no inode */
- );
-}
-
-/* Format a single socket into tmpbuf. */
-static inline void get__sock(struct sock *sp, char *tmpbuf, int i, int format)
-{
- unsigned long dest, src;
- unsigned short destp, srcp;
- int timer_active, timer_active1, timer_active2;
- int tw_bucket = 0;
- unsigned long timer_expires;
- struct tcp_opt *tp = &sp->tp_pinfo.af_tcp;
-
- dest = sp->daddr;
- src = sp->rcv_saddr;
- destp = sp->dport;
- srcp = sp->sport;
-
- /* FIXME: The fact that retransmit_timer occurs as a field
- * in two different parts of the socket structure is,
- * to say the least, confusing. This code now uses the
- * right retransmit_timer variable, but I'm not sure
- * the rest of the timer stuff is still correct.
- * In particular I'm not sure what the timeout value
- * is suppose to reflect (as opposed to tm->when). -- erics
- */
-
- destp = ntohs(destp);
- srcp = ntohs(srcp);
- if((format == 0) && (sp->state == TCP_TIME_WAIT)) {
- extern int tcp_tw_death_row_slot;
- struct tcp_tw_bucket *tw = (struct tcp_tw_bucket *)sp;
- int slot_dist;
-
- tw_bucket = 1;
- timer_active1 = timer_active2 = 0;
- timer_active = 3;
- slot_dist = tw->death_slot;
- if(slot_dist > tcp_tw_death_row_slot)
- slot_dist = (TCP_TWKILL_SLOTS - slot_dist) + tcp_tw_death_row_slot;
- else
- slot_dist = tcp_tw_death_row_slot - slot_dist;
- timer_expires = jiffies + (slot_dist * TCP_TWKILL_PERIOD);
- } else {
- timer_active1 = tp->retransmit_timer.prev != NULL;
- timer_active2 = sp->timer.prev != NULL;
- timer_active = 0;
- timer_expires = (unsigned) -1;
- }
- if (timer_active1 && tp->retransmit_timer.expires < timer_expires) {
- timer_active = 1;
- timer_expires = tp->retransmit_timer.expires;
- }
- if (timer_active2 && sp->timer.expires < timer_expires) {
- timer_active = 2;
- timer_expires = sp->timer.expires;
- }
- if(timer_active == 0)
- timer_expires = jiffies;
- sprintf(tmpbuf, "%4d: %08lX:%04X %08lX:%04X"
- " %02X %08X:%08X %02X:%08lX %08X %5d %8d %ld",
- i, src, srcp, dest, destp, sp->state,
- (tw_bucket ?
- 0 :
- (format == 0) ?
- tp->write_seq-tp->snd_una : atomic_read(&sp->wmem_alloc)),
- (tw_bucket ?
- 0 :
- (format == 0) ?
- tp->rcv_nxt-tp->copied_seq: atomic_read(&sp->rmem_alloc)),
- timer_active, timer_expires-jiffies,
- (tw_bucket ? 0 : tp->retransmits),
- (!tw_bucket && sp->socket) ? sp->socket->inode->i_uid : 0,
- (!tw_bucket && timer_active) ? sp->timeout : 0,
- (!tw_bucket && sp->socket) ? sp->socket->inode->i_ino : 0);
-}
-
-/*
- * Get__netinfo returns the length of that string.
- *
- * KNOWN BUGS
- * As in get_unix_netinfo, the buffer might be too small. If this
- * happens, get__netinfo returns only part of the available infos.
- *
- * Assumes that buffer length is a multiply of 128 - if not it will
- * write past the end.
- */
-static int
-get__netinfo(struct proto *pro, char *buffer, int format, char **start, off_t offset, int length)
-{
- struct sock *sp, *next;
- int len=0, i = 0;
- off_t pos=0;
- off_t begin;
- char tmpbuf[129];
-
- if (offset < 128)
- len += sprintf(buffer, "%-127s\n",
- " sl local_address rem_address st tx_queue "
- "rx_queue tr tm->when retrnsmt uid timeout inode");
- pos = 128;
- SOCKHASH_LOCK_READ();
- sp = pro->sklist_next;
- while(sp != (struct sock *)pro) {
- if (format == 0 && sp->state == TCP_LISTEN) {
- struct open_request *req;
-
- for (req = sp->tp_pinfo.af_tcp.syn_wait_queue; req;
- i++, req = req->dl_next) {
- if (req->sk)
- continue;
- pos += 128;
- if (pos < offset)
- continue;
- get__openreq(sp, req, tmpbuf, i);
- len += sprintf(buffer+len, "%-127s\n", tmpbuf);
- if(len >= length)
- goto out;
- }
- }
-
- pos += 128;
- if (pos < offset)
- goto next;
-
- get__sock(sp, tmpbuf, i, format);
-
- len += sprintf(buffer+len, "%-127s\n", tmpbuf);
- if(len >= length)
- break;
- next:
- next = sp->sklist_next;
- sp = next;
- i++;
- }
-out:
- SOCKHASH_UNLOCK_READ();
-
- begin = len - (pos - offset);
- *start = buffer + begin;
- len -= begin;
- if(len>length)
- len = length;
- if (len<0)
- len = 0;
- return len;
-}
-
-int tcp_get_info(char *buffer, char **start, off_t offset, int length, int dummy)
-{
- return get__netinfo(&tcp_prot, buffer,0, start, offset, length);
-}
-
-int udp_get_info(char *buffer, char **start, off_t offset, int length, int dummy)
-{
- return get__netinfo(&udp_prot, buffer,1, start, offset, length);
-}
-
-int raw_get_info(char *buffer, char **start, off_t offset, int length, int dummy)
-{
- return get__netinfo(&raw_prot, buffer,1, start, offset, length);
-}
-
/*
* Report socket allocation statistics [mea@utu.fi]
*/
*
* RAW - implementation of IP "raw" sockets.
*
- * Version: $Id: raw.c,v 1.41 1999/05/30 01:16:19 davem Exp $
+ * Version: $Id: raw.c,v 1.42 1999/07/02 11:26:26 davem Exp $
*
* Authors: Ross Biro, <bir7@leland.Stanford.Edu>
* Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
static void raw_v4_hash(struct sock *sk)
{
- struct sock **skp;
- int num = sk->num;
+ struct sock **skp = &raw_v4_htable[sk->num & (RAWV4_HTABLE_SIZE - 1)];
- num &= (RAWV4_HTABLE_SIZE - 1);
- skp = &raw_v4_htable[num];
SOCKHASH_LOCK_WRITE();
- sk->next = *skp;
+ if ((sk->next = *skp) != NULL)
+ (*skp)->pprev = &sk->next;
*skp = sk;
- sk->hashent = num;
+ sk->pprev = skp;
+ sk->prot->inuse++;
+ if(sk->prot->highestinuse < sk->prot->inuse)
+ sk->prot->highestinuse = sk->prot->inuse;
SOCKHASH_UNLOCK_WRITE();
}
static void raw_v4_unhash(struct sock *sk)
{
- struct sock **skp;
- int num = sk->num;
-
- num &= (RAWV4_HTABLE_SIZE - 1);
- skp = &raw_v4_htable[num];
-
SOCKHASH_LOCK_WRITE();
- while(*skp != NULL) {
- if(*skp == sk) {
- *skp = sk->next;
- break;
- }
- skp = &((*skp)->next);
+ if (sk->pprev) {
+ if (sk->next)
+ sk->next->pprev = sk->pprev;
+ *sk->pprev = sk->next;
+ sk->pprev = NULL;
+ sk->prot->inuse--;
}
SOCKHASH_UNLOCK_WRITE();
}
-static void raw_v4_rehash(struct sock *sk)
-{
- struct sock **skp;
- int num = sk->num;
- int oldnum = sk->hashent;
-
- num &= (RAWV4_HTABLE_SIZE - 1);
- skp = &raw_v4_htable[oldnum];
-
- SOCKHASH_LOCK_WRITE();
- while(*skp != NULL) {
- if(*skp == sk) {
- *skp = sk->next;
- break;
- }
- skp = &((*skp)->next);
- }
- sk->next = raw_v4_htable[num];
- raw_v4_htable[num] = sk;
- sk->hashent = num;
- SOCKHASH_UNLOCK_WRITE();
-}
-
static __inline__ struct sock *__raw_v4_lookup(struct sock *sk, unsigned short num,
unsigned long raddr, unsigned long laddr,
int dif)
return -ENOPROTOOPT;
}
+static void get_raw_sock(struct sock *sp, char *tmpbuf, int i)
+{
+ unsigned int dest, src;
+ __u16 destp, srcp;
+ int timer_active;
+ unsigned long timer_expires;
+
+ dest = sp->daddr;
+ src = sp->rcv_saddr;
+ destp = ntohs(sp->dport);
+ srcp = ntohs(sp->sport);
+ timer_active = (sp->timer.prev != NULL) ? 2 : 0;
+ timer_expires = (timer_active == 2 ? sp->timer.expires : jiffies);
+ sprintf(tmpbuf, "%4d: %08X:%04X %08X:%04X"
+ " %02X %08X:%08X %02X:%08lX %08X %5d %8d %ld",
+ i, src, srcp, dest, destp, sp->state,
+ atomic_read(&sp->wmem_alloc), atomic_read(&sp->rmem_alloc),
+ timer_active, timer_expires-jiffies, 0,
+ sp->socket ? sp->socket->inode->i_uid : 0, timer_active ? sp->timeout : 0,
+ sp->socket ? sp->socket->inode->i_ino : 0);
+}
+
+int raw_get_info(char *buffer, char **start, off_t offset, int length, int dummy)
+{
+ int len = 0, num = 0, i;
+ off_t pos = 0;
+ off_t begin;
+ char tmpbuf[129];
+
+ if (offset < 128)
+ len += sprintf(buffer, "%-127s\n",
+ " sl local_address rem_address st tx_queue "
+ "rx_queue tr tm->when retrnsmt uid timeout inode");
+ pos = 128;
+ SOCKHASH_LOCK_READ();
+ for (i = 0; i < RAWV4_HTABLE_SIZE; i++) {
+ struct sock *sk;
+
+ for (sk = raw_v4_htable[i]; sk; sk = sk->next, num++) {
+ if (sk->family != PF_INET)
+ continue;
+ pos += 128;
+ if (pos < offset)
+ continue;
+ get_raw_sock(sk, tmpbuf, i);
+ len += sprintf(buffer+len, "%-127s\n", tmpbuf);
+ if(len >= length)
+ goto out;
+ }
+ }
+out:
+ SOCKHASH_UNLOCK_READ();
+ begin = len - (pos - offset);
+ *start = buffer + begin;
+ len -= begin;
+ if(len > length)
+ len = length;
+ if (len < 0)
+ len = 0;
+ return len;
+}
+
struct proto raw_prot = {
- (struct sock *)&raw_prot, /* sklist_next */
- (struct sock *)&raw_prot, /* sklist_prev */
raw_close, /* close */
udp_connect, /* connect */
NULL, /* accept */
raw_rcv_skb, /* backlog_rcv */
raw_v4_hash, /* hash */
raw_v4_unhash, /* unhash */
- raw_v4_rehash, /* rehash */
- NULL, /* good_socknum */
- NULL, /* verify_bind */
+ NULL, /* get_port */
128, /* max_header */
0, /* retransmits */
"RAW", /* name */
*
* Implementation of the Transmission Control Protocol(TCP).
*
- * Version: $Id: tcp_input.c,v 1.169 1999/06/09 08:29:13 davem Exp $
+ * Version: $Id: tcp_input.c,v 1.170 1999/07/02 11:26:28 davem Exp $
*
* Authors: Ross Biro, <bir7@leland.Stanford.Edu>
* Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
/* Must be called only from BH context. */
void tcp_timewait_kill(struct tcp_tw_bucket *tw)
{
+ struct tcp_bind_bucket *tb = tw->tb;
+
SOCKHASH_LOCK_WRITE_BH();
- /* Unlink from various places. */
+ /* Disassociate with bind bucket. */
if(tw->bind_next)
tw->bind_next->bind_pprev = tw->bind_pprev;
*(tw->bind_pprev) = tw->bind_next;
- if(tw->tb->owners == NULL)
- tcp_inc_slow_timer(TCP_SLT_BUCKETGC);
+ if (tb->owners == NULL) {
+ if (tb->next)
+ tb->next->pprev = tb->pprev;
+ *(tb->pprev) = tb->next;
+ kmem_cache_free(tcp_bucket_cachep, tb);
+ }
+ /* Unlink from established hashes. */
if(tw->next)
tw->next->pprev = tw->pprev;
*tw->pprev = tw->next;
- /* We decremented the prot->inuse count when we entered TIME_WAIT
- * and the sock from which this came was destroyed.
- */
- tw->sklist_next->sklist_prev = tw->sklist_prev;
- tw->sklist_prev->sklist_next = tw->sklist_next;
-
SOCKHASH_UNLOCK_WRITE_BH();
/* Ok, now free it up. */
sk->bind_next->bind_pprev = &tw->bind_next;
tw->bind_pprev = sk->bind_pprev;
*sk->bind_pprev = (struct sock *)tw;
+ sk->prev = NULL;
- /* Step 3: Same for the protocol sklist. */
- (tw->sklist_next = sk->sklist_next)->sklist_prev = (struct sock *)tw;
- (tw->sklist_prev = sk->sklist_prev)->sklist_next = (struct sock *)tw;
- sk->sklist_next = NULL;
+ /* Step 3: Un-charge protocol socket in-use count. */
sk->prot->inuse--;
/* Step 4: Hash TW into TIMEWAIT half of established hash table. */
*
* Implementation of the Transmission Control Protocol(TCP).
*
- * Version: $Id: tcp_ipv4.c,v 1.180 1999/06/09 08:29:19 davem Exp $
+ * Version: $Id: tcp_ipv4.c,v 1.181 1999/07/02 11:26:31 davem Exp $
*
* IPv4 specific functions
*
return tcp_hashfn(laddr, lport, faddr, fport);
}
-/* Invariant, sk->num is non-zero. */
-void tcp_bucket_unlock(struct sock *sk)
-{
- struct tcp_bind_bucket *tb;
- unsigned short snum = sk->num;
-
- SOCKHASH_LOCK_WRITE();
- for(tb = tcp_bhash[tcp_bhashfn(snum)]; tb; tb = tb->next) {
- if(tb->port == snum) {
- if(tb->owners == NULL &&
- (tb->flags & TCPB_FLAG_LOCKED)) {
- tb->flags &= ~(TCPB_FLAG_LOCKED |
- TCPB_FLAG_FASTREUSE);
- tcp_inc_slow_timer(TCP_SLT_BUCKETGC);
- }
- break;
- }
- }
- SOCKHASH_UNLOCK_WRITE();
-}
-
-/* The sockhash lock must be held as a writer here. */
+/* Allocate and initialize a new TCP local port bind bucket.
+ * The sockhash lock must be held as a writer here.
+ */
struct tcp_bind_bucket *tcp_bucket_create(unsigned short snum)
{
struct tcp_bind_bucket *tb;
struct tcp_bind_bucket **head =
&tcp_bhash[tcp_bhashfn(snum)];
tb->port = snum;
- tb->flags = TCPB_FLAG_LOCKED;
+ tb->fastreuse = 0;
tb->owners = NULL;
if((tb->next = *head) != NULL)
tb->next->pprev = &tb->next;
tb = tcp_bhash[tcp_bhashfn(snum)];
for( ; (tb && (tb->port != snum)); tb = tb->next)
;
- if(tb == NULL && tcp_bucket_create(snum) == NULL)
- ret = 1;
+ ret = 0;
+ if (tb == NULL) {
+ if ((tb = tcp_bucket_create(snum)) == NULL)
+ ret = 1;
+ }
SOCKHASH_UNLOCK_WRITE();
return ret;
}
#endif
-static int tcp_v4_verify_bind(struct sock *sk, unsigned short snum)
+static __inline__ void __tcp_inherit_port(struct sock *sk, struct sock *child)
+{
+ struct tcp_bind_bucket *tb = (struct tcp_bind_bucket *)sk->prev;
+
+ if ((child->bind_next = tb->owners) != NULL)
+ tb->owners->bind_pprev = &child->bind_next;
+ tb->owners = child;
+ child->bind_pprev = &tb->owners;
+ child->prev = (struct sock *) tb;
+}
+
+__inline__ void tcp_inherit_port(struct sock *sk, struct sock *child)
+{
+ SOCKHASH_LOCK_WRITE();
+ __tcp_inherit_port(sk, child);
+ SOCKHASH_UNLOCK_WRITE();
+}
+
+/* Obtain a reference to a local port for the given sock,
+ * if snum is zero it means select any available local port.
+ */
+static int tcp_v4_get_port(struct sock *sk, unsigned short snum)
{
struct tcp_bind_bucket *tb;
- int result = 0;
SOCKHASH_LOCK_WRITE();
- for(tb = tcp_bhash[tcp_bhashfn(snum)];
- (tb && (tb->port != snum));
- tb = tb->next)
- ;
- if(tb && tb->owners) {
- /* Fast path for reuse ports, see include/net/tcp.h for a very
- * detailed description of why this works, and why it is worth
- * the effort at all. -DaveM
- */
- if((tb->flags & TCPB_FLAG_FASTREUSE) &&
- (sk->reuse != 0)) {
- goto go_like_smoke;
+ if (snum == 0) {
+ int rover = tcp_port_rover;
+ int low = sysctl_local_port_range[0];
+ int high = sysctl_local_port_range[1];
+ int remaining = (high - low) + 1;
+
+ do { rover++;
+ if ((rover < low) || (rover > high))
+ rover = low;
+ tb = tcp_bhash[tcp_bhashfn(rover)];
+ for ( ; tb; tb = tb->next)
+ if (tb->port == rover)
+ goto next;
+ break;
+ next:
+ } while (--remaining > 0);
+ tcp_port_rover = rover;
+
+ /* Exhausted local port range during search? */
+ if (remaining <= 0)
+ goto fail;
+
+ /* OK, here is the one we will use. */
+ snum = rover;
+ tb = NULL;
+ } else {
+ for (tb = tcp_bhash[tcp_bhashfn(snum)];
+ tb != NULL;
+ tb = tb->next)
+ if (tb->port == snum)
+ break;
+ }
+ if (tb != NULL && tb->owners != NULL) {
+ if (tb->fastreuse != 0 && sk->reuse != 0) {
+ goto success;
} else {
- struct sock *sk2;
+ struct sock *sk2 = tb->owners;
int sk_reuse = sk->reuse;
- /* We must walk the whole port owner list in this case. -DaveM */
- for(sk2 = tb->owners; sk2; sk2 = sk2->bind_next) {
+ for( ; sk2 != NULL; sk2 = sk2->bind_next) {
if (sk->bound_dev_if == sk2->bound_dev_if) {
- if(!sk_reuse || !sk2->reuse || sk2->state == TCP_LISTEN) {
- if(!sk2->rcv_saddr ||
- !sk->rcv_saddr ||
- (sk2->rcv_saddr == sk->rcv_saddr))
+ if (!sk_reuse ||
+ !sk2->reuse ||
+ sk2->state == TCP_LISTEN) {
+ if (!sk2->rcv_saddr ||
+ !sk->rcv_saddr ||
+ (sk2->rcv_saddr == sk->rcv_saddr))
break;
}
}
}
- if(sk2 != NULL)
- result = 1;
+ /* If we found a conflict, fail. */
+ if (sk2 != NULL)
+ goto fail;
}
}
- if(result == 0) {
- if(tb == NULL) {
- if((tb = tcp_bucket_create(snum)) == NULL)
- result = 1;
- else if (sk->reuse && sk->state != TCP_LISTEN)
- tb->flags |= TCPB_FLAG_FASTREUSE;
- } else {
- /* It could be pending garbage collection, this
- * kills the race and prevents it from disappearing
- * out from under us by the time we use it. -DaveM
- */
- if(tb->owners == NULL) {
- if (!(tb->flags & TCPB_FLAG_LOCKED)) {
- tb->flags = (TCPB_FLAG_LOCKED |
- ((sk->reuse &&
- sk->state != TCP_LISTEN) ?
- TCPB_FLAG_FASTREUSE : 0));
- tcp_dec_slow_timer(TCP_SLT_BUCKETGC);
- } else if (!(tb->flags & TCPB_FLAG_GOODSOCKNUM)) {
- /* Someone is in between the bind
- * and the actual connect or listen.
- * See if it was a legitimate reuse
- * and we are as well, else punt.
- */
- if (sk->reuse == 0 ||
- !(tb->flags & TCPB_FLAG_FASTREUSE))
- result = 1;
- } else
- tb->flags &= ~TCPB_FLAG_GOODSOCKNUM;
- }
- }
- }
-go_like_smoke:
+ if (tb == NULL &&
+ (tb = tcp_bucket_create(snum)) == NULL)
+ goto fail;
+ if (tb->owners == NULL) {
+ if (sk->reuse && sk->state != TCP_LISTEN)
+ tb->fastreuse = 1;
+ else
+ tb->fastreuse = 0;
+ } else if (tb->fastreuse &&
+ ((sk->reuse == 0) || (sk->state == TCP_LISTEN)))
+ tb->fastreuse = 0;
+success:
+ sk->num = snum;
+ if ((sk->bind_next = tb->owners) != NULL)
+ tb->owners->bind_pprev = &sk->bind_next;
+ tb->owners = sk;
+ sk->bind_pprev = &tb->owners;
+ sk->prev = (struct sock *) tb;
+
SOCKHASH_UNLOCK_WRITE();
- return result;
+ return 0;
+
+fail:
+ SOCKHASH_UNLOCK_WRITE();
+ return 1;
}
-unsigned short tcp_good_socknum(void)
+/* Get rid of any references to a local port held by the
+ * given sock.
+ */
+__inline__ void __tcp_put_port(struct sock *sk)
{
struct tcp_bind_bucket *tb;
- int low = sysctl_local_port_range[0];
- int high = sysctl_local_port_range[1];
- int remaining = (high - low) + 1;
- int rover;
+ tb = (struct tcp_bind_bucket *) sk->prev;
+ if (sk->bind_next)
+ sk->bind_next->bind_pprev = sk->bind_pprev;
+ *(sk->bind_pprev) = sk->bind_next;
+ sk->prev = NULL;
+ if (tb->owners == NULL) {
+ if (tb->next)
+ tb->next->pprev = tb->pprev;
+ *(tb->pprev) = tb->next;
+ kmem_cache_free(tcp_bucket_cachep, tb);
+ }
+}
+
+void tcp_put_port(struct sock *sk)
+{
SOCKHASH_LOCK_WRITE();
- rover = tcp_port_rover;
- do {
- rover += 1;
- if((rover < low) || (rover > high))
- rover = low;
- tb = tcp_bhash[tcp_bhashfn(rover)];
- for( ; tb; tb = tb->next) {
- if(tb->port == rover)
- goto next;
- }
- break;
- next:
- } while(--remaining > 0);
- tcp_port_rover = rover;
- tb = NULL;
- if((remaining <= 0) || ((tb = tcp_bucket_create(rover)) == NULL))
- rover = 0;
- if (tb != NULL)
- tb->flags |= TCPB_FLAG_GOODSOCKNUM;
+ __tcp_put_port(sk);
SOCKHASH_UNLOCK_WRITE();
+}
+
+static __inline__ void __tcp_v4_hash(struct sock *sk)
+{
+ struct sock **skp;
- return rover;
+ if(sk->state == TCP_LISTEN)
+ skp = &tcp_listening_hash[tcp_sk_listen_hashfn(sk)];
+ else
+ skp = &tcp_ehash[(sk->hashent = tcp_sk_hashfn(sk))];
+
+ if((sk->next = *skp) != NULL)
+ (*skp)->pprev = &sk->next;
+ *skp = sk;
+ sk->pprev = skp;
+ sk->prot->inuse++;
+ if(sk->prot->highestinuse < sk->prot->inuse)
+ sk->prot->highestinuse = sk->prot->inuse;
}
+/* proto->hash callback: hash sk under the sockhash write lock.
+ * Sockets in TCP_CLOSE are deliberately left unhashed.
+ */
static void tcp_v4_hash(struct sock *sk)
{
	if (sk->state != TCP_CLOSE) {
- struct sock **skp;
-
		SOCKHASH_LOCK_WRITE();
- skp = &tcp_ehash[(sk->hashent = tcp_sk_hashfn(sk))];
- if((sk->next = *skp) != NULL)
- (*skp)->pprev = &sk->next;
- *skp = sk;
- sk->pprev = skp;
- tcp_sk_bindify(sk);
+ __tcp_v4_hash(sk);
		SOCKHASH_UNLOCK_WRITE();
	}
}
sk->next->pprev = sk->pprev;
*sk->pprev = sk->next;
sk->pprev = NULL;
+ sk->prot->inuse--;
tcp_reg_zap(sk);
- tcp_sk_unbindify(sk);
- }
- SOCKHASH_UNLOCK_WRITE();
-}
-
-static void tcp_v4_rehash(struct sock *sk)
-{
- unsigned char state;
-
- SOCKHASH_LOCK_WRITE();
- state = sk->state;
- if(sk->pprev != NULL) {
- if(sk->next)
- sk->next->pprev = sk->pprev;
- *sk->pprev = sk->next;
- sk->pprev = NULL;
- tcp_reg_zap(sk);
- }
- if(state != TCP_CLOSE) {
- struct sock **skp;
-
- if(state == TCP_LISTEN)
- skp = &tcp_listening_hash[tcp_sk_listen_hashfn(sk)];
- else
- skp = &tcp_ehash[(sk->hashent = tcp_sk_hashfn(sk))];
-
- if((sk->next = *skp) != NULL)
- (*skp)->pprev = &sk->next;
- *skp = sk;
- sk->pprev = skp;
- if(state == TCP_LISTEN)
- tcp_sk_bindify(sk);
+ __tcp_put_port(sk);
}
SOCKHASH_UNLOCK_WRITE();
}
#endif
memcpy(newsk, sk, sizeof(*newsk));
- newsk->sklist_next = NULL;
newsk->state = TCP_SYN_RECV;
/* Clone the TCP header template */
if (newsk->sndbuf < (3 * newtp->pmtu_cookie))
newsk->sndbuf = min ((3 * newtp->pmtu_cookie), sysctl_wmem_max);
- tcp_v4_hash(newsk);
- add_to_prot_sklist(newsk);
+ SOCKHASH_LOCK_WRITE();
+ __tcp_v4_hash(newsk);
+ __tcp_inherit_port(sk, newsk);
+ SOCKHASH_UNLOCK_WRITE();
+
sk->data_ready(sk, 0); /* Deliver SIGIO */
return newsk;
goto discard_it;
}
+/* Move sk to the established-hash chain matching its (possibly new)
+ * address tuple: unlink from the current chain if hashed, recompute
+ * sk->hashent, and insert at the head of the new chain.  Takes the
+ * sockhash write lock itself.
+ */
+static void __tcp_v4_rehash(struct sock *sk)
+{
+ struct sock **skp = &tcp_ehash[(sk->hashent = tcp_sk_hashfn(sk))];
+
+ SOCKHASH_LOCK_WRITE();
+ if(sk->pprev) {
+ /* Currently hashed: unlink and drop any demux register entry. */
+ if(sk->next)
+ sk->next->pprev = sk->pprev;
+ *sk->pprev = sk->next;
+ sk->pprev = NULL;
+ tcp_reg_zap(sk);
+ }
+ if((sk->next = *skp) != NULL)
+ (*skp)->pprev = &sk->next;
+ *skp = sk;
+ sk->pprev = skp;
+ SOCKHASH_UNLOCK_WRITE();
+}
+
int tcp_v4_rebuild_header(struct sock *sk)
{
struct rtable *rt = (struct rtable *)sk->dst_cache;
sk->saddr = new_saddr;
sk->rcv_saddr = new_saddr;
- tcp_v4_rehash(sk);
+
+ /* XXX The only one ugly spot where we need to
+ * XXX really change the sockets identity after
+ * XXX it has entered the hashes. -DaveM
+ */
+ __tcp_v4_rehash(sk);
}
return 0;
while((skb = __skb_dequeue(&tp->out_of_order_queue)) != NULL)
kfree_skb(skb);
- /* Clean up a locked TCP bind bucket, this only happens if a
+ /* Clean up a referenced TCP bind bucket, this only happens if a
* port is allocated for a socket, but it never fully connects.
- * In which case we will find num to be non-zero and daddr to
- * be zero.
*/
- if(sk->daddr == 0 && sk->num != 0)
- tcp_bucket_unlock(sk);
+ if(sk->prev != NULL)
+ tcp_put_port(sk);
return 0;
}
+/* Proc filesystem TCP sock list dumping. */
+/* Format one pending connection request (SYN_RECV open_request) of a
+ * listening socket as a /proc/net/tcp row into tmpbuf; 'i' is the row
+ * number for the "sl" column.
+ */
+static void get_openreq(struct sock *sk, struct open_request *req, char *tmpbuf, int i)
+{
+ sprintf(tmpbuf, "%4d: %08lX:%04X %08lX:%04X"
+ " %02X %08X:%08X %02X:%08lX %08X %5d %8d %u",
+ i,
+ (long unsigned int)req->af.v4_req.loc_addr,
+ ntohs(sk->sport),
+ (long unsigned int)req->af.v4_req.rmt_addr,
+ ntohs(req->rmt_port),
+ TCP_SYN_RECV,
+ 0,0, /* could print option size, but that is af dependent. */
+ 1, /* timers active (only the expire timer) */
+ (unsigned long)(req->expires - jiffies),
+ req->retrans,
+ sk->socket ? sk->socket->inode->i_uid : 0,
+ 0, /* non standard timer */
+ 0 /* open_requests have no inode */
+ );
+}
+
+/* Format one full TCP socket as a /proc/net/tcp row into tmpbuf.
+ * Reports whichever of the retransmit timer (code 1) or the generic
+ * socket timer (code 2) fires first; 0 means no timer pending.
+ */
+static void get_tcp_sock(struct sock *sp, char *tmpbuf, int i)
+{
+ unsigned int dest, src;
+ __u16 destp, srcp;
+ int timer_active, timer_active1, timer_active2;
+ unsigned long timer_expires;
+ struct tcp_opt *tp = &sp->tp_pinfo.af_tcp;
+
+ dest = sp->daddr;
+ src = sp->rcv_saddr;
+ destp = ntohs(sp->dport);
+ srcp = ntohs(sp->sport);
+ /* A timer is pending iff it is linked into the timer list. */
+ timer_active1 = tp->retransmit_timer.prev != NULL;
+ timer_active2 = sp->timer.prev != NULL;
+ timer_active = 0;
+ timer_expires = (unsigned) -1;
+ if (timer_active1 && tp->retransmit_timer.expires < timer_expires) {
+ timer_active = 1;
+ timer_expires = tp->retransmit_timer.expires;
+ }
+ if (timer_active2 && sp->timer.expires < timer_expires) {
+ timer_active = 2;
+ timer_expires = sp->timer.expires;
+ }
+ if(timer_active == 0)
+ timer_expires = jiffies;
+
+ sprintf(tmpbuf, "%4d: %08X:%04X %08X:%04X"
+ " %02X %08X:%08X %02X:%08lX %08X %5d %8d %ld",
+ i, src, srcp, dest, destp, sp->state,
+ tp->write_seq-tp->snd_una, tp->rcv_nxt-tp->copied_seq,
+ timer_active, timer_expires-jiffies,
+ tp->retransmits,
+ sp->socket ? sp->socket->inode->i_uid : 0,
+ timer_active ? sp->timeout : 0,
+ sp->socket ? sp->socket->inode->i_ino : 0);
+}
+
+/* Format one TIME_WAIT bucket as a /proc/net/tcp row into tmpbuf.
+ * The remaining lifetime is derived from the distance between the
+ * bucket's death slot and the global death-row cursor (timer code 3).
+ */
+static void get_timewait_sock(struct tcp_tw_bucket *tw, char *tmpbuf, int i)
+{
+ extern int tcp_tw_death_row_slot;
+ unsigned int dest, src;
+ __u16 destp, srcp;
+ int slot_dist;
+
+ dest = tw->daddr;
+ src = tw->rcv_saddr;
+ destp = ntohs(tw->dport);
+ srcp = ntohs(tw->sport);
+
+ /* Slots form a ring of TCP_TWKILL_SLOTS entries; wrap accordingly. */
+ slot_dist = tw->death_slot;
+ if(slot_dist > tcp_tw_death_row_slot)
+ slot_dist = (TCP_TWKILL_SLOTS - slot_dist) + tcp_tw_death_row_slot;
+ else
+ slot_dist = tcp_tw_death_row_slot - slot_dist;
+
+ sprintf(tmpbuf, "%4d: %08X:%04X %08X:%04X"
+ " %02X %08X:%08X %02X:%08X %08X %5d %8d %d",
+ i, src, srcp, dest, destp, TCP_TIME_WAIT, 0, 0,
+ 3, slot_dist * TCP_TWKILL_PERIOD, 0, 0, 0, 0);
+}
+
+/* /proc/net/tcp read handler: emit fixed-width (128-byte) rows for
+ * listening sockets (plus their pending open_requests), established
+ * sockets, and TIME_WAIT buckets, honouring the offset/length window
+ * the procfs core passes in.
+ */
+int tcp_get_info(char *buffer, char **start, off_t offset, int length, int dummy)
+{
+ int len = 0, num = 0, i;
+ off_t begin, pos = 0;
+ char tmpbuf[129];
+
+ if (offset < 128)
+ len += sprintf(buffer, "%-127s\n",
+ " sl local_address rem_address st tx_queue "
+ "rx_queue tr tm->when retrnsmt uid timeout inode");
+
+ pos = 128;
+ SOCKHASH_LOCK_READ();
+
+ /* First, walk listening socket table. */
+ for(i = 0; i < TCP_LHTABLE_SIZE; i++) {
+ /* NOTE(review): this initializer is redundant -- the inner
+ * for loop below re-initializes sk from the same slot.
+ */
+ struct sock *sk = tcp_listening_hash[i];
+
+ for (sk = tcp_listening_hash[i]; sk; sk = sk->next, num++) {
+ struct open_request *req;
+ struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
+
+ if (sk->family != PF_INET)
+ continue;
+ pos += 128;
+ if (pos >= offset) {
+ get_tcp_sock(sk, tmpbuf, num);
+ len += sprintf(buffer+len, "%-127s\n", tmpbuf);
+ if (len >= length)
+ goto out;
+ }
+ /* Also dump each listener's not-yet-accepted requests. */
+ for (req = tp->syn_wait_queue; req; req = req->dl_next, num++) {
+ if (req->sk)
+ continue;
+ pos += 128;
+ if (pos < offset)
+ continue;
+ get_openreq(sk, req, tmpbuf, num);
+ len += sprintf(buffer+len, "%-127s\n", tmpbuf);
+ if(len >= length)
+ goto out;
+ }
+ }
+ }
+
+ /* Next, walk established hash chain. */
+ for (i = 0; i < (tcp_ehash_size >> 1); i++) {
+ struct sock *sk;
+
+ for(sk = tcp_ehash[i]; sk; sk = sk->next, num++) {
+ if (sk->family != PF_INET)
+ continue;
+ pos += 128;
+ if (pos < offset)
+ continue;
+ get_tcp_sock(sk, tmpbuf, num);
+ len += sprintf(buffer+len, "%-127s\n", tmpbuf);
+ if(len >= length)
+ goto out;
+ }
+ }
+
+ /* Finally, walk time wait buckets (upper half of the ehash table). */
+ for (i = (tcp_ehash_size>>1); i < tcp_ehash_size; i++) {
+ struct tcp_tw_bucket *tw;
+ for (tw = (struct tcp_tw_bucket *)tcp_ehash[i];
+ tw != NULL;
+ tw = (struct tcp_tw_bucket *)tw->next, num++) {
+ if (tw->family != PF_INET)
+ continue;
+ pos += 128;
+ if (pos < offset)
+ continue;
+ get_timewait_sock(tw, tmpbuf, num);
+ len += sprintf(buffer+len, "%-127s\n", tmpbuf);
+ if(len >= length)
+ goto out;
+ }
+ }
+
+out:
+ SOCKHASH_UNLOCK_READ();
+
+ /* Standard procfs windowing arithmetic. */
+ begin = len - (pos - offset);
+ *start = buffer + begin;
+ len -= begin;
+ if(len > length)
+ len = length;
+ if (len < 0)
+ len = 0;
+ return len;
+}
+
struct proto tcp_prot = {
- (struct sock *)&tcp_prot, /* sklist_next */
- (struct sock *)&tcp_prot, /* sklist_prev */
tcp_close, /* close */
tcp_v4_connect, /* connect */
tcp_accept, /* accept */
tcp_v4_do_rcv, /* backlog_rcv */
tcp_v4_hash, /* hash */
tcp_v4_unhash, /* unhash */
- tcp_v4_rehash, /* rehash */
- tcp_good_socknum, /* good_socknum */
- tcp_v4_verify_bind, /* verify_bind */
+ tcp_v4_get_port, /* get_port */
128, /* max_header */
0, /* retransmits */
"TCP", /* name */
*
* Implementation of the Transmission Control Protocol(TCP).
*
- * Version: $Id: tcp_timer.c,v 1.64 1999/05/27 00:37:31 davem Exp $
+ * Version: $Id: tcp_timer.c,v 1.65 1999/07/02 11:26:35 davem Exp $
*
* Authors: Ross Biro, <bir7@leland.Stanford.Edu>
* Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
static void tcp_sltimer_handler(unsigned long);
static void tcp_syn_recv_timer(unsigned long);
static void tcp_keepalive(unsigned long data);
-static void tcp_bucketgc(unsigned long);
static void tcp_twkill(unsigned long);
struct timer_list tcp_slow_timer = {
struct tcp_sl_timer tcp_slt_array[TCP_SLT_MAX] = {
{ATOMIC_INIT(0), TCP_SYNACK_PERIOD, 0, tcp_syn_recv_timer},/* SYNACK */
{ATOMIC_INIT(0), TCP_KEEPALIVE_PERIOD, 0, tcp_keepalive}, /* KEEPALIVE */
- {ATOMIC_INIT(0), TCP_TWKILL_PERIOD, 0, tcp_twkill}, /* TWKILL */
- {ATOMIC_INIT(0), TCP_BUCKETGC_PERIOD, 0, tcp_bucketgc} /* BUCKETGC */
+ {ATOMIC_INIT(0), TCP_TWKILL_PERIOD, 0, tcp_twkill} /* TWKILL */
};
const char timer_bug_msg[] = KERN_DEBUG "tcpbug: unknown timer value\n";
return res;
}
-/* Garbage collect TCP bind buckets. */
-static void tcp_bucketgc(unsigned long data)
-{
- int i, reaped = 0;;
-
- SOCKHASH_LOCK_WRITE_BH();
- for(i = 0; i < tcp_bhash_size; i++) {
- struct tcp_bind_bucket *tb = tcp_bhash[i];
-
- while(tb) {
- struct tcp_bind_bucket *next = tb->next;
-
- if((tb->owners == NULL) &&
- !(tb->flags & TCPB_FLAG_LOCKED)) {
- reaped++;
-
- /* Unlink bucket. */
- if(tb->next)
- tb->next->pprev = tb->pprev;
- *tb->pprev = tb->next;
-
- /* Finally, free it up. */
- kmem_cache_free(tcp_bucket_cachep, tb);
- }
- tb = next;
- }
- }
- SOCKHASH_UNLOCK_WRITE_BH();
-
- if(reaped != 0) {
- struct tcp_sl_timer *slt = (struct tcp_sl_timer *)data;
-
- /* Eat timer references. */
- atomic_sub(reaped, &slt->count);
- }
-}
-
/* Kill off TIME_WAIT sockets once their lifetime has expired. */
int tcp_tw_death_row_slot = 0;
static struct tcp_tw_bucket *tcp_tw_death_row[TCP_TWKILL_SLOTS] =
*
* The User Datagram Protocol (UDP).
*
- * Version: $Id: udp.c,v 1.70 1999/06/13 05:55:16 davem Exp $
+ * Version: $Id: udp.c,v 1.71 1999/07/02 11:26:33 davem Exp $
*
* Authors: Ross Biro, <bir7@leland.Stanford.Edu>
* Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
struct sock *udp_hash[UDP_HTABLE_SIZE];
-static int udp_v4_verify_bind(struct sock *sk, unsigned short snum)
-{
- struct sock *sk2;
- int retval = 0, sk_reuse = sk->reuse;
-
- SOCKHASH_LOCK_READ();
- for(sk2 = udp_hash[snum & (UDP_HTABLE_SIZE - 1)]; sk2 != NULL; sk2 = sk2->next) {
- if((sk2->num == snum) && (sk2 != sk)) {
- unsigned char state = sk2->state;
- int sk2_reuse = sk2->reuse;
-
- /* Two sockets can be bound to the same port if they're
- * bound to different interfaces.
- */
-
- if(sk2->bound_dev_if != sk->bound_dev_if)
- continue;
+/* Shared by v4/v6 udp. */
+int udp_port_rover = 0;
- if(!sk2->rcv_saddr || !sk->rcv_saddr) {
- if((!sk2_reuse) ||
- (!sk_reuse) ||
- (state == TCP_LISTEN)) {
- retval = 1;
- break;
- }
- } else if(sk2->rcv_saddr == sk->rcv_saddr) {
- if((!sk_reuse) ||
- (!sk2_reuse) ||
- (state == TCP_LISTEN)) {
- retval = 1;
- break;
- }
+/* proto->get_port for UDP: bind sk to local port snum, or pick a free
+ * port when snum == 0 by scanning for the shortest hash chain starting
+ * at udp_port_rover.  Returns 0 on success, 1 on conflict/exhaustion.
+ */
+static int udp_v4_get_port(struct sock *sk, unsigned short snum)
+{
+ SOCKHASH_LOCK_WRITE();
+ if (snum == 0) {
+ int best_size_so_far, best, result, i;
+
+ if (udp_port_rover > sysctl_local_port_range[1] ||
+ udp_port_rover < sysctl_local_port_range[0])
+ udp_port_rover = sysctl_local_port_range[0];
+ best_size_so_far = 32767;
+ best = result = udp_port_rover;
+ for (i = 0; i < UDP_HTABLE_SIZE; i++, result++) {
+ /* NOTE(review): this inner 'sk' shadows the parameter;
+ * confirm intentional.
+ */
+ struct sock *sk;
+ int size;
+
+ sk = udp_hash[result & (UDP_HTABLE_SIZE - 1)];
+ if (!sk) {
+ /* Empty chain: clamp back into range, keeping
+ * the same hash bin, and use it directly.
+ */
+ if (result > sysctl_local_port_range[1])
+ result = sysctl_local_port_range[0] +
+ ((result - sysctl_local_port_range[0]) &
+ (UDP_HTABLE_SIZE - 1));
+ goto gotit;
			}
+ size = 0;
+ do {
+ if (++size >= best_size_so_far)
+ goto next;
+ } while ((sk = sk->next) != NULL);
+ best_size_so_far = size;
+ best = result;
+ next:
+ }
+ result = best;
+ /* Probe ports sharing the best bin until one is unused. */
+ for(;; result += UDP_HTABLE_SIZE) {
+ if (result > sysctl_local_port_range[1])
+ result = sysctl_local_port_range[0]
+ + ((result - sysctl_local_port_range[0]) &
+ (UDP_HTABLE_SIZE - 1));
+ if (!udp_lport_inuse(result))
+ break;
+ }
+gotit:
+ udp_port_rover = snum = result;
+ } else {
+ struct sock *sk2;
+
+ /* Explicit port: conflict unless device-bound elsewhere,
+ * addresses differ, or both ends set SO_REUSEADDR.
+ */
+ for (sk2 = udp_hash[snum & (UDP_HTABLE_SIZE - 1)];
+ sk2 != NULL;
+ sk2 = sk2->next) {
+ if (sk2->num == snum &&
+ sk2 != sk &&
+ sk2->bound_dev_if == sk->bound_dev_if &&
+ (!sk2->rcv_saddr ||
+ !sk->rcv_saddr ||
+ sk2->rcv_saddr == sk->rcv_saddr) &&
+ (!sk2->reuse || !sk->reuse))
+ goto fail;
		}
	}
- SOCKHASH_UNLOCK_READ();
- return retval;
-}
-
-static inline int udp_lport_inuse(u16 num)
-{
- struct sock *sk = udp_hash[num & (UDP_HTABLE_SIZE - 1)];
-
- for(; sk != NULL; sk = sk->next) {
- if(sk->num == num)
- return 1;
- }
+ sk->num = snum;
+ SOCKHASH_UNLOCK_WRITE();
	return 0;
-}
-
-/* Shared by v4/v6 udp. */
-unsigned short udp_good_socknum(void)
-{
- int result;
- static int start = 0;
- int i, best, best_size_so_far;
-
- SOCKHASH_LOCK_READ();
- if (start > sysctl_local_port_range[1] || start < sysctl_local_port_range[0])
- start = sysctl_local_port_range[0];
-
- best_size_so_far = 32767; /* "big" num */
- best = result = start;
-
- for(i = 0; i < UDP_HTABLE_SIZE; i++, result++) {
- struct sock *sk;
- int size;
-
- sk = udp_hash[result & (UDP_HTABLE_SIZE - 1)];
-
- if(!sk) {
- if (result > sysctl_local_port_range[1])
- result = sysctl_local_port_range[0]
- + ((result - sysctl_local_port_range[0]) & (UDP_HTABLE_SIZE - 1));
- goto out;
- }
-
- /* Is this one better than our best so far? */
- size = 0;
- do {
- if(++size >= best_size_so_far)
- goto next;
- } while((sk = sk->next) != NULL);
- best_size_so_far = size;
- best = result;
- next:
- }
- result = best;
-
- for(;; result += UDP_HTABLE_SIZE) {
- /* Get into range (but preserve hash bin)... */
- if (result > sysctl_local_port_range[1])
- result = sysctl_local_port_range[0]
- + ((result - sysctl_local_port_range[0]) & (UDP_HTABLE_SIZE - 1));
- if (!udp_lport_inuse(result))
- break;
- }
-out:
- start = result;
- SOCKHASH_UNLOCK_READ();
- return result;
+fail:
+ SOCKHASH_UNLOCK_WRITE();
+ return 1;
}
+/* proto->hash for UDP: head-insert sk into its udp_hash chain (keyed
+ * by local port) and bump the per-proto inuse counters.
+ */
static void udp_v4_hash(struct sock *sk)
{
- struct sock **skp;
- int num = sk->num;
-
- num &= (UDP_HTABLE_SIZE - 1);
- skp = &udp_hash[num];
+ struct sock **skp = &udp_hash[sk->num & (UDP_HTABLE_SIZE - 1)];
	SOCKHASH_LOCK_WRITE();
- sk->next = *skp;
+ if ((sk->next = *skp) != NULL)
+ (*skp)->pprev = &sk->next;
	*skp = sk;
- sk->hashent = num;
+ sk->pprev = skp;
+ sk->prot->inuse++;
+ if(sk->prot->highestinuse < sk->prot->inuse)
+ sk->prot->highestinuse = sk->prot->inuse;
	SOCKHASH_UNLOCK_WRITE();
}
+/* proto->unhash for UDP: O(1) unlink via the pprev back-pointer;
+ * a NULL sk->pprev means the socket is not hashed (no-op then).
+ */
static void udp_v4_unhash(struct sock *sk)
{
- struct sock **skp;
- int num = sk->num;
-
- num &= (UDP_HTABLE_SIZE - 1);
- skp = &udp_hash[num];
-
- SOCKHASH_LOCK_WRITE();
- while(*skp != NULL) {
- if(*skp == sk) {
- *skp = sk->next;
- break;
- }
- skp = &((*skp)->next);
- }
- SOCKHASH_UNLOCK_WRITE();
-}
-
-static void udp_v4_rehash(struct sock *sk)
-{
- struct sock **skp;
- int num = sk->num;
- int oldnum = sk->hashent;
-
- num &= (UDP_HTABLE_SIZE - 1);
- skp = &udp_hash[oldnum];
-
	SOCKHASH_LOCK_WRITE();
- while(*skp != NULL) {
- if(*skp == sk) {
- *skp = sk->next;
- break;
- }
- skp = &((*skp)->next);
+ if (sk->pprev) {
+ if (sk->next)
+ sk->next->pprev = sk->pprev;
+ *sk->pprev = sk->next;
+ sk->pprev = NULL;
+ sk->prot->inuse--;
	}
- sk->next = udp_hash[num];
- udp_hash[num] = sk;
- sk->hashent = num;
	SOCKHASH_UNLOCK_WRITE();
}
if (msg->msg_name) {
struct sockaddr_in * usin = (struct sockaddr_in*)msg->msg_name;
if (msg->msg_namelen < sizeof(*usin))
- return(-EINVAL);
+ return -EINVAL;
if (usin->sin_family != AF_INET)
return -EINVAL;
{
unsigned long amount;
- if (sk->state == TCP_LISTEN) return(-EINVAL);
amount = sock_wspace(sk);
return put_user(amount, (int *)arg);
}
struct sk_buff *skb;
unsigned long amount;
- if (sk->state == TCP_LISTEN)
- return(-EINVAL);
amount = 0;
/* N.B. Is this interrupt safe??
-> Yes. Interrupts do not remove skbs. --ANK (980725)
}
default:
- return(-ENOIOCTLCMD);
+ return -ENOIOCTLCMD;
}
return(0);
}
if (addr_len < sizeof(*usin))
- return(-EINVAL);
+ return -EINVAL;
/*
* 1003.1g - break association.
}
if (usin->sin_family && usin->sin_family != AF_INET)
- return(-EAFNOSUPPORT);
+ return -EAFNOSUPPORT;
dst_release(xchg(&sk->dst_cache, NULL));
return(0);
}
+/* Format one UDP socket as a /proc/net/udp row into tmpbuf; 'i' is
+ * the row number for the "sl" column.  Mirrors get_tcp_sock().
+ */
+static void get_udp_sock(struct sock *sp, char *tmpbuf, int i)
+{
+ unsigned int dest, src;
+ __u16 destp, srcp;
+ int timer_active;
+ unsigned long timer_expires;
+
+ dest = sp->daddr;
+ src = sp->rcv_saddr;
+ destp = ntohs(sp->dport);
+ srcp = ntohs(sp->sport);
+ timer_active = (sp->timer.prev != NULL) ? 2 : 0;
+ timer_expires = (timer_active == 2 ? sp->timer.expires : jiffies);
+ sprintf(tmpbuf, "%4d: %08X:%04X %08X:%04X"
+ " %02X %08X:%08X %02X:%08lX %08X %5d %8d %ld",
+ i, src, srcp, dest, destp, sp->state,
+ atomic_read(&sp->wmem_alloc), atomic_read(&sp->rmem_alloc),
+ timer_active, timer_expires-jiffies, 0,
+ /* Guard sp->socket like the i_ino argument below (and like
+ * get_tcp_sock) -- it can be NULL, which would oops here.
+ */
+ sp->socket ? sp->socket->inode->i_uid : 0,
+ timer_active ? sp->timeout : 0,
+ sp->socket ? sp->socket->inode->i_ino : 0);
+}
+
+/* /proc/net/udp read handler: walk every udp_hash bucket and emit one
+ * fixed-width (128-byte) row per PF_INET socket, honouring the
+ * procfs offset/length window.
+ */
+int udp_get_info(char *buffer, char **start, off_t offset, int length, int dummy)
+{
+ int len = 0, num = 0, i;
+ off_t pos = 0;
+ off_t begin;
+ char tmpbuf[129];
+
+ if (offset < 128)
+ len += sprintf(buffer, "%-127s\n",
+ " sl local_address rem_address st tx_queue "
+ "rx_queue tr tm->when retrnsmt uid timeout inode");
+ pos = 128;
+ SOCKHASH_LOCK_READ();
+ for (i = 0; i < UDP_HTABLE_SIZE; i++) {
+ struct sock *sk;
+
+ for (sk = udp_hash[i]; sk; sk = sk->next, num++) {
+ if (sk->family != PF_INET)
+ continue;
+ pos += 128;
+ if (pos < offset)
+ continue;
+ /* NOTE(review): passes the bucket index 'i' as the row
+ * number, while tcp_get_info passes the running counter
+ * 'num' (which is maintained here too) -- confirm which
+ * is intended for the "sl" column.
+ */
+ get_udp_sock(sk, tmpbuf, i);
+ len += sprintf(buffer+len, "%-127s\n", tmpbuf);
+ if(len >= length)
+ goto out;
+ }
+ }
+out:
+ SOCKHASH_UNLOCK_READ();
+ begin = len - (pos - offset);
+ *start = buffer + begin;
+ len -= begin;
+ if(len > length)
+ len = length;
+ if (len < 0)
+ len = 0;
+ return len;
+}
+
struct proto udp_prot = {
- (struct sock *)&udp_prot, /* sklist_next */
- (struct sock *)&udp_prot, /* sklist_prev */
udp_close, /* close */
udp_connect, /* connect */
NULL, /* accept */
udp_queue_rcv_skb, /* backlog_rcv */
udp_v4_hash, /* hash */
udp_v4_unhash, /* unhash */
- udp_v4_rehash, /* rehash */
- udp_good_socknum, /* good_socknum */
- udp_v4_verify_bind, /* verify_bind */
+ udp_v4_get_port, /* get_port */
128, /* max_header */
0, /* retransmits */
"UDP", /* name */
*
* Adapted from linux/net/ipv4/af_inet.c
*
- * $Id: af_inet6.c,v 1.44 1999/06/09 08:29:29 davem Exp $
+ * $Id: af_inet6.c,v 1.45 1999/07/02 11:26:38 davem Exp $
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
*/
sk->sport = ntohs(sk->num);
sk->prot->hash(sk);
- add_to_prot_sklist(sk);
}
if (sk->prot->init) {
addr_type = ipv6_addr_type(&addr->sin6_addr);
if ((addr_type & IPV6_ADDR_MULTICAST) && sock->type == SOCK_STREAM)
- return(-EINVAL);
+ return -EINVAL;
/* Check if the address belongs to the host. */
if (addr_type == IPV6_ADDR_MAPPED) {
v4addr = addr->sin6_addr.s6_addr32[3];
if (inet_addr_type(v4addr) != RTN_LOCAL)
- return(-EADDRNOTAVAIL);
+ return -EADDRNOTAVAIL;
} else {
if (addr_type != IPV6_ADDR_ANY) {
/* ipv4 addr of the socket is invalid. Only the
v4addr = LOOPBACK4_IPV6;
if (!(addr_type & IPV6_ADDR_MULTICAST)) {
if (ipv6_chk_addr(&addr->sin6_addr, NULL, 0) == NULL)
- return(-EADDRNOTAVAIL);
+ return -EADDRNOTAVAIL;
}
}
}
sizeof(struct in6_addr));
snum = ntohs(addr->sin6_port);
- if (snum == 0)
- snum = sk->prot->good_socknum();
- if (snum < PROT_SOCK && !capable(CAP_NET_BIND_SERVICE))
- return(-EACCES);
+ if (snum && snum < PROT_SOCK && !capable(CAP_NET_BIND_SERVICE))
+ return -EACCES;
/* Make sure we are allowed to bind here. */
- if(sk->prot->verify_bind(sk, snum))
+ if(sk->prot->get_port(sk, snum) != 0)
return -EADDRINUSE;
- sk->num = snum;
sk->sport = ntohs(sk->num);
sk->dport = 0;
sk->daddr = 0;
- sk->prot->rehash(sk);
- add_to_prot_sklist(sk);
+ sk->prot->hash(sk);
return(0);
}
sk = sock->sk;
if (peer) {
if (!tcp_connected(sk->state))
- return(-ENOTCONN);
+ return -ENOTCONN;
sin->sin6_port = sk->dport;
memcpy(&sin->sin6_addr, &sk->net_pinfo.af_inet6.daddr,
sizeof(struct in6_addr));
* PROC file system. This is very similar to the IPv4 version,
* except it reports the sockets in the INET6 address family.
*
- * Version: $Id: proc.c,v 1.10 1999/05/27 00:38:14 davem Exp $
+ * Version: $Id: proc.c,v 1.11 1999/07/02 11:26:45 davem Exp $
*
* Authors: David S. Miller (davem@caip.rutgers.edu)
*
#include <net/transp_v6.h>
#include <net/ipv6.h>
-/* This is the main implementation workhorse of all these routines. */
-static int get__netinfo6(struct proto *pro, char *buffer, int format, char **start,
- off_t offset, int length)
-{
- struct sock *sp;
- struct tcp_opt *tp;
- int timer_active, timer_active1, timer_active2;
- unsigned long timer_expires;
- struct in6_addr *dest, *src;
- unsigned short destp, srcp;
- int len = 0, i = 0;
- off_t pos = 0;
- off_t begin;
- char tmpbuf[150];
-
- if(offset < 149)
- len += sprintf(buffer, "%-148s\n",
- " sl " /* 6 */
- "local_address " /* 38 */
- "remote_address " /* 38 */
- "st tx_queue rx_queue tr tm->when retrnsmt" /* 41 */
- " uid timeout inode"); /* 21 */
- /*----*/
- /*144 */
-
- pos = 149;
- SOCKHASH_LOCK_READ();
- sp = pro->sklist_next;
- while(sp != (struct sock *)pro) {
- struct tcp_tw_bucket *tw = (struct tcp_tw_bucket *)sp;
- int tw_bucket = 0;
-
- pos += 149;
- if(pos < offset)
- goto next;
- tp = &(sp->tp_pinfo.af_tcp);
- if((format == 0) && (sp->state == TCP_TIME_WAIT)) {
- tw_bucket = 1;
- dest = &tw->v6_daddr;
- src = &tw->v6_rcv_saddr;
- } else {
- dest = &sp->net_pinfo.af_inet6.daddr;
- src = &sp->net_pinfo.af_inet6.rcv_saddr;
- }
- destp = ntohs(sp->dport);
- srcp = ntohs(sp->sport);
-
- if((format == 0) && (sp->state == TCP_TIME_WAIT)) {
- extern int tcp_tw_death_row_slot;
- int slot_dist;
-
- timer_active1 = timer_active2 = 0;
- timer_active = 3;
- slot_dist = tw->death_slot;
- if(slot_dist > tcp_tw_death_row_slot)
- slot_dist = (TCP_TWKILL_SLOTS - slot_dist) + tcp_tw_death_row_slot;
- else
- slot_dist = tcp_tw_death_row_slot - slot_dist;
- timer_expires = jiffies + (slot_dist * TCP_TWKILL_PERIOD);
- } else {
- timer_active1 = tp->retransmit_timer.prev != NULL;
- timer_active2 = sp->timer.prev != NULL;
- timer_active = 0;
- timer_expires = (unsigned) -1;
- }
- if(timer_active1 && tp->retransmit_timer.expires < timer_expires) {
- timer_active = timer_active1;
- timer_expires = tp->retransmit_timer.expires;
- }
- if(timer_active2 && sp->timer.expires < timer_expires) {
- timer_active = timer_active2;
- timer_expires = sp->timer.expires;
- }
- if(timer_active == 0)
- timer_expires = jiffies;
- sprintf(tmpbuf, "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
- "%02X %08X:%08X %02X:%08lX %08X %5d %8d %ld",
- i,
- src->s6_addr32[0], src->s6_addr32[1],
- src->s6_addr32[2], src->s6_addr32[3], srcp,
- dest->s6_addr32[0], dest->s6_addr32[1],
- dest->s6_addr32[2], dest->s6_addr32[3], destp,
- sp->state,
- (tw_bucket ?
- 0 :
- (format == 0) ?
- tp->write_seq-tp->snd_una :
- atomic_read(&sp->wmem_alloc)),
- (tw_bucket ?
- 0 :
- (format == 0) ?
- tp->rcv_nxt-tp->copied_seq :
- atomic_read(&sp->rmem_alloc)),
- timer_active, timer_expires-jiffies,
- (tw_bucket ? 0 : tp->retransmits),
- ((!tw_bucket && sp->socket) ?
- sp->socket->inode->i_uid : 0),
- (!tw_bucket && timer_active) ? sp->timeout : 0,
- ((!tw_bucket && sp->socket) ?
- sp->socket->inode->i_ino : 0));
-
- len += sprintf(buffer+len, "%-148s\n", tmpbuf);
- if(len >= length)
- break;
- next:
- sp = sp->sklist_next;
- i++;
- }
- SOCKHASH_UNLOCK_READ();
-
- begin = len - (pos - offset);
- *start = buffer + begin;
- len -= begin;
- if(len > length)
- len = length;
- return len;
-}
-
-/* These get exported and registered with procfs in af_inet6.c at init time. */
-int tcp6_get_info(char *buffer, char **start, off_t offset, int length, int dummy)
-{
- return get__netinfo6(&tcpv6_prot, buffer, 0, start, offset, length);
-}
-
-int udp6_get_info(char *buffer, char **start, off_t offset, int length, int dummy)
-{
- return get__netinfo6(&udpv6_prot, buffer, 1, start, offset, length);
-}
-
-int raw6_get_info(char *buffer, char **start, off_t offset, int length, int dummy)
-{
- return get__netinfo6(&rawv6_prot, buffer, 1, start, offset, length);
-}
-
int afinet6_get_info(char *buffer, char **start, off_t offset, int length, int dummy)
{
int len = 0;
*
* Adapted from linux/net/ipv4/raw.c
*
- * $Id: raw.c,v 1.26 1999/06/09 10:11:18 davem Exp $
+ * $Id: raw.c,v 1.27 1999/07/02 11:26:40 davem Exp $
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
+/* proto->hash for IPv6 raw sockets: head-insert into the
+ * raw_v6_htable chain for sk's protocol number and bump the
+ * per-proto inuse counters.
+ */
static void raw_v6_hash(struct sock *sk)
{
- struct sock **skp;
- int num = sk->num;
+ struct sock **skp = &raw_v6_htable[sk->num & (RAWV6_HTABLE_SIZE - 1)];
- num &= (RAWV6_HTABLE_SIZE - 1);
- skp = &raw_v6_htable[num];
	SOCKHASH_LOCK_WRITE();
- sk->next = *skp;
+ if ((sk->next = *skp) != NULL)
+ (*skp)->pprev = &sk->next;
	*skp = sk;
- sk->hashent = num;
+ sk->pprev = skp;
+ sk->prot->inuse++;
+ if(sk->prot->highestinuse < sk->prot->inuse)
+ sk->prot->highestinuse = sk->prot->inuse;
	SOCKHASH_UNLOCK_WRITE();
}
+/* proto->unhash for IPv6 raw sockets: O(1) unlink via the pprev
+ * back-pointer; no-op when sk is not hashed (sk->pprev == NULL).
+ */
static void raw_v6_unhash(struct sock *sk)
{
- struct sock **skp;
- int num = sk->num;
-
- num &= (RAWV6_HTABLE_SIZE - 1);
- skp = &raw_v6_htable[num];
-
- SOCKHASH_LOCK_WRITE();
- while(*skp != NULL) {
- if(*skp == sk) {
- *skp = sk->next;
- break;
- }
- skp = &((*skp)->next);
- }
- SOCKHASH_UNLOCK_WRITE();
-}
-
-static void raw_v6_rehash(struct sock *sk)
-{
- struct sock **skp;
- int num = sk->num;
- int oldnum = sk->hashent;
-
- num &= (RAWV6_HTABLE_SIZE - 1);
- skp = &raw_v6_htable[oldnum];
-
	SOCKHASH_LOCK_WRITE();
- while(*skp != NULL) {
- if(*skp == sk) {
- *skp = sk->next;
- break;
- }
- skp = &((*skp)->next);
+ if (sk->pprev) {
+ if (sk->next)
+ sk->next->pprev = sk->pprev;
+ *sk->pprev = sk->next;
+ sk->pprev = NULL;
+ sk->prot->inuse--;
	}
- sk->next = raw_v6_htable[num];
- raw_v6_htable[num] = sk;
- sk->hashent = num;
	SOCKHASH_UNLOCK_WRITE();
}
return(0);
}
+/* Format one IPv6 raw socket as a /proc/net/raw6 row into tmpbuf;
+ * 'i' is the row number for the "sl" column.
+ */
+static void get_raw6_sock(struct sock *sp, char *tmpbuf, int i)
+{
+ struct in6_addr *dest, *src;
+ __u16 destp, srcp;
+ int timer_active;
+ unsigned long timer_expires;
+
+ dest = &sp->net_pinfo.af_inet6.daddr;
+ src = &sp->net_pinfo.af_inet6.rcv_saddr;
+ destp = ntohs(sp->dport);
+ srcp = ntohs(sp->sport);
+ timer_active = (sp->timer.prev != NULL) ? 2 : 0;
+ timer_expires = (timer_active == 2 ? sp->timer.expires : jiffies);
+ sprintf(tmpbuf,
+ "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
+ "%02X %08X:%08X %02X:%08lX %08X %5d %8d %ld",
+ i,
+ src->s6_addr32[0], src->s6_addr32[1],
+ src->s6_addr32[2], src->s6_addr32[3], srcp,
+ dest->s6_addr32[0], dest->s6_addr32[1],
+ dest->s6_addr32[2], dest->s6_addr32[3], destp,
+ sp->state,
+ atomic_read(&sp->wmem_alloc), atomic_read(&sp->rmem_alloc),
+ timer_active, timer_expires-jiffies, 0,
+ /* Guard sp->socket like the i_ino argument below (and like
+ * get_tcp_sock) -- it can be NULL, which would oops here.
+ */
+ sp->socket ? sp->socket->inode->i_uid : 0,
+ timer_active ? sp->timeout : 0,
+ sp->socket ? sp->socket->inode->i_ino : 0);
+}
+
+/* /proc/net/raw6 read handler: walk every raw_v6_htable bucket and
+ * emit one fixed-width (149-byte) row per PF_INET6 socket, honouring
+ * the procfs offset/length window.
+ */
+int raw6_get_info(char *buffer, char **start, off_t offset, int length, int dummy)
+{
+ int len = 0, num = 0, i;
+ off_t pos = 0;
+ off_t begin;
+ char tmpbuf[150];
+
+ if (offset < 149)
+ len += sprintf(buffer, "%-148s\n",
+ " sl " /* 6 */
+ "local_address " /* 38 */
+ "remote_address " /* 38 */
+ "st tx_queue rx_queue tr tm->when retrnsmt" /* 41 */
+ " uid timeout inode"); /* 21 */
+ /*----*/
+ /*144 */
+ pos = 149;
+ SOCKHASH_LOCK_READ();
+ for (i = 0; i < RAWV6_HTABLE_SIZE; i++) {
+ struct sock *sk;
+
+ for (sk = raw_v6_htable[i]; sk; sk = sk->next, num++) {
+ if (sk->family != PF_INET6)
+ continue;
+ pos += 149;
+ if (pos < offset)
+ continue;
+ /* NOTE(review): passes the bucket index 'i', not the
+ * running counter 'num', as the row number -- confirm.
+ */
+ get_raw6_sock(sk, tmpbuf, i);
+ len += sprintf(buffer+len, "%-148s\n", tmpbuf);
+ if(len >= length)
+ goto out;
+ }
+ }
+out:
+ SOCKHASH_UNLOCK_READ();
+ begin = len - (pos - offset);
+ *start = buffer + begin;
+ len -= begin;
+ if(len > length)
+ len = length;
+ if (len < 0)
+ len = 0;
+ return len;
+}
+
struct proto rawv6_prot = {
- (struct sock *)&rawv6_prot, /* sklist_next */
- (struct sock *)&rawv6_prot, /* sklist_prev */
rawv6_close, /* close */
udpv6_connect, /* connect */
NULL, /* accept */
rawv6_rcv_skb, /* backlog_rcv */
raw_v6_hash, /* hash */
raw_v6_unhash, /* unhash */
- raw_v6_rehash, /* rehash */
- NULL, /* good_socknum */
- NULL, /* verify_bind */
+ NULL, /* get_port */
128, /* max_header */
0, /* retransmits */
"RAW", /* name */
* Authors:
* Pedro Roque <roque@di.fc.ul.pt>
*
- * $Id: tcp_ipv6.c,v 1.108 1999/06/09 08:29:43 davem Exp $
+ * $Id: tcp_ipv6.c,v 1.109 1999/07/02 11:26:41 davem Exp $
*
* Based on:
* linux/net/ipv4/tcp.c
* But it doesn't matter, the recalculation is in the rarest path
* this function ever takes.
*/
-static int tcp_v6_verify_bind(struct sock *sk, unsigned short snum)
+/* proto->get_port for IPv6 TCP: bind sk to local port snum (or pick a
+ * free one via tcp_port_rover when snum == 0), enforcing SO_REUSEADDR
+ * and bound-device conflict rules against existing bucket owners.
+ * Returns 0 on success, 1 on conflict or port exhaustion.
+ */
+static int tcp_v6_get_port(struct sock *sk, unsigned short snum)
{
	struct tcp_bind_bucket *tb;
- int result = 0;
	SOCKHASH_LOCK_WRITE();
- for(tb = tcp_bhash[tcp_bhashfn(snum)];
- (tb && (tb->port != snum));
- tb = tb->next)
- ;
- if(tb && tb->owners) {
- /* Fast path for reuse ports, see include/net/tcp.h for a very
- * detailed description of why this works, and why it is worth
- * the effort at all. -DaveM
- */
- if((tb->flags & TCPB_FLAG_FASTREUSE) &&
- (sk->reuse != 0)) {
- goto go_like_smoke;
+ if (snum == 0) {
+ int rover = tcp_port_rover;
+ int low = sysctl_local_port_range[0];
+ int high = sysctl_local_port_range[1];
+ int remaining = (high - low) + 1;
+
+ do { rover++;
+ if ((rover < low) || (rover > high))
+ rover = low;
+ tb = tcp_bhash[tcp_bhashfn(rover)];
+ for ( ; tb; tb = tb->next)
+ if (tb->port == rover)
+ goto next;
+ break;
+ next:
+ } while (--remaining > 0);
+ tcp_port_rover = rover;
+
+ /* Exhausted local port range during search? */
+ if (remaining <= 0)
+ goto fail;
+
+ /* OK, here is the one we will use. */
+ snum = rover;
+ tb = NULL;
+ } else {
+ for (tb = tcp_bhash[tcp_bhashfn(snum)];
+ tb != NULL;
+ tb = tb->next)
+ if (tb->port == snum)
+ break;
+ }
+ if (tb != NULL && tb->owners != NULL) {
+ if (tb->fastreuse != 0 && sk->reuse != 0) {
+ goto success;
		} else {
- struct sock *sk2;
+ struct sock *sk2 = tb->owners;
			int sk_reuse = sk->reuse;
			int addr_type = ipv6_addr_type(&sk->net_pinfo.af_inet6.rcv_saddr);
			/* We must walk the whole port owner list in this case. -DaveM */
- for(sk2 = tb->owners; sk2; sk2 = sk2->bind_next) {
- if(sk->bound_dev_if == sk2->bound_dev_if) {
- if(!sk_reuse || !sk2->reuse || sk2->state == TCP_LISTEN) {
- if(addr_type == IPV6_ADDR_ANY ||
- !sk2->rcv_saddr ||
- !ipv6_addr_cmp(&sk->net_pinfo.af_inet6.rcv_saddr,
- &sk2->net_pinfo.af_inet6.rcv_saddr))
+ for( ; sk2 != NULL; sk2 = sk2->bind_next) {
+ if (sk->bound_dev_if == sk2->bound_dev_if) {
+ if (!sk_reuse ||
+ !sk2->reuse ||
+ sk2->state == TCP_LISTEN) {
+ /* BUGFIX: was "!addr_type == IPV6_ADDR_ANY",
+ * which parses as (!addr_type) == 0, i.e.
+ * addr_type != 0 -- the inverse of the
+ * original test (see removed code above).
+ */
+ if (!sk2->rcv_saddr ||
+ addr_type == IPV6_ADDR_ANY ||
+ !ipv6_addr_cmp(&sk->net_pinfo.af_inet6.rcv_saddr,
+ &sk2->net_pinfo.af_inet6.rcv_saddr))
						break;
				}
			}
		}
- if(sk2 != NULL)
- result = 1;
+ /* If we found a conflict, fail. */
+ if (sk2 != NULL)
+ goto fail;
		}
	}
- if(result == 0) {
- if(tb == NULL) {
- if((tb = tcp_bucket_create(snum)) == NULL)
- result = 1;
- else if (sk->reuse && sk->state != TCP_LISTEN)
- tb->flags |= TCPB_FLAG_FASTREUSE;
- } else {
- /* It could be pending garbage collection, this
- * kills the race and prevents it from disappearing
- * out from under us by the time we use it. -DaveM
- */
- if(tb->owners == NULL) {
- if (!(tb->flags & TCPB_FLAG_LOCKED)) {
- tb->flags = (TCPB_FLAG_LOCKED |
- ((sk->reuse &&
- sk->state != TCP_LISTEN) ?
- TCPB_FLAG_FASTREUSE : 0));
- tcp_dec_slow_timer(TCP_SLT_BUCKETGC);
- } else if (!(tb->flags & TCPB_FLAG_GOODSOCKNUM)) {
- /* Someone is in between the bind
- * and the actual connect or listen.
- * See if it was a legitimate reuse
- * and we are as well, else punt.
- */
- if (sk->reuse == 0 ||
- !(tb->flags & TCPB_FLAG_FASTREUSE))
- result = 1;
- } else
- tb->flags &= ~TCPB_FLAG_GOODSOCKNUM;
- }
- }
- }
-go_like_smoke:
+ if (tb == NULL &&
+ (tb = tcp_bucket_create(snum)) == NULL)
+ goto fail;
+ if (tb->owners == NULL) {
+ if (sk->reuse && sk->state != TCP_LISTEN)
+ tb->fastreuse = 1;
+ else
+ tb->fastreuse = 0;
+ } else if (tb->fastreuse &&
+ ((sk->reuse == 0) || (sk->state == TCP_LISTEN)))
+ tb->fastreuse = 0;
+
+success:
+ sk->num = snum;
+ if ((sk->bind_next = tb->owners) != NULL)
+ tb->owners->bind_pprev = &sk->bind_next;
+ tb->owners = sk;
+ sk->bind_pprev = &tb->owners;
+ sk->prev = (struct sock *) tb;
+
	SOCKHASH_UNLOCK_WRITE();
- return result;
+ return 0;
+
+fail:
+ SOCKHASH_UNLOCK_WRITE();
+ return 1;
}
static void tcp_v6_hash(struct sock *sk)
{
- /* Well, I know that it is ugly...
- All this ->prot, ->af_specific etc. need LARGE cleanup --ANK
- */
- if (sk->tp_pinfo.af_tcp.af_specific == &ipv6_mapped) {
- tcp_prot.hash(sk);
- return;
- }
if(sk->state != TCP_CLOSE) {
struct sock **skp;
+ /* Well, I know that it is ugly...
+ * All this ->prot, ->af_specific etc. need LARGE cleanup --ANK
+ */
+ if (sk->tp_pinfo.af_tcp.af_specific == &ipv6_mapped) {
+ tcp_prot.hash(sk);
+ return;
+ }
+
+ /* Listening sockets go in the listen hash (keyed on local
+ * port); everything else goes in the established hash,
+ * caching the computed hash slot in sk->hashent.
+ */
+ if(sk->state == TCP_LISTEN)
+ skp = &tcp_listening_hash[tcp_sk_listen_hashfn(sk)];
+ else
+ skp = &tcp_ehash[(sk->hashent = tcp_v6_sk_hashfn(sk))];
+
SOCKHASH_LOCK_WRITE();
- skp = &tcp_ehash[(sk->hashent = tcp_v6_sk_hashfn(sk))];
if((sk->next = *skp) != NULL)
(*skp)->pprev = &sk->next;
*skp = sk;
sk->pprev = skp;
+ /* Maintain per-protocol socket-count statistics. */
- tcp_sk_bindify(sk);
+ sk->prot->inuse++;
+ if(sk->prot->highestinuse < sk->prot->inuse)
+ sk->prot->highestinuse = sk->prot->inuse;
SOCKHASH_UNLOCK_WRITE();
}
}
sk->next->pprev = sk->pprev;
*sk->pprev = sk->next;
sk->pprev = NULL;
- tcp_sk_unbindify(sk);
- tcp_reg_zap(sk);
- }
- SOCKHASH_UNLOCK_WRITE();
-}
-
-static void tcp_v6_rehash(struct sock *sk)
-{
- unsigned char state;
-
- SOCKHASH_LOCK_WRITE();
- state = sk->state;
- if(sk->pprev != NULL) {
- if(sk->next)
- sk->next->pprev = sk->pprev;
- *sk->pprev = sk->next;
- sk->pprev = NULL;
+ sk->prot->inuse--;
tcp_reg_zap(sk);
- }
- if(state != TCP_CLOSE) {
- struct sock **skp;
-
- if(state == TCP_LISTEN)
- skp = &tcp_listening_hash[tcp_sk_listen_hashfn(sk)];
- else
- skp = &tcp_ehash[(sk->hashent = tcp_v6_sk_hashfn(sk))];
-
- if((sk->next = *skp) != NULL)
- (*skp)->pprev = &sk->next;
- *skp = sk;
- sk->pprev = skp;
- if(state == TCP_LISTEN)
- tcp_sk_bindify(sk);
+ __tcp_put_port(sk);
}
SOCKHASH_UNLOCK_WRITE();
}
newsk->rcv_saddr= LOOPBACK4_IPV6;
newsk->prot->hash(newsk);
- add_to_prot_sklist(newsk);
-
+ tcp_inherit_port(sk, newsk);
sk->data_ready(sk, 0); /* Deliver SIGIO */
return newsk;
/* Clean up a locked TCP bind bucket, this only happens if a
* port is allocated for a socket, but it never fully connects.
- * In which case we will find num to be non-zero and daddr to
- * be zero.
*/
- if(ipv6_addr_any(&(sk->net_pinfo.af_inet6.daddr)) && sk->num != 0)
- tcp_bucket_unlock(sk);
+ if(sk->prev != NULL)
+ tcp_put_port(sk);
return inet6_destroy_sock(sk);
}
+/* Proc filesystem TCPv6 sock list dumping. */
+/* Format one embryonic (SYN_RECV) open_request of listener sk as a
+ * single /proc/net/tcp6 row into tmpbuf; i is the row's "sl" index.
+ */
+static void get_openreq6(struct sock *sk, struct open_request *req, char *tmpbuf, int i)
+{
+ struct in6_addr *dest, *src;
+
+ src = &req->af.v6_req.loc_addr;
+ dest = &req->af.v6_req.rmt_addr;
+ sprintf(tmpbuf,
+ "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
+ "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d",
+ i,
+ src->s6_addr32[0], src->s6_addr32[1],
+ src->s6_addr32[2], src->s6_addr32[3],
+ ntohs(sk->sport),
+ dest->s6_addr32[0], dest->s6_addr32[1],
+ dest->s6_addr32[2], dest->s6_addr32[3],
+ ntohs(req->rmt_port),
+ TCP_SYN_RECV,
+ 0,0, /* could print option size, but that is af dependent. */
+ 1, /* timers active (only the expire timer) */
+ (unsigned long)(req->expires - jiffies),
+ req->retrans,
+ sk->socket ? sk->socket->inode->i_uid : 0,
+ 0, /* non standard timer */
+ 0 /* open_requests have no inode */
+ );
+}
+
+/* Format one IPv6 TCP socket as a /proc/net/tcp6 row into tmpbuf;
+ * i is the row's "sl" index.
+ */
+static void get_tcp6_sock(struct sock *sp, char *tmpbuf, int i)
+{
+ struct in6_addr *dest, *src;
+ __u16 destp, srcp;
+ int timer_active, timer_active1, timer_active2;
+ unsigned long timer_expires;
+ struct tcp_opt *tp = &sp->tp_pinfo.af_tcp;
+
+ dest = &sp->net_pinfo.af_inet6.daddr;
+ src = &sp->net_pinfo.af_inet6.rcv_saddr;
+ destp = ntohs(sp->dport);
+ srcp = ntohs(sp->sport);
+ /* Report whichever pending timer expires first: 1 = the TCP
+ * retransmit timer, 2 = the generic socket timer, 0 = none.
+ */
+ timer_active1 = tp->retransmit_timer.prev != NULL;
+ timer_active2 = sp->timer.prev != NULL;
+ timer_active = 0;
+ timer_expires = (unsigned) -1;
+ if (timer_active1 && tp->retransmit_timer.expires < timer_expires) {
+ timer_active = 1;
+ timer_expires = tp->retransmit_timer.expires;
+ }
+ if (timer_active2 && sp->timer.expires < timer_expires) {
+ timer_active = 2;
+ timer_expires = sp->timer.expires;
+ }
+ if(timer_active == 0)
+ timer_expires = jiffies;
+
+ sprintf(tmpbuf,
+ "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
+ "%02X %08X:%08X %02X:%08lX %08X %5d %8d %ld",
+ i,
+ src->s6_addr32[0], src->s6_addr32[1],
+ src->s6_addr32[2], src->s6_addr32[3], srcp,
+ dest->s6_addr32[0], dest->s6_addr32[1],
+ dest->s6_addr32[2], dest->s6_addr32[3], destp,
+ sp->state,
+ tp->write_seq-tp->snd_una, tp->rcv_nxt-tp->copied_seq,
+ timer_active, timer_expires-jiffies,
+ tp->retransmits,
+ sp->socket ? sp->socket->inode->i_uid : 0,
+ timer_active ? sp->timeout : 0,
+ sp->socket ? sp->socket->inode->i_ino : 0);
+}
+
+/* Format one TIME_WAIT bucket as a /proc/net/tcp6 row into tmpbuf;
+ * i is the row's "sl" index.
+ */
+static void get_timewait6_sock(struct tcp_tw_bucket *tw, char *tmpbuf, int i)
+{
+ extern int tcp_tw_death_row_slot;
+ struct in6_addr *dest, *src;
+ __u16 destp, srcp;
+ int slot_dist;
+
+ dest = &tw->v6_daddr;
+ src = &tw->v6_rcv_saddr;
+ destp = ntohs(tw->dport);
+ srcp = ntohs(tw->sport);
+
+ /* Distance (in kill slots, with wraparound over TCP_TWKILL_SLOTS)
+ * between this bucket's death slot and the current death-row slot;
+ * printed below scaled by TCP_TWKILL_PERIOD as the timer estimate.
+ */
+ slot_dist = tw->death_slot;
+ if(slot_dist > tcp_tw_death_row_slot)
+ slot_dist = (TCP_TWKILL_SLOTS - slot_dist) + tcp_tw_death_row_slot;
+ else
+ slot_dist = tcp_tw_death_row_slot - slot_dist;
+
+ sprintf(tmpbuf,
+ "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
+ "%02X %08X:%08X %02X:%08X %08X %5d %8d %d",
+ i,
+ src->s6_addr32[0], src->s6_addr32[1],
+ src->s6_addr32[2], src->s6_addr32[3], srcp,
+ dest->s6_addr32[0], dest->s6_addr32[1],
+ dest->s6_addr32[2], dest->s6_addr32[3], destp,
+ TCP_TIME_WAIT, 0, 0,
+ 3, slot_dist * TCP_TWKILL_PERIOD, 0, 0, 0, 0);
+}
+
+/* /proc/net/tcp6 read handler: emit one fixed-width 148-column row
+ * per IPv6 TCP socket, walking the listening table (including each
+ * listener's pending open_requests), the established half of the
+ * hash, and the time-wait half.  Pagination follows the usual
+ * get_info convention via offset/length/*start.
+ */
+int tcp6_get_info(char *buffer, char **start, off_t offset, int length, int dummy)
+{
+ int len = 0, num = 0, i;
+ off_t begin, pos = 0;
+ char tmpbuf[150];
+
+ if(offset < 149)
+ len += sprintf(buffer, "%-148s\n",
+ " sl " /* 6 */
+ "local_address " /* 38 */
+ "remote_address " /* 38 */
+ "st tx_queue rx_queue tr tm->when retrnsmt" /* 41 */
+ " uid timeout inode"); /* 21 */
+ /*----*/
+ /*144 */
+
+ pos = 149;
+ SOCKHASH_LOCK_READ();
+
+ /* First, walk listening socket table. */
+ for(i = 0; i < TCP_LHTABLE_SIZE; i++) {
+ struct sock *sk;
+
+ for (sk = tcp_listening_hash[i]; sk; sk = sk->next, num++) {
+ struct open_request *req;
+ struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
+
+ if (sk->family != PF_INET6)
+ continue;
+ pos += 149;
+ if (pos >= offset) {
+ get_tcp6_sock(sk, tmpbuf, num);
+ len += sprintf(buffer+len, "%-148s\n", tmpbuf);
+ if (len >= length)
+ goto out;
+ }
+ /* Embryonic connections that have not yet been
+ * accepted (req->sk == NULL) get their own rows.
+ */
+ for (req = tp->syn_wait_queue; req; req = req->dl_next, num++) {
+ if (req->sk)
+ continue;
+ pos += 149;
+ if (pos < offset)
+ continue;
+ get_openreq6(sk, req, tmpbuf, num);
+ len += sprintf(buffer+len, "%-148s\n", tmpbuf);
+ if(len >= length)
+ goto out;
+ }
+ }
+ }
+
+ /* Next, walk established hash chain. */
+ for (i = 0; i < (tcp_ehash_size >> 1); i++) {
+ struct sock *sk;
+
+ for(sk = tcp_ehash[i]; sk; sk = sk->next, num++) {
+ if (sk->family != PF_INET6)
+ continue;
+ pos += 149;
+ if (pos < offset)
+ continue;
+ get_tcp6_sock(sk, tmpbuf, num);
+ len += sprintf(buffer+len, "%-148s\n", tmpbuf);
+ if(len >= length)
+ goto out;
+ }
+ }
+
+ /* Finally, walk time wait buckets. */
+ for (i = (tcp_ehash_size>>1); i < tcp_ehash_size; i++) {
+ struct tcp_tw_bucket *tw;
+ for (tw = (struct tcp_tw_bucket *)tcp_ehash[i];
+ tw != NULL;
+ tw = (struct tcp_tw_bucket *)tw->next, num++) {
+ if (tw->family != PF_INET6)
+ continue;
+ pos += 149;
+ if (pos < offset)
+ continue;
+ get_timewait6_sock(tw, tmpbuf, num);
+ len += sprintf(buffer+len, "%-148s\n", tmpbuf);
+ if(len >= length)
+ goto out;
+ }
+ }
+
+out:
+ SOCKHASH_UNLOCK_READ();
+
+ /* Clip the generated text to the [offset, offset+length) window. */
+ begin = len - (pos - offset);
+ *start = buffer + begin;
+ len -= begin;
+ if(len > length)
+ len = length;
+ if (len < 0)
+ len = 0;
+ return len;
+}
+
struct proto tcpv6_prot = {
- (struct sock *)&tcpv6_prot, /* sklist_next */
- (struct sock *)&tcpv6_prot, /* sklist_prev */
tcp_close, /* close */
tcp_v6_connect, /* connect */
tcp_accept, /* accept */
tcp_v6_do_rcv, /* backlog_rcv */
tcp_v6_hash, /* hash */
tcp_v6_unhash, /* unhash */
- tcp_v6_rehash, /* rehash */
- tcp_good_socknum, /* good_socknum */
- tcp_v6_verify_bind, /* verify_bind */
+ tcp_v6_get_port, /* get_port */
128, /* max_header */
0, /* retransmits */
"TCPv6", /* name */
*
* Based on linux/ipv4/udp.c
*
- * $Id: udp.c,v 1.42 1999/06/09 10:11:24 davem Exp $
+ * $Id: udp.c,v 1.43 1999/07/02 11:26:44 davem Exp $
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
/* Grrr, addr_type already calculated by caller, but I don't want
* to add some silly "cookie" argument to this method just for that.
*/
+/* Bind sk to local UDP port snum; snum == 0 means pick a free
+ * ephemeral port.  Returns 0 on success, 1 on conflict.
+ */
-static int udp_v6_verify_bind(struct sock *sk, unsigned short snum)
+static int udp_v6_get_port(struct sock *sk, unsigned short snum)
{
- struct sock *sk2;
- int addr_type = ipv6_addr_type(&sk->net_pinfo.af_inet6.rcv_saddr);
- int retval = 0, sk_reuse = sk->reuse;
-
- SOCKHASH_LOCK_READ();
- for(sk2 = udp_hash[snum & (UDP_HTABLE_SIZE - 1)]; sk2 != NULL; sk2 = sk2->next) {
- if((sk2->num == snum) && (sk2 != sk)) {
- unsigned char state = sk2->state;
- int sk2_reuse = sk2->reuse;
-
- /* Two sockets can be bound to the same port if they're
- * bound to different interfaces.
- */
-
- if(sk2->bound_dev_if != sk->bound_dev_if)
- continue;
-
- if(addr_type == IPV6_ADDR_ANY || (!sk2->rcv_saddr)) {
- if((!sk2_reuse) ||
- (!sk_reuse) ||
- (state == TCP_LISTEN)) {
- retval = 1;
- break;
- }
- } else if(!ipv6_addr_cmp(&sk->net_pinfo.af_inet6.rcv_saddr,
- &sk2->net_pinfo.af_inet6.rcv_saddr)) {
- if((!sk_reuse) ||
- (!sk2_reuse) ||
- (state == TCP_LISTEN)) {
- retval = 1;
- break;
- }
+ SOCKHASH_LOCK_WRITE();
+ if (snum == 0) {
+ int best_size_so_far, best, result, i;
+
+ /* Ephemeral port search: start at the rover, prefer an
+ * empty hash chain, else remember the shortest one seen.
+ */
+ if (udp_port_rover > sysctl_local_port_range[1] ||
+ udp_port_rover < sysctl_local_port_range[0])
+ udp_port_rover = sysctl_local_port_range[0];
+ best_size_so_far = 32767;
+ best = result = udp_port_rover;
+ for (i = 0; i < UDP_HTABLE_SIZE; i++, result++) {
+ struct sock *sk;
+ int size;
+
+ sk = udp_hash[result & (UDP_HTABLE_SIZE - 1)];
+ if (!sk) {
+ if (result > sysctl_local_port_range[1])
+ result = sysctl_local_port_range[0] +
+ ((result - sysctl_local_port_range[0]) &
+ (UDP_HTABLE_SIZE - 1));
+ goto gotit;
}
+ size = 0;
+ do {
+ if (++size >= best_size_so_far)
+ goto next;
+ } while ((sk = sk->next) != NULL);
+ best_size_so_far = size;
+ best = result;
+ next:; /* null statement: a label must precede a statement */
+ }
+ result = best;
+ for(;; result += UDP_HTABLE_SIZE) {
+ if (result > sysctl_local_port_range[1])
+ result = sysctl_local_port_range[0]
+ + ((result - sysctl_local_port_range[0]) &
+ (UDP_HTABLE_SIZE - 1));
+ if (!udp_lport_inuse(result))
+ break;
+ }
+gotit:
+ udp_port_rover = snum = result;
+ } else {
+ struct sock *sk2;
+ int addr_type = ipv6_addr_type(&sk->net_pinfo.af_inet6.rcv_saddr);
+
+ /* Explicit port: scan the chain for a conflicting bind
+ * (same port, same device, overlapping address, and not
+ * both marked SO_REUSEADDR).
+ */
+ for (sk2 = udp_hash[snum & (UDP_HTABLE_SIZE - 1)];
+ sk2 != NULL;
+ sk2 = sk2->next) {
+ if (sk2->num == snum &&
+ sk2 != sk &&
+ sk2->bound_dev_if == sk->bound_dev_if &&
+ (!sk2->rcv_saddr ||
+ addr_type == IPV6_ADDR_ANY ||
+ !ipv6_addr_cmp(&sk->net_pinfo.af_inet6.rcv_saddr,
+ &sk2->net_pinfo.af_inet6.rcv_saddr)) &&
+ (!sk2->reuse || !sk->reuse))
+ goto fail;
}
}
- SOCKHASH_UNLOCK_READ();
- return retval;
-}
-
-static void udp_v6_hash(struct sock *sk)
-{
- struct sock **skp;
- int num = sk->num;
- num &= (UDP_HTABLE_SIZE - 1);
- skp = &udp_hash[num];
+ sk->num = snum;
+ SOCKHASH_UNLOCK_WRITE();
+ return 0;
- SOCKHASH_LOCK_WRITE();
- sk->next = *skp;
- *skp = sk;
- sk->hashent = num;
+fail:
SOCKHASH_UNLOCK_WRITE();
+ return 1;
}
-static void udp_v6_unhash(struct sock *sk)
+/* Link sk at the head of its UDP hash chain (pprev back-links give
+ * O(1) unhash) and update per-protocol usage statistics.
+ */
+static void udp_v6_hash(struct sock *sk)
{
- struct sock **skp;
- int num = sk->num;
-
- num &= (UDP_HTABLE_SIZE - 1);
- skp = &udp_hash[num];
+ struct sock **skp = &udp_hash[sk->num & (UDP_HTABLE_SIZE - 1)];
SOCKHASH_LOCK_WRITE();
- while(*skp != NULL) {
- if(*skp == sk) {
- *skp = sk->next;
- break;
- }
- skp = &((*skp)->next);
- }
+ if ((sk->next = *skp) != NULL)
+ (*skp)->pprev = &sk->next;
+ *skp = sk;
+ sk->pprev = skp;
+ sk->prot->inuse++;
+ if(sk->prot->highestinuse < sk->prot->inuse)
+ sk->prot->highestinuse = sk->prot->inuse;
SOCKHASH_UNLOCK_WRITE();
}
-static void udp_v6_rehash(struct sock *sk)
+/* Remove sk from its UDP hash chain, if hashed, and drop the
+ * per-protocol usage count.
+ */
+static void udp_v6_unhash(struct sock *sk)
{
- struct sock **skp;
- int num = sk->num;
- int oldnum = sk->hashent;
-
- num &= (UDP_HTABLE_SIZE - 1);
- skp = &udp_hash[oldnum];
-
SOCKHASH_LOCK_WRITE();
- while(*skp != NULL) {
- if(*skp == sk) {
- *skp = sk->next;
- break;
- }
- skp = &((*skp)->next);
+ /* pprev == NULL means the socket is not hashed; nothing to do. */
+ if (sk->pprev) {
+ if (sk->next)
+ sk->next->pprev = sk->pprev;
+ *sk->pprev = sk->next;
+ sk->pprev = NULL;
+ sk->prot->inuse--;
}
- sk->next = udp_hash[num];
- udp_hash[num] = sk;
- sk->hashent = num;
SOCKHASH_UNLOCK_WRITE();
}
}
if (addr_len < sizeof(*usin))
- return(-EINVAL);
+ return -EINVAL;
if (usin->sin6_family && usin->sin6_family != AF_INET6)
- return(-EAFNOSUPPORT);
+ return -EAFNOSUPPORT;
fl.fl6_flowlabel = 0;
if (np->sndflow) {
return -EMSGSIZE;
if (msg->msg_flags & ~(MSG_DONTROUTE|MSG_DONTWAIT))
- return(-EINVAL);
+ return -EINVAL;
fl.fl6_flowlabel = 0;
return udp_sendmsg(sk, msg, ulen);
if (addr_len < sizeof(*sin6))
- return(-EINVAL);
+ return -EINVAL;
if (sin6->sin6_family && sin6->sin6_family != AF_INET6)
- return(-EINVAL);
+ return -EINVAL;
if (sin6->sin6_port == 0)
- return(-EINVAL);
+ return -EINVAL;
udh.uh.dest = sin6->sin6_port;
daddr = &sin6->sin6_addr;
daddr = &sk->net_pinfo.af_inet6.daddr;
} else {
if (sk->state != TCP_ESTABLISHED)
- return(-ENOTCONN);
+ return -ENOTCONN;
udh.uh.dest = sk->dport;
daddr = &sk->net_pinfo.af_inet6.daddr;
"UDPv6" /* name */
};
+/* Format one IPv6 UDP socket as a /proc/net/udp6 row into tmpbuf;
+ * i is the row's "sl" index.
+ */
+static void get_udp6_sock(struct sock *sp, char *tmpbuf, int i)
+{
+ struct in6_addr *dest, *src;
+ __u16 destp, srcp;
+ int timer_active;
+ unsigned long timer_expires;
+
+ dest = &sp->net_pinfo.af_inet6.daddr;
+ src = &sp->net_pinfo.af_inet6.rcv_saddr;
+ destp = ntohs(sp->dport);
+ srcp = ntohs(sp->sport);
+ timer_active = (sp->timer.prev != NULL) ? 2 : 0;
+ timer_expires = (timer_active == 2 ? sp->timer.expires : jiffies);
+ sprintf(tmpbuf,
+ "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
+ "%02X %08X:%08X %02X:%08lX %08X %5d %8d %ld",
+ i,
+ src->s6_addr32[0], src->s6_addr32[1],
+ src->s6_addr32[2], src->s6_addr32[3], srcp,
+ dest->s6_addr32[0], dest->s6_addr32[1],
+ dest->s6_addr32[2], dest->s6_addr32[3], destp,
+ sp->state,
+ atomic_read(&sp->wmem_alloc), atomic_read(&sp->rmem_alloc),
+ timer_active, timer_expires-jiffies, 0,
+ /* Guard against sockets with no struct socket attached,
+ * exactly as the inode field below (and get_tcp6_sock) do.
+ */
+ sp->socket ? sp->socket->inode->i_uid : 0,
+ timer_active ? sp->timeout : 0,
+ sp->socket ? sp->socket->inode->i_ino : 0);
+}
+
+/* /proc/net/udp6 read handler: one 148-column row per IPv6 UDP
+ * socket, with get_info-style pagination via offset/length/*start.
+ */
+int udp6_get_info(char *buffer, char **start, off_t offset, int length, int dummy)
+{
+ int len = 0, num = 0, i;
+ off_t pos = 0;
+ off_t begin;
+ char tmpbuf[150];
+
+ if (offset < 149)
+ len += sprintf(buffer, "%-148s\n",
+ " sl " /* 6 */
+ "local_address " /* 38 */
+ "remote_address " /* 38 */
+ "st tx_queue rx_queue tr tm->when retrnsmt" /* 41 */
+ " uid timeout inode"); /* 21 */
+ /*----*/
+ /*144 */
+ pos = 149;
+ SOCKHASH_LOCK_READ();
+ for (i = 0; i < UDP_HTABLE_SIZE; i++) {
+ struct sock *sk;
+
+ for (sk = udp_hash[i]; sk; sk = sk->next, num++) {
+ if (sk->family != PF_INET6)
+ continue;
+ pos += 149;
+ if (pos < offset)
+ continue;
+ /* NOTE(review): passes the hash-bucket index i as the
+ * "sl" value, while tcp6_get_info passes the running
+ * counter num — confirm which is intended here.
+ */
+ get_udp6_sock(sk, tmpbuf, i);
+ len += sprintf(buffer+len, "%-148s\n", tmpbuf);
+ if(len >= length)
+ goto out;
+ }
+ }
+out:
+ SOCKHASH_UNLOCK_READ();
+ begin = len - (pos - offset);
+ *start = buffer + begin;
+ len -= begin;
+ if(len > length)
+ len = length;
+ if (len < 0)
+ len = 0;
+ return len;
+}
struct proto udpv6_prot = {
- (struct sock *)&udpv6_prot, /* sklist_next */
- (struct sock *)&udpv6_prot, /* sklist_prev */
udpv6_close, /* close */
udpv6_connect, /* connect */
NULL, /* accept */
udpv6_queue_rcv_skb, /* backlog_rcv */
udp_v6_hash, /* hash */
udp_v6_unhash, /* unhash */
- udp_v6_rehash, /* rehash */
- udp_good_socknum, /* good_socknum */
- udp_v6_verify_bind, /* verify_bind */
+ udp_v6_get_port, /* get_port */
128, /* max_header */
0, /* retransmits */
"UDP", /* name */
#include <net/transp_v6.h>
extern int tcp_tw_death_row_slot;
+extern int sysctl_local_port_range[2];
+extern int tcp_port_rover;
+extern int udp_port_rover;
#endif
#endif
EXPORT_SYMBOL(inet_recvmsg);
/* Socket demultiplexing. */
-EXPORT_SYMBOL(tcp_good_socknum);
EXPORT_SYMBOL(tcp_ehash);
EXPORT_SYMBOL(tcp_ehash_size);
EXPORT_SYMBOL(tcp_listening_hash);
EXPORT_SYMBOL(tcp_bhash);
EXPORT_SYMBOL(tcp_bhash_size);
-EXPORT_SYMBOL(udp_good_socknum);
EXPORT_SYMBOL(udp_hash);
EXPORT_SYMBOL(destroy_sock);
EXPORT_SYMBOL(tcp_v4_conn_request);
EXPORT_SYMBOL(tcp_create_openreq_child);
EXPORT_SYMBOL(tcp_bucket_create);
-EXPORT_SYMBOL(tcp_bucket_unlock);
+EXPORT_SYMBOL(__tcp_put_port);
+EXPORT_SYMBOL(tcp_put_port);
+EXPORT_SYMBOL(tcp_inherit_port);
EXPORT_SYMBOL(tcp_v4_syn_recv_sock);
EXPORT_SYMBOL(tcp_v4_do_rcv);
EXPORT_SYMBOL(tcp_v4_connect);
EXPORT_SYMBOL(tcp_connect);
EXPORT_SYMBOL(tcp_make_synack);
EXPORT_SYMBOL(tcp_tw_death_row_slot);
+EXPORT_SYMBOL(sysctl_local_port_range);
+EXPORT_SYMBOL(tcp_port_rover);
+EXPORT_SYMBOL(udp_port_rover);
EXPORT_SYMBOL(tcp_sync_mss);
EXPORT_SYMBOL(net_statistics);
#define __KERNEL_SYSCALLS__
#include <linux/version.h>
-#include <linux/config.h>
#include <linux/types.h>
#include <linux/malloc.h>
#include <linux/sched.h>