From 0d1267fec4c0b94a21d452b9b04da814f48b0799 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Fri, 23 Nov 2007 15:13:28 -0500 Subject: [PATCH] Import 2.1.45pre1 --- drivers/char/random.c | 5 +- fs/attr.c | 2 +- fs/autofs/inode.c | 1 - fs/autofs/root.c | 3 +- fs/binfmt_aout.c | 1 - fs/binfmt_elf.c | 1 - fs/dquot.c | 1 - fs/ext2/dir.c | 5 +- fs/ext2/file.c | 4 +- fs/ext2/ialloc.c | 6 +- fs/ext2/inode.c | 9 +- fs/ext2/ioctl.c | 4 +- fs/ext2/namei.c | 46 +- fs/ext2/symlink.c | 10 +- fs/ext2/truncate.c | 20 +- fs/inode.c | 926 +++++++++++++++++---------------------- fs/isofs/dir.c | 1 + fs/isofs/file.c | 1 + fs/isofs/inode.c | 6 +- fs/isofs/namei.c | 40 +- fs/minix/dir.c | 1 + fs/minix/file.c | 1 + fs/minix/inode.c | 17 +- fs/minix/namei.c | 196 ++++----- fs/nfs/inode.c | 1 - fs/open.c | 23 - fs/pipe.c | 9 +- fs/proc/inode.c | 1 - fs/read_write.c | 3 - fs/stat.c | 5 +- fs/ufs/ufs_file.c | 1 + fs/ufs/ufs_namei.c | 4 +- fs/ufs/ufs_super.c | 2 +- include/linux/fs.h | 79 ++-- include/linux/iso_fs.h | 2 +- include/linux/list.h | 39 ++ include/linux/minix_fs.h | 21 +- include/linux/ufs_fs.h | 2 +- kernel/ksyms.c | 6 +- kernel/sys.c | 2 - mm/filemap.c | 11 +- 41 files changed, 669 insertions(+), 849 deletions(-) create mode 100644 include/linux/list.h diff --git a/drivers/char/random.c b/drivers/char/random.c index 4cccb8faf769..527ac860930a 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -1125,8 +1125,7 @@ random_read(struct inode * inode, struct file * file, char * buf, unsigned long * update the access time. */ if (inode && count != 0) { - inode->i_atime = CURRENT_TIME; - inode->i_dirt = 1; + UPDATE_ATIME(inode); } return (count ? 
count : retval); @@ -1182,7 +1181,7 @@ random_write(struct inode * inode, struct file * file, } if ((ret > 0) && inode) { inode->i_mtime = CURRENT_TIME; - inode->i_dirt = 1; + mark_inode_dirty(inode); } return ret; } diff --git a/fs/attr.c b/fs/attr.c index be824dd4ac52..7a6b9f81423d 100644 --- a/fs/attr.c +++ b/fs/attr.c @@ -74,7 +74,7 @@ void inode_setattr(struct inode * inode, struct iattr * attr) if (!fsuser() && !in_group_p(inode->i_gid)) inode->i_mode &= ~S_ISGID; } - inode->i_dirt = 1; + mark_inode_dirty(inode); } } diff --git a/fs/autofs/inode.c b/fs/autofs/inode.c index 5560989a11b1..d52633024deb 100644 --- a/fs/autofs/inode.c +++ b/fs/autofs/inode.c @@ -274,5 +274,4 @@ static void autofs_read_inode(struct inode *inode) static void autofs_write_inode(struct inode *inode) { - inode->i_dirt = 0; } diff --git a/fs/autofs/root.c b/fs/autofs/root.c index 30c9e41b05ce..d84a32c0bc4f 100644 --- a/fs/autofs/root.c +++ b/fs/autofs/root.c @@ -136,9 +136,10 @@ static int autofs_root_lookup(struct inode *dir, struct qstr *str, struct inode return -EACCES; } - if ( !oz_mode && S_ISDIR(res->i_mode) && res->i_sb == dir->i_sb ) { + if ( !oz_mode && S_ISDIR(res->i_mode) && res->i_dentry->d_covers == res->i_dentry ) { /* Not a mount point yet, call 1-800-DAEMON */ DPRINTK(("autofs: waiting on non-mountpoint dir, inode = %lu, pid = %u, pgrp = %u\n", res->i_ino, current->pid, current->pgrp)); + iput(res); res = NULL; up(&dir->i_sem); status = autofs_wait(sbi,str); diff --git a/fs/binfmt_aout.c b/fs/binfmt_aout.c index 394f41eb1773..d9ef6d6acd75 100644 --- a/fs/binfmt_aout.c +++ b/fs/binfmt_aout.c @@ -214,7 +214,6 @@ do_aout_core_dump(long signr, struct pt_regs * regs) /* Finally dump the task struct. 
Not be used by gdb, but could be useful */ set_fs(KERNEL_DS); DUMP_WRITE(current,sizeof(*current)); - inode->i_status |= ST_MODIFIED; close_coredump: if (file.f_op->release) file.f_op->release(inode,&file); diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index f62a001916cd..3edccaf7e3f0 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -884,7 +884,6 @@ static int load_elf_library(int fd) */ static int dump_write(struct file *file, const void *addr, int nr) { - file->f_inode->i_status |= ST_MODIFIED; return file->f_op->write(file->f_inode, file, addr, nr) == nr; } diff --git a/fs/dquot.c b/fs/dquot.c index b830193d0e99..d00a5594602e 100644 --- a/fs/dquot.c +++ b/fs/dquot.c @@ -241,7 +241,6 @@ static void write_dquot(struct dquot *dquot) if (filp->f_op->write(filp->f_inode, filp, (char *)&dquot->dq_dqb, sizeof(struct dqblk)) == sizeof(struct dqblk)) dquot->dq_flags &= ~DQ_MOD; - /* inode->i_status |= ST_MODIFIED is willingly *not* done here */ up(&dquot->dq_mnt->mnt_sem); set_fs(fs); diff --git a/fs/ext2/dir.c b/fs/ext2/dir.c index bf64a2a1aead..b75acdef5fa1 100644 --- a/fs/ext2/dir.c +++ b/fs/ext2/dir.c @@ -212,9 +212,6 @@ revalidate: offset = 0; brelse (bh); } - if (DO_UPDATE_ATIME(inode)) { - inode->i_atime = CURRENT_TIME; - inode->i_dirt = 1; - } + UPDATE_ATIME(inode); return 0; } diff --git a/fs/ext2/file.c b/fs/ext2/file.c index 61d552d8c7c0..c7c83f013b50 100644 --- a/fs/ext2/file.c +++ b/fs/ext2/file.c @@ -122,7 +122,7 @@ static inline void remove_suid(struct inode *inode) mode &= inode->i_mode; if (mode && !suser()) { inode->i_mode &= ~mode; - inode->i_dirt = 1; + mark_inode_dirty(inode); } } @@ -251,7 +251,7 @@ static long ext2_file_write (struct inode * inode, struct file * filp, inode->u.ext2_i.i_osync--; inode->i_ctime = inode->i_mtime = CURRENT_TIME; filp->f_pos = pos; - inode->i_dirt = 1; + mark_inode_dirty(inode); return written; } diff --git a/fs/ext2/ialloc.c b/fs/ext2/ialloc.c index 3987e3afa7f8..f192072923e4 100644 --- a/fs/ext2/ialloc.c +++ 
b/fs/ext2/ialloc.c @@ -216,7 +216,7 @@ void ext2_free_inode (struct inode * inode) es->s_free_inodes_count = cpu_to_le32(le32_to_cpu(es->s_free_inodes_count) + 1); mark_buffer_dirty(sb->u.ext2_sb.s_sbh, 1); - inode->i_dirt = 0; + mark_inode_dirty(inode); } mark_buffer_dirty(bh, 1); if (sb->s_flags & MS_SYNCHRONOUS) { @@ -240,7 +240,7 @@ static void inc_inode_version (struct inode * inode, int mode) { inode->u.ext2_i.i_version++; - inode->i_dirt = 1; + mark_inode_dirty(inode); return; } @@ -416,7 +416,7 @@ repeat: mode |= S_ISGID; } else inode->i_gid = current->fsgid; - inode->i_dirt = 1; + mark_inode_dirty(inode); inode->i_ino = j; inode->i_blksize = PAGE_SIZE; /* This is the optimal IO size (for stat), not the fs block size */ inode->i_blocks = 0; diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c index f2dbff2d1a55..d00d49e856b5 100644 --- a/fs/ext2/inode.c +++ b/fs/ext2/inode.c @@ -38,7 +38,7 @@ void ext2_put_inode (struct inode * inode) inode->i_ino == EXT2_ACL_DATA_INO) return; inode->u.ext2_i.i_dtime = CURRENT_TIME; - inode->i_dirt = 1; + mark_inode_dirty(inode); ext2_update_inode(inode, IS_SYNC(inode)); inode->i_size = 0; if (inode->i_blocks) @@ -248,7 +248,7 @@ repeat: if (IS_SYNC(inode) || inode->u.ext2_i.i_osync) ext2_sync_inode (inode); else - inode->i_dirt = 1; + mark_inode_dirty(inode); return result; } @@ -322,7 +322,7 @@ repeat: } inode->i_ctime = CURRENT_TIME; inode->i_blocks += blocks; - inode->i_dirt = 1; + mark_inode_dirty(inode); inode->u.ext2_i.i_next_alloc_block = new_block; inode->u.ext2_i.i_next_alloc_goal = tmp; brelse (bh); @@ -591,7 +591,6 @@ static int ext2_update_inode(struct inode * inode, int do_sync) else for (block = 0; block < EXT2_N_BLOCKS; block++) raw_inode->i_block[block] = cpu_to_le32(inode->u.ext2_i.i_data[block]); mark_buffer_dirty(bh, 1); - inode->i_dirt = 0; if (do_sync) { ll_rw_block (WRITE, 1, &bh); wait_on_buffer (bh); @@ -671,7 +670,7 @@ int ext2_notify_change(struct inode *inode, struct iattr *iattr) inode->i_flags &= 
~S_IMMUTABLE; inode->u.ext2_i.i_flags &= ~EXT2_IMMUTABLE_FL; } - inode->i_dirt = 1; + mark_inode_dirty(inode); return 0; } diff --git a/fs/ext2/ioctl.c b/fs/ext2/ioctl.c index 387600bbfd47..c0514c01e14e 100644 --- a/fs/ext2/ioctl.c +++ b/fs/ext2/ioctl.c @@ -62,7 +62,7 @@ int ext2_ioctl (struct inode * inode, struct file * filp, unsigned int cmd, else inode->i_flags &= ~MS_NOATIME; inode->i_ctime = CURRENT_TIME; - inode->i_dirt = 1; + mark_inode_dirty(inode); return 0; case EXT2_IOC_GETVERSION: return put_user(inode->u.ext2_i.i_version, (int *) arg); @@ -74,7 +74,7 @@ int ext2_ioctl (struct inode * inode, struct file * filp, unsigned int cmd, if (get_user(inode->u.ext2_i.i_version, (int *) arg)) return -EFAULT; inode->i_ctime = CURRENT_TIME; - inode->i_dirt = 1; + mark_inode_dirty(inode); return 0; default: return -ENOTTY; diff --git a/fs/ext2/namei.c b/fs/ext2/namei.c index 85981a7982ea..229cbddd8ee4 100644 --- a/fs/ext2/namei.c +++ b/fs/ext2/namei.c @@ -252,7 +252,7 @@ static struct buffer_head * ext2_add_entry (struct inode * dir, de->inode = le32_to_cpu(0); de->rec_len = le16_to_cpu(sb->s_blocksize); dir->i_size = offset + sb->s_blocksize; - dir->i_dirt = 1; + mark_inode_dirty(dir); } else { ext2_debug ("skipping to next block\n"); @@ -297,7 +297,7 @@ static struct buffer_head * ext2_add_entry (struct inode * dir, * and/or different from the directory change time. 
*/ dir->i_mtime = dir->i_ctime = CURRENT_TIME; - dir->i_dirt = 1; + mark_inode_dirty(dir); dir->i_version = ++event; mark_buffer_dirty(bh, 1); *res_dir = de; @@ -366,11 +366,11 @@ int ext2_create (struct inode * dir, struct dentry * dentry, int mode) inode->i_op = &ext2_file_inode_operations; inode->i_mode = mode; - inode->i_dirt = 1; + mark_inode_dirty(inode); bh = ext2_add_entry (dir, dentry->d_name.name, dentry->d_name.len, &de, &err); if (!bh) { inode->i_nlink--; - inode->i_dirt = 1; + mark_inode_dirty(inode); iput (inode); return err; } @@ -423,11 +423,11 @@ int ext2_mknod (struct inode * dir, struct dentry *dentry, int mode, int rdev) init_fifo(inode); if (S_ISBLK(mode) || S_ISCHR(mode)) inode->i_rdev = to_kdev_t(rdev); - inode->i_dirt = 1; + mark_inode_dirty(inode); bh = ext2_add_entry (dir, dentry->d_name.name, dentry->d_name.len, &de, &err); if (!bh) { inode->i_nlink--; - inode->i_dirt = 1; + mark_inode_dirty(inode); iput(inode); return err; } @@ -465,7 +465,7 @@ int ext2_mkdir(struct inode * dir, struct dentry * dentry, int mode) dir_block = ext2_bread (inode, 0, 1, &err); if (!dir_block) { inode->i_nlink--; - inode->i_dirt = 1; + mark_inode_dirty(inode); iput (inode); return err; } @@ -486,11 +486,11 @@ int ext2_mkdir(struct inode * dir, struct dentry * dentry, int mode) inode->i_mode = S_IFDIR | (mode & (S_IRWXUGO|S_ISVTX) & ~current->fs->umask); if (dir->i_mode & S_ISGID) inode->i_mode |= S_ISGID; - inode->i_dirt = 1; + mark_inode_dirty(inode); bh = ext2_add_entry (dir, dentry->d_name.name, dentry->d_name.len, &de, &err); if (!bh) { inode->i_nlink = 0; - inode->i_dirt = 1; + mark_inode_dirty(inode); iput (inode); return err; } @@ -502,7 +502,7 @@ int ext2_mkdir(struct inode * dir, struct dentry * dentry, int mode) wait_on_buffer (bh); } dir->i_nlink++; - dir->i_dirt = 1; + mark_inode_dirty(dir); d_instantiate(dentry, inode, D_DIR); brelse (bh); return 0; @@ -640,10 +640,10 @@ int ext2_rmdir (struct inode * dir, struct dentry *dentry) inode->i_nlink); 
inode->i_version = ++event; inode->i_nlink = 0; - inode->i_dirt = 1; + mark_inode_dirty(inode); dir->i_nlink--; inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME; - dir->i_dirt = 1; + mark_inode_dirty(dir); d_delete(dentry); end_rmdir: @@ -701,9 +701,9 @@ int ext2_unlink(struct inode * dir, struct dentry *dentry) wait_on_buffer (bh); } dir->i_ctime = dir->i_mtime = CURRENT_TIME; - dir->i_dirt = 1; + mark_inode_dirty(dir); inode->i_nlink--; - inode->i_dirt = 1; + mark_inode_dirty(inode); inode->i_ctime = dir->i_ctime; retval = 0; d_delete(dentry); /* This also frees the inode */ @@ -738,7 +738,7 @@ int ext2_symlink (struct inode * dir, struct dentry *dentry, const char * symnam name_block = ext2_bread (inode, 0, 1, &err); if (!name_block) { inode->i_nlink--; - inode->i_dirt = 1; + mark_inode_dirty(inode); iput (inode); return err; } @@ -758,12 +758,12 @@ int ext2_symlink (struct inode * dir, struct dentry *dentry, const char * symnam brelse (name_block); } inode->i_size = i; - inode->i_dirt = 1; + mark_inode_dirty(inode); bh = ext2_add_entry (dir, dentry->d_name.name, dentry->d_name.len, &de, &err); if (!bh) { inode->i_nlink--; - inode->i_dirt = 1; + mark_inode_dirty(inode); iput (inode); return err; } @@ -808,7 +808,7 @@ int ext2_link (struct inode * inode, struct inode * dir, struct dentry *dentry) brelse (bh); inode->i_nlink++; inode->i_ctime = CURRENT_TIME; - inode->i_dirt = 1; + mark_inode_dirty(inode); atomic_inc(&inode->i_count); d_instantiate(dentry, inode, 0); return 0; @@ -953,21 +953,21 @@ static int do_ext2_rename (struct inode * old_dir, struct dentry *old_dentry, if (new_inode) { new_inode->i_nlink--; new_inode->i_ctime = CURRENT_TIME; - new_inode->i_dirt = 1; + mark_inode_dirty(new_inode); } old_dir->i_ctime = old_dir->i_mtime = CURRENT_TIME; - old_dir->i_dirt = 1; + mark_inode_dirty(old_dir); if (dir_bh) { PARENT_INO(dir_bh->b_data) = le32_to_cpu(new_dir->i_ino); mark_buffer_dirty(dir_bh, 1); old_dir->i_nlink--; - old_dir->i_dirt = 1; + 
mark_inode_dirty(old_dir); if (new_inode) { new_inode->i_nlink--; - new_inode->i_dirt = 1; + mark_inode_dirty(new_inode); } else { new_dir->i_nlink++; - new_dir->i_dirt = 1; + mark_inode_dirty(new_dir); } } mark_buffer_dirty(old_bh, 1); diff --git a/fs/ext2/symlink.c b/fs/ext2/symlink.c index 654736c0df01..781f9165d076 100644 --- a/fs/ext2/symlink.c +++ b/fs/ext2/symlink.c @@ -65,10 +65,7 @@ static struct dentry * ext2_follow_link(struct inode * inode, struct dentry *bas } link = bh->b_data; } - if (!IS_RDONLY(inode)) { - inode->i_atime = CURRENT_TIME; - inode->i_dirt = 1; - } + UPDATE_ATIME(inode); base = lookup_dentry(link, base, 1); if (bh) brelse(bh); @@ -101,10 +98,7 @@ static int ext2_readlink (struct inode * inode, char * buffer, int buflen) i++; if (copy_to_user(buffer, link, i)) i = -EFAULT; - if (DO_UPDATE_ATIME(inode)) { - inode->i_atime = CURRENT_TIME; - inode->i_dirt = 1; - } + UPDATE_ATIME(inode); if (bh) brelse (bh); return i; diff --git a/fs/ext2/truncate.c b/fs/ext2/truncate.c index a9e59ca00c89..39e5e49a7444 100644 --- a/fs/ext2/truncate.c +++ b/fs/ext2/truncate.c @@ -91,7 +91,7 @@ repeat: } *p = 0; inode->i_blocks -= blocks; - inode->i_dirt = 1; + mark_inode_dirty(inode); bforget(bh); if (free_count == 0) { block_to_free = tmp; @@ -172,7 +172,7 @@ repeat: } /* ext2_free_blocks (inode, tmp, 1); */ inode->i_blocks -= blocks; - inode->i_dirt = 1; + mark_inode_dirty(inode); } if (free_count > 0) ext2_free_blocks (inode, block_to_free, free_count); @@ -187,7 +187,7 @@ repeat: tmp = *p; *p = 0; inode->i_blocks -= blocks; - inode->i_dirt = 1; + mark_inode_dirty(inode); ext2_free_blocks (inode, tmp, 1); } if (IS_SYNC(inode) && buffer_dirty(ind_bh)) { @@ -259,7 +259,7 @@ repeat: } /* ext2_free_blocks (inode, tmp, 1); */ inode->i_blocks -= blocks; - inode->i_dirt = 1; + mark_inode_dirty(inode); } if (free_count > 0) ext2_free_blocks (inode, block_to_free, free_count); @@ -274,7 +274,7 @@ repeat: tmp = le32_to_cpu(*p); *p = cpu_to_le32(0); inode->i_blocks 
-= blocks; - inode->i_dirt = 1; + mark_inode_dirty(inode); ext2_free_blocks (inode, tmp, 1); } if (IS_SYNC(inode) && buffer_dirty(ind_bh)) { @@ -334,7 +334,7 @@ repeat: tmp = *p; *p = 0; inode->i_blocks -= blocks; - inode->i_dirt = 1; + mark_inode_dirty(inode); ext2_free_blocks (inode, tmp, 1); } if (IS_SYNC(inode) && buffer_dirty(dind_bh)) { @@ -393,7 +393,7 @@ repeat: tmp = le32_to_cpu(*p); *p = cpu_to_le32(0); inode->i_blocks -= blocks; - inode->i_dirt = 1; + mark_inode_dirty(inode); ext2_free_blocks (inode, tmp, 1); } if (IS_SYNC(inode) && buffer_dirty(dind_bh)) { @@ -452,7 +452,7 @@ repeat: tmp = *p; *p = 0; inode->i_blocks -= blocks; - inode->i_dirt = 1; + mark_inode_dirty(inode); ext2_free_blocks (inode, tmp, 1); } if (IS_SYNC(inode) && buffer_dirty(tind_bh)) { @@ -486,7 +486,7 @@ void ext2_truncate (struct inode * inode) retry |= trunc_tindirect (inode); if (!retry) break; - if (IS_SYNC(inode) && inode->i_dirt) + if (IS_SYNC(inode) && test_bit(I_DIRTY, &inode->i_state)) ext2_sync_inode (inode); current->counter = 0; schedule (); @@ -510,5 +510,5 @@ void ext2_truncate (struct inode * inode) } } inode->i_mtime = inode->i_ctime = CURRENT_TIME; - inode->i_dirt = 1; + mark_inode_dirty(inode); } diff --git a/fs/inode.c b/fs/inode.c index 0f11cde829dd..e33ce5fd6295 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -1,628 +1,518 @@ /* - * fs/inode.c + * linux/fs/inode.c * - * Complete reimplementation - * (C) 1997 Thomas Schoebel-Theuer + * (C) 1997 Linus Torvalds */ -/* Everything here is intended to be MP-safe. However, other parts - * of the kernel are not yet MP-safe, in particular the inode->i_count++ - * that are spread over everywhere. These should be replaced by - * iinc() as soon as possible. Since I have no MP machine, I could - * not test it. 
- */ -#include -#include #include #include #include -#include -#include #include -#include - -/* #define DEBUG */ - -#define HASH_SIZE 1024 /* must be a power of 2 */ -#define NR_LEVELS 4 - -#define ST_AGED 1 -#define ST_HASHED 2 -#define ST_EMPTY 4 -#define ST_TO_READ 8 -#define ST_TO_WRITE 16 -#define ST_TO_PUT 32 -#define ST_TO_DROP 64 -#define ST_IO (ST_TO_READ|ST_TO_WRITE|ST_TO_PUT|ST_TO_DROP) -#define ST_WAITING 128 -#define ST_FREEING 256 -#define ST_IBASKET 512 - -/* The idea is to keep empty inodes in a separate list, so no search - * is required as long as empty inodes exit. - * All reusable inodes occurring in the hash table with i_count==0 - * are also registered in the ringlist aged_i[level], but in LRU order. - * Used inodes with i_count>0 are kept solely in the hashtable and in - * all_i, but in no other list. - * The level is used for multilevel aging to avoid thrashing; each - * time i_count decreases to 0, the inode is inserted into the next level - * ringlist. Cache reusage is simply by taking the _last_ element from the - * lowest-level ringlist that contains inodes. - * In contrast to the old code, there isn't any O(n) search overhead now - * in iget/iput (if you make HASH_SIZE large enough). +#include + +/* + * New inode.c implementation. + * + * This implementation has the basic premise of trying + * to be extremely low-overhead and SMP-safe, yet be + * simple enough to be "obviously correct". + * + * Famous last words. + */ + +/* + * Inode lookup is no longer as critical as it used to be: + * most of the lookups are going to be through the dcache. + */ +#define HASH_BITS 8 +#define HASH_SIZE (1UL << HASH_BITS) +#define HASH_MASK (HASH_SIZE-1) + +/* + * Each inode can be on two separate lists. One is + * the hash list of the inode, used for lookups. The + * other linked list is the "type" list: + * "in_use" - valid inode, hashed + * "dirty" - valid inode, hashed, dirty. + * "unused" - ready to be re-used. Not hashed. 
+ * + * The two first versions also have a dirty list, allowing + * for low-overhead inode sync() operations. + */ + +LIST_HEAD(inode_in_use); +LIST_HEAD(inode_dirty); +LIST_HEAD(inode_unused); +struct list_head inode_hashtable[HASH_SIZE]; + +/* + * A simple spinlock to protect the list manipulations + */ +spinlock_t inode_lock = SPIN_LOCK_UNLOCKED; + +/* + * Statistics gathering.. Not actually done yet. */ -static struct inode * hashtable[HASH_SIZE];/* linked with i_hash_{next,prev} */ -static struct inode * all_i = NULL; /* linked with i_{next,prev} */ -static struct inode * empty_i = NULL; /* linked with i_{next,prev} */ -static struct inode * aged_i[NR_LEVELS+1]; /* linked with i_lru_{next,prev} */ -static int aged_reused[NR_LEVELS+1]; /* # removals from aged_i[level] */ -static int age_table[NR_LEVELS+1] = { /* You may tune this. */ - 1, 4, 10, 100, 1000 -}; /* after which # of uses to increase to the next level */ - -/* This is for kernel/sysctl.c */ - -/* Just aligning plain ints and arrays thereof doesn't work reliably.. */ struct { int nr_inodes; int nr_free_inodes; - int aged_count[NR_LEVELS+1]; /* # in each level */ + int dummy[10]; } inodes_stat; int max_inodes = NR_INODE; -unsigned long last_inode = 0; -void inode_init(void) +void __mark_inode_dirty(struct inode *inode) { - memset(hashtable, 0, sizeof(hashtable)); - memset(aged_i, 0, sizeof(aged_i)); - memset(aged_reused, 0, sizeof(aged_reused)); - memset(&inodes_stat, 0, sizeof(inodes_stat)); + spin_lock(&inode_lock); + list_del(&inode->i_list); + list_add(&inode->i_list, &inode_dirty); + spin_unlock(&inode_lock); } -/* Intended for short locks of the above global data structures. - * Could be replaced with spinlocks completely, since there is - * no blocking during manipulation of the static data; however the - * lock in invalidate_inodes() may last relatively long. 
- */ -#ifdef __SMP__ -struct semaphore vfs_sem = MUTEX; -#endif - -DEF_INSERT(all,struct inode,i_next,i_prev) -DEF_REMOVE(all,struct inode,i_next,i_prev) - -DEF_INSERT(lru,struct inode,i_lru_next,i_lru_prev) -DEF_REMOVE(lru,struct inode,i_lru_next,i_lru_prev) +static inline void unlock_inode(struct inode *inode) +{ + clear_bit(I_LOCK, &inode->i_state); + wake_up(&inode->i_wait); +} -DEF_INSERT(hash,struct inode,i_hash_next,i_hash_prev) -DEF_REMOVE(hash,struct inode,i_hash_next,i_hash_prev) +static void __wait_on_inode(struct inode * inode) +{ + struct wait_queue wait = { current, NULL }; -DEF_INSERT(ibasket,struct inode,i_basket_next,i_basket_prev) -DEF_REMOVE(ibasket,struct inode,i_basket_next,i_basket_prev) + add_wait_queue(&inode->i_wait, &wait); +repeat: + current->state = TASK_UNINTERRUPTIBLE; + if (test_bit(I_LOCK, &inode->i_state)) { + schedule(); + goto repeat; + } + remove_wait_queue(&inode->i_wait, &wait); + current->state = TASK_RUNNING; +} -#ifdef DEBUG -extern void printpath(struct dentry * entry); -struct inode * xtst[15000]; -int xcnt = 0; +static inline void wait_on_inode(struct inode *inode) +{ + if (test_bit(I_LOCK, &inode->i_state)) + __wait_on_inode(inode); +} -void xcheck(char * txt, struct inode * p) +/* + * These are initializations that only need to be done + * once, because the fields are idempotent across use + * of the inode.. + */ +static inline void init_once(struct inode * inode) { - int i; - for(i=xcnt-1; i>=0; i--) - if (xtst[i] == p) - return; - printk("Bogus inode %p in %s\n", p, txt); + memset(inode, 0, sizeof(*inode)); + init_waitqueue(&inode->i_wait); + sema_init(&inode->i_sem, 1); } -#else -#define xcheck(t,p) /*nothing*/ -#endif -static inline struct inode * grow_inodes(void) + +/* + * Look out! This returns with the inode lock held if + * it got an inode.. 
+ */ +static struct inode * grow_inodes(void) { - struct inode * res; - struct inode * inode = res = (struct inode*)__get_free_page(GFP_KERNEL); - int size = PAGE_SIZE; - if (!inode) - return NULL; - - size -= sizeof(struct inode); - inode++; - inodes_stat.nr_inodes++; -#ifdef DEBUG -xtst[xcnt++]=res; -#endif - while(size >= sizeof(struct inode)) { -#ifdef DEBUG -xtst[xcnt++]=inode; -#endif - inodes_stat.nr_inodes++; - inodes_stat.nr_free_inodes++; - insert_all(&empty_i, inode); - inode->i_status = ST_EMPTY; - inode++; - size -= sizeof(struct inode); + struct inode * inode = (struct inode *)__get_free_page(GFP_KERNEL); + + if (inode) { + int size; + struct inode * tmp; + + spin_lock(&inode_lock); + size = PAGE_SIZE - 2*sizeof(struct inode); + tmp = inode; + do { + tmp++; + init_once(tmp); + list_add(&tmp->i_list, &inode_unused); + size -= sizeof(struct inode); + } while (size >= 0); + init_once(inode); } - return res; + return inode; } -static inline int hash(dev_t i_dev, unsigned long i_ino) +static inline void write_inode(struct inode *inode) { - return ((int)i_ino ^ ((int)i_dev << 6)) & (HASH_SIZE-1); + if (inode->i_sb && inode->i_sb->s_op && inode->i_sb->s_op->write_inode) + inode->i_sb->s_op->write_inode(inode); } -static inline blocking void wait_io(struct inode * inode, unsigned short flags) +static inline void sync_list(struct list_head *head, struct list_head *clean) { - while(inode->i_status & flags) { - struct wait_queue wait = {current, NULL}; - inode->i_status |= ST_WAITING; - vfs_unlock(); - add_wait_queue(&inode->i_wait, &wait); - sleep_on(&inode->i_wait); - remove_wait_queue(&inode->i_wait, &wait); - vfs_lock(); - } + struct list_head * tmp; + + while ((tmp = head->prev) != head) { + struct inode *inode = list_entry(tmp, struct inode, i_list); + list_del(tmp); + + /* + * If the inode is locked, it's already being written out. + * We have to wait for it, though. 
+ */ + if (test_bit(I_LOCK, &inode->i_state)) { + list_add(tmp, head); + spin_unlock(&inode_lock); + __wait_on_inode(inode); + } else { + list_add(tmp, clean); + clear_bit(I_DIRTY, &inode->i_state); + set_bit(I_LOCK, &inode->i_state); + spin_unlock(&inode_lock); + write_inode(inode); + unlock_inode(inode); + } + spin_lock(&inode_lock); + } } -static inline blocking void set_io(struct inode * inode, - unsigned short waitflags, - unsigned short setflags) +/* + * "sync_inodes()" goes through the dirty list + * and writes them out and puts them back on + * the normal list. + */ +void sync_inodes(kdev_t dev) { - wait_io(inode, waitflags); - inode->i_status |= setflags; - vfs_unlock(); + spin_lock(&inode_lock); + sync_list(&inode_dirty, &inode_in_use); + spin_unlock(&inode_lock); } -static inline blocking int release_io(struct inode * inode, unsigned short flags) +/* + * This is called by the filesystem to tell us + * that the inode is no longer useful. We just + * terminate it with extreme predjudice. 
+ */ +void clear_inode(struct inode *inode) { - int res = 0; - vfs_lock(); - inode->i_status &= ~flags; - if (inode->i_status & ST_WAITING) { - inode->i_status &= ~ST_WAITING; - vfs_unlock(); - wake_up(&inode->i_wait); - res = 1; - } - return res; + truncate_inode_pages(inode, 0); + wait_on_inode(inode); + if (IS_WRITABLE(inode) && inode->i_sb && inode->i_sb->dq_op) + inode->i_sb->dq_op->drop(inode); + + spin_lock(&inode_lock); + inode->i_state = 0; + list_del(&inode->i_hash); + list_del(&inode->i_list); + list_add(&inode->i_list, &inode_unused); + spin_unlock(&inode_lock); } -static inline blocking void _io(void (*op)(struct inode*), struct inode * inode, - unsigned short waitflags, unsigned short setflags) +#define CAN_UNUSE(inode) \ + ((atomic_read(&(inode)->i_count) == 0) && \ + ((inode)->i_nrpages == 0) && \ + (!test_bit(I_LOCK, &(inode)->i_state))) + +static void invalidate_list(struct list_head *head, kdev_t dev) { - /* Do nothing if the same op is already in progress. */ - if (op && !(inode->i_status & setflags)) { - set_io(inode, waitflags, setflags); - op(inode); - if (release_io(inode, setflags)) { - /* Somebody grabbed my inode from under me. 
*/ -#ifdef DEBUG - printk("_io grab!\n"); -#endif - vfs_lock(); - } + struct list_head *next; + + next = head->next; + for (;;) { + struct list_head * tmp = next; + struct inode * inode; + + next = next->next; + if (tmp == head) + break; + inode = list_entry(tmp, struct inode, i_list); + if (inode->i_dev != dev) + continue; + if (!CAN_UNUSE(inode)) + continue; + list_del(&inode->i_hash); + list_del(&inode->i_list); + list_add(&inode->i_list, &inode_unused); } } -blocking void _clear_inode(struct inode * inode, int external, int verbose) +void invalidate_inodes(kdev_t dev) { -xcheck("_clear_inode",inode); - if (inode->i_status & ST_IBASKET) { - struct super_block * sb = inode->i_sb; - remove_ibasket(&sb->s_ibasket, inode); - sb->s_ibasket_count--; - inode->i_status &= ~ST_IBASKET; -#if 0 -printpath(inode->i_dentry); -printk(" put_inode\n"); -#endif - _io(sb->s_op->put_inode, inode, ST_TO_PUT|ST_TO_WRITE, ST_TO_PUT); - if (inode->i_status & ST_EMPTY) - return; - } - if (inode->i_status & ST_HASHED) - remove_hash(&hashtable[hash(inode->i_dev, inode->i_ino)], inode); - if (inode->i_status & ST_AGED) { - /* "cannot happen" when called from an fs because at least - * the caller must use it. Can happen when called from - * invalidate_inodes(). */ - if (verbose) - printk("VFS: clearing aged inode\n"); - if (atomic_read(&inode->i_count)) - printk("VFS: aged inode is in use\n"); - remove_lru(&aged_i[inode->i_level], inode); - inodes_stat.aged_count[inode->i_level]--; - } - if (!external && inode->i_status & ST_IO) { - printk("VFS: clearing inode during IO operation\n"); - } - if (!(inode->i_status & ST_EMPTY)) { - remove_all(&all_i, inode); - inode->i_status = ST_EMPTY; - if (inode->i_pages) { - vfs_unlock(); /* may block, can that be revised? 
*/ - truncate_inode_pages(inode, 0); - vfs_lock(); - } - insert_all(&empty_i, inode); - inodes_stat.nr_free_inodes++; - } else if(external) - printk("VFS: empty inode is unnecessarily cleared multiple " - "times by an fs\n"); - else - printk("VFS: clearing empty inode\n"); - inode->i_status = ST_EMPTY; - /* The inode is not really cleared any more here, but only once - * when taken from empty_i. This saves instructions and processor - * cache pollution. - */ + spin_lock(&inode_lock); + invalidate_list(&inode_in_use, dev); + invalidate_list(&inode_dirty, dev); + spin_unlock(&inode_lock); } -void insert_inode_hash(struct inode * inode) +/* + * This is called with the inode lock held. It just looks at the last + * inode on the in-use list, and if the inode is trivially freeable + * we just move it to the unused list. + * + * Otherwise we just move the inode to be the first inode and expect to + * get back to the problem later.. + */ +static void try_to_free_inodes(void) { -xcheck("insert_inode_hash",inode); - vfs_lock(); - if (!(inode->i_status & ST_HASHED)) { - insert_hash(&hashtable[hash(inode->i_dev, inode->i_ino)], inode); - inode->i_status |= ST_HASHED; - } else - printk("VFS: trying to hash an inode again\n"); - vfs_unlock(); + struct list_head * tmp; + struct list_head *head = &inode_in_use; + + tmp = head->prev; + if (tmp != head) { + struct inode * inode; + + list_del(tmp); + inode = list_entry(tmp, struct inode, i_list); + if (CAN_UNUSE(inode)) { + list_del(&inode->i_hash); + head = &inode_unused; + } + list_add(tmp, head); + } } + -blocking struct inode * _get_empty_inode(void) +static struct inode * find_inode(struct super_block * sb, unsigned long ino, struct list_head *head) { + struct list_head *tmp; struct inode * inode; - int retry = 0; -retry: - inode = empty_i; - if (inode) { - remove_all(&empty_i, inode); - inodes_stat.nr_free_inodes--; - } else if(inodes_stat.nr_inodes < max_inodes || retry > 2) { - inode = grow_inodes(); - } - if (!inode) { - int 
level; - int usable = 0; - for(level = 0; level <= NR_LEVELS; level++) - if (aged_i[level]) { - inode = aged_i[level]->i_lru_prev; - /* Here is the picking strategy, tune this */ - if (aged_reused[level] < (usable++ ? - inodes_stat.aged_count[level] : - 2)) - break; - aged_reused[level] = 0; - } - if (inode) { - if (!(inode->i_status & ST_AGED)) - printk("VFS: inode aging inconsistency\n"); - if (atomic_read(&inode->i_count)) - printk("VFS: i_count of aged inode is not zero\n"); - if (inode->i_dirt) - printk("VFS: Hey, somebody made my aged inode dirty\n"); - _clear_inode(inode, 0, 0); - goto retry; - } + tmp = head; + for (;;) { + tmp = tmp->next; + inode = NULL; + if (tmp == head) + break; + inode = list_entry(tmp, struct inode, i_hash); + if (inode->i_sb != sb) + continue; + if (inode->i_ino != ino) + continue; + atomic_inc(&inode->i_count); + break; } - if (!inode) { - vfs_unlock(); - schedule(); - if (retry > 10) - panic("VFS: cannot repair inode shortage"); - if (retry > 2) - printk("VFS: no free inodes\n"); - retry++; - vfs_lock(); - goto retry; - } -xcheck("get_empty_inode",inode); - memset(inode, 0, sizeof(struct inode)); - atomic_set(&inode->i_count, 1); + return inode; +} + +/* + * This just initializes the inode fields + * to known values before returning the inode.. + * + * i_sb, i_ino, i_count, i_state and the lists have + * been initialized elsewhere.. 
+ */ +void clean_inode(struct inode *inode) +{ + memset(&inode->u, 0, sizeof(inode->u)); + inode->i_pipe = 0; + inode->i_sock = 0; + inode->i_op = NULL; inode->i_nlink = 1; + memset(&inode->i_dquot, 0, sizeof(inode->i_dquot)); sema_init(&inode->i_sem, 1); - inode->i_ino = ++last_inode; - inode->i_version = ++event; - insert_all(&all_i, inode); - return inode; } -static inline blocking struct inode * _get_empty_inode_hashed(dev_t i_dev, - unsigned long i_ino) +/* + * This gets called with I_LOCK held: it needs + * to read the inode and then unlock it + */ +static inline void read_inode(struct inode *inode, struct super_block *sb) { - struct inode ** base = &hashtable[hash(i_dev, i_ino)]; - struct inode * inode = *base; - if (inode) do { - if (inode->i_ino == i_ino && inode->i_dev == i_dev) { - atomic_inc(&inode->i_count); - printk("VFS: inode %lx is already in use\n", i_ino); - return inode; - } - inode = inode->i_hash_next; - } while(inode != *base); - inode = _get_empty_inode(); - inode->i_dev = i_dev; - inode->i_ino = i_ino; - insert_hash(base, inode); - inode->i_status |= ST_HASHED; - return inode; + sb->s_op->read_inode(inode); + unlock_inode(inode); } -blocking struct inode * get_empty_inode_hashed(dev_t i_dev, unsigned long i_ino) +struct inode * get_empty_inode(void) { + static unsigned long last_ino = 0; struct inode * inode; + struct list_head * tmp = inode_unused.next; + + if (tmp != &inode_unused) { + list_del(tmp); + inode = list_entry(tmp, struct inode, i_list); +add_new_inode: + INIT_LIST_HEAD(&inode->i_list); + INIT_LIST_HEAD(&inode->i_hash); + inode->i_sb = NULL; + inode->i_ino = ++last_ino; + atomic_set(&inode->i_count, 1); + inode->i_state = 0; + clean_inode(inode); + return inode; + } - vfs_lock(); - inode = _get_empty_inode_hashed(i_dev, i_ino); - vfs_unlock(); + /* + * Warning: if this succeeded, we will now + * return with the inode lock, and we need to + * unlock it. 
+ */ + inode = grow_inodes(); + if (inode) { + spin_unlock(&inode_lock); + goto add_new_inode; + } return inode; } -void _get_inode(struct inode * inode) +struct inode * get_pipe_inode(void) { - if (inode->i_status & ST_IBASKET) { - inode->i_status &= ~ST_IBASKET; - remove_ibasket(&inode->i_sb->s_ibasket, inode); - inode->i_sb->s_ibasket_count--; - } - if (inode->i_status & ST_AGED) { - inode->i_status &= ~ST_AGED; - remove_lru(&aged_i[inode->i_level], inode); - inodes_stat.aged_count[inode->i_level]--; - aged_reused[inode->i_level]++; - if (S_ISDIR(inode->i_mode)) - /* make dirs less thrashable */ - inode->i_level = NR_LEVELS-1; - else if(inode->i_nlink > 1) - /* keep hardlinks totally separate */ - inode->i_level = NR_LEVELS; - else if(++inode->i_reuse_count >= age_table[inode->i_level] - && inode->i_level < NR_LEVELS-1) - inode->i_level++; - if (atomic_read(&inode->i_count) != 1) - printk("VFS: inode count was not zero (%d after ++)\n", atomic_read(&inode->i_count)); - } else if(inode->i_status & ST_EMPTY) - printk("VFS: invalid reuse of empty inode\n"); -} + extern struct inode_operations pipe_inode_operations; + struct inode *inode = get_empty_inode(); -blocking struct inode * iget(struct super_block * sb, unsigned long i_ino) -{ - struct inode ** base; - struct inode * inode; - dev_t i_dev; - - if (!sb) - panic("VFS: iget with sb == NULL"); - i_dev = sb->s_dev; - if (!i_dev) - panic("VFS: sb->s_dev is NULL\n"); - base = &hashtable[hash(i_dev, i_ino)]; - vfs_lock(); - inode = *base; - if (inode) do { - if (inode->i_ino == i_ino && inode->i_dev == i_dev) { - atomic_inc(&inode->i_count); - _get_inode(inode); - - /* Allow concurrent writes/puts. This is in particular - * useful e.g. when syncing large chunks. 
- * I hope the i_dirty flag is everywhere set as soon - * as _any_ modifcation is made and _before_ - * giving up control, so no harm should occur if data - * is modified during writes, because it will be - * rewritten again (does a short inconsistency on the - * disk harm?) - */ - wait_io(inode, ST_TO_READ); - vfs_unlock(); - goto done; + if (inode) { + unsigned long page = __get_free_page(GFP_USER); + + if (!page) { + iput(inode); + inode = NULL; + } else { + PIPE_BASE(*inode) = (char *) page; + inode->i_op = &pipe_inode_operations; + atomic_set(&inode->i_count, 1); + PIPE_WAIT(*inode) = NULL; + PIPE_START(*inode) = PIPE_LEN(*inode) = 0; + PIPE_RD_OPENERS(*inode) = PIPE_WR_OPENERS(*inode) = 0; + PIPE_READERS(*inode) = PIPE_WRITERS(*inode) = 1; + PIPE_LOCK(*inode) = 0; + inode->i_pipe = 1; + inode->i_mode |= S_IFIFO | S_IRUSR | S_IWUSR; + inode->i_uid = current->fsuid; + inode->i_gid = current->fsgid; + inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; + inode->i_blksize = PAGE_SIZE; } - inode = inode->i_hash_next; - } while(inode != *base); - inode = _get_empty_inode_hashed(i_dev, i_ino); - inode->i_sb = sb; - inode->i_flags = sb->s_flags; - if (sb->s_op && sb->s_op->read_inode) { - set_io(inode, 0, ST_TO_READ); /* do not wait at all */ - sb->s_op->read_inode(inode); - if (release_io(inode, ST_TO_READ)) - goto done; } - vfs_unlock(); -done: return inode; } -blocking void __iput(struct inode * inode) +/* + * This is called with the inode lock held.. Be careful. 
+ */ +static struct inode * get_new_inode(struct super_block *sb, unsigned long ino, struct list_head *head) { - struct super_block * sb; -xcheck("_iput",inode); - - if (atomic_read(&inode->i_count) < 0) - printk("VFS: i_count is negative\n"); - - if (atomic_read(&inode->i_count) || (inode->i_status & ST_FREEING)) - return; - - inode->i_status |= ST_FREEING; - if (inode->i_pipe) { - free_page((unsigned long)PIPE_BASE(*inode)); - PIPE_BASE(*inode)= NULL; - } - if ((sb = inode->i_sb)) { - if (sb->s_op) { - if (inode->i_nlink <= 0 && - !(inode->i_status & (ST_EMPTY|ST_IBASKET))) { - _clear_inode(inode, 0, 1); - goto done; - } - if (inode->i_dirt) { - inode->i_dirt = 0; - _io(sb->s_op->write_inode, inode, - ST_TO_PUT|ST_TO_WRITE, ST_TO_WRITE); - if (atomic_read(&inode->i_count)) - goto done; - } - } - if (IS_WRITABLE(inode) && sb->dq_op) { - /* can operate in parallel to other ops ? */ - _io(sb->dq_op->drop, inode, 0, ST_TO_DROP); - if (atomic_read(&inode->i_count)) - goto done; - } - } - if (inode->i_mmap) - printk("VFS: inode has mappings\n"); - if (inode->i_status & ST_AGED) { - printk("VFS: reaging inode\n"); -#if defined(DEBUG) -printpath(inode->i_dentry); -printk("\n"); -#endif - goto done; - } - if (!(inode->i_status & (ST_HASHED|ST_EMPTY))) { - _clear_inode(inode, 0, 1); - goto done; + struct inode * inode; + struct list_head * tmp = inode_unused.next; + + if (tmp != &inode_unused) { + list_del(tmp); + inode = list_entry(tmp, struct inode, i_list); +add_new_inode: + list_add(&inode->i_list, &inode_in_use); + list_add(&inode->i_hash, head); + inode->i_sb = sb; + inode->i_dev = sb->s_dev; + inode->i_ino = ino; + inode->i_flags = sb->s_flags; + atomic_set(&inode->i_count, 1); + inode->i_state = 1 << I_LOCK; + spin_unlock(&inode_lock); + clean_inode(inode); + read_inode(inode, sb); + return inode; } - if (inode->i_status & ST_EMPTY) { - printk("VFS: aging an empty inode\n"); - goto done; + + /* + * Uhhuh.. We need to expand. 
Unlock for the allocation, + * but note that "grow_inodes()" will return with the + * lock held again if the allocation succeeded. + */ + spin_unlock(&inode_lock); + inode = grow_inodes(); + if (inode) { + /* We released the lock, so.. */ + struct inode * old = find_inode(sb, ino, head); + if (!old) + goto add_new_inode; + list_add(&inode->i_list, &inode_unused); + spin_unlock(&inode_lock); + wait_on_inode(old); + return old; } - insert_lru(&aged_i[inode->i_level], inode); - inodes_stat.aged_count[inode->i_level]++; - inode->i_status |= ST_AGED; -done: - inode->i_status &= ~ST_FREEING; + return inode; } -blocking void _iput(struct inode * inode) +static inline unsigned long hash(struct super_block *sb, unsigned long i_ino) { - vfs_lock(); - __iput(inode); - vfs_unlock(); + unsigned long tmp = i_ino | (unsigned long) sb; + tmp = tmp + (tmp >> HASH_BITS) + (tmp >> HASH_BITS*2); + return tmp & HASH_MASK; } -blocking void sync_inodes(kdev_t dev) +struct inode *iget(struct super_block *sb, unsigned long ino) { + struct list_head * head = inode_hashtable + hash(sb,ino); struct inode * inode; - vfs_lock(); - inode = all_i; - if (inode) do { -xcheck("sync_inodes",inode); - if (inode->i_dirt && (inode->i_dev == dev || !dev)) { - if (inode->i_sb && inode->i_sb->s_op && - !(inode->i_status & ST_FREEING)) { - inode->i_dirt = 0; - _io(inode->i_sb->s_op->write_inode, inode, - ST_IO, ST_TO_WRITE); - } - } - inode = inode->i_next; - } while(inode != all_i); - vfs_unlock(); + + spin_lock(&inode_lock); + inode = find_inode(sb, ino, head); + if (!inode) { + try_to_free_inodes(); + return get_new_inode(sb, ino, head); + } + spin_unlock(&inode_lock); + wait_on_inode(inode); + return inode; } -blocking int _check_inodes(kdev_t dev, int complain) +void insert_inode_hash(struct inode *inode) { - struct inode * inode; - int bad = 0; - - vfs_lock(); -startover: - inode = all_i; - if (inode) do { - struct inode * next; -xcheck("_check_inodes",inode); - next = inode->i_next; - if 
(inode->i_dev == dev) { - if (inode->i_dirt || atomic_read(&inode->i_count)) { - bad++; - } else { - _clear_inode(inode, 0, 0); - - /* _clear_inode() may recursively clear other - * inodes, probably also the next one. - */ - if (next->i_status & ST_EMPTY) - goto startover; - } - } - inode = next; - } while(inode != all_i); - vfs_unlock(); - if (complain && bad) - printk("VFS: %d inode(s) busy on removed device `%s'\n", - bad, kdevname(dev)); - return (bad == 0); + struct list_head *head = inode_hashtable + hash(inode->i_sb, inode->i_ino); + list_add(&inode->i_hash, head); } -/*inline*/ void invalidate_inodes(kdev_t dev) +void iput(struct inode *inode) { - /* Requires two passes, because of the new dcache holding - * directories with i_count > 1. - */ - (void)_check_inodes(dev, 0); - (void)_check_inodes(dev, 1); + if (inode) { + if (inode->i_pipe) + wake_up_interruptible(&PIPE_WAIT(*inode)); + + /* + * Last user dropping the inode? + */ + if (atomic_read(&inode->i_count) == 1) { + void (*put)(struct inode *); + if (inode->i_sb && inode->i_sb->s_op) { + put = inode->i_sb->s_op->put_inode; + if (put) + put(inode); + } + } + atomic_dec(&inode->i_count); + } } -/*inline*/ int fs_may_mount(kdev_t dev) +int bmap(struct inode * inode, int block) { - return _check_inodes(dev, 0); + if (inode->i_op && inode->i_op->bmap) + return inode->i_op->bmap(inode, block); + return 0; } -int fs_may_remount_ro(kdev_t dev) +/* + * Initialize the hash tables + */ +void inode_init(void) { - (void)dev; - return 1; /* not checked any more */ + int i; + struct list_head *head = inode_hashtable; + + i = HASH_SIZE; + do { + INIT_LIST_HEAD(head); + head++; + i--; + } while (i); } -int fs_may_umount(kdev_t dev, struct dentry * root) +/* + * FIXME! These need to go through the in-use inodes to + * check whether we can mount/umount/remount. 
+ */ +int fs_may_mount(kdev_t dev) { - struct inode * inode; - vfs_lock(); - inode = all_i; - if (inode) do { - if (inode->i_dev == dev && atomic_read(&inode->i_count)) - if (inode != root->d_inode) { - vfs_unlock(); - return 0; - } - inode = inode->i_next; - } while(inode != all_i); - vfs_unlock(); return 1; } -extern struct inode_operations pipe_inode_operations; - -blocking struct inode * get_pipe_inode(void) +int fs_may_umount(kdev_t dev, struct dentry * root) { - struct inode * inode = get_empty_inode(); - - PIPE_BASE(*inode) = (char*)__get_free_page(GFP_USER); - if (!(PIPE_BASE(*inode))) { - iput(inode); - return NULL; - } - inode->i_blksize = PAGE_SIZE; - inode->i_pipe = 1; - inode->i_mode = S_IFIFO | S_IRUSR | S_IWUSR; - atomic_inc(&inode->i_count); - inode->i_uid = current->fsuid; - inode->i_gid = current->fsgid; - inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; - inode->i_op = &pipe_inode_operations; - PIPE_READERS(*inode) = PIPE_WRITERS(*inode) = 1; - - return inode; + return 0; } -int bmap(struct inode * inode, int block) +int fs_may_remount_ro(kdev_t dev) { - if (inode->i_op && inode->i_op->bmap) - return inode->i_op->bmap(inode, block); return 0; } diff --git a/fs/isofs/dir.c b/fs/isofs/dir.c index e22c3ca3b590..4d3b7dc1508f 100644 --- a/fs/isofs/dir.c +++ b/fs/isofs/dir.c @@ -54,6 +54,7 @@ struct inode_operations isofs_dir_inode_operations = NULL, /* mknod */ NULL, /* rename */ NULL, /* readlink */ + NULL, /* follow_link */ NULL, /* readpage */ NULL, /* writepage */ isofs_bmap, /* bmap */ diff --git a/fs/isofs/file.c b/fs/isofs/file.c index 2742283f7e59..d14a558a0475 100644 --- a/fs/isofs/file.c +++ b/fs/isofs/file.c @@ -47,6 +47,7 @@ struct inode_operations isofs_file_inode_operations = { NULL, /* mknod */ NULL, /* rename */ NULL, /* readlink */ + NULL, /* follow_link */ generic_readpage, /* readpage */ NULL, /* writepage */ isofs_bmap, /* bmap */ diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c index d081a4cdd822..4a63d4eb01dd 
100644 --- a/fs/isofs/inode.c +++ b/fs/isofs/inode.c @@ -481,12 +481,12 @@ struct super_block *isofs_read_super(struct super_block *s,void *data, s->u.isofs_sb.s_mode = opt.mode & 0777; s->s_blocksize = opt.blocksize; s->s_blocksize_bits = blocksize_bits; - s->s_mounted = iget(s, (isonum_733(rootp->extent) + + s->s_root = d_alloc_root(iget(s, (isonum_733(rootp->extent) + isonum_711(rootp->ext_attr_length)) - << s -> u.isofs_sb.s_log_zone_size); + << s -> u.isofs_sb.s_log_zone_size), NULL); unlock_super(s); - if (!(s->s_mounted)) { + if (!(s->s_root)) { s->s_dev = 0; printk("get root inode failed\n"); MOD_DEC_USE_COUNT; diff --git a/fs/isofs/namei.c b/fs/isofs/namei.c index 155f4ae436da..909e8b25fd5b 100644 --- a/fs/isofs/namei.c +++ b/fs/isofs/namei.c @@ -201,62 +201,60 @@ static struct buffer_head * isofs_find_entry(struct inode * dir, return NULL; } -int isofs_lookup(struct inode * dir,const char * name, int len, - struct inode ** result) +int isofs_lookup(struct inode * dir, struct qstr *name, + struct inode ** result) { unsigned long ino, ino_back; struct buffer_head * bh; char *lcname; + struct inode *inode; #ifdef DEBUG - printk("lookup: %x %d\n",dir->i_ino, len); + printk("lookup: %x %d\n",dir->i_ino, name->len); #endif - *result = NULL; if (!dir) return -ENOENT; - if (!S_ISDIR(dir->i_mode)) { - iput(dir); + if (!S_ISDIR(dir->i_mode)) return -ENOENT; - } /* If mounted with check=relaxed (and most likely norock), * then first convert this name to lower case. 
*/ if (dir->i_sb->u.isofs_sb.s_name_check == 'r' && - (lcname = kmalloc(len, GFP_KERNEL)) != NULL) { + (lcname = kmalloc(name->len, GFP_KERNEL)) != NULL) { int i; char c; - for (i=0; i<len; i++) { - c = name[i]; + for (i=0; i<name->len; i++) { + c = name->name[i]; if (c >= 'A' && c <= 'Z') c |= 0x20; lcname[i] = c; } - bh = isofs_find_entry(dir,lcname,len, &ino, &ino_back); + bh = isofs_find_entry(dir, lcname, name->len, + &ino, &ino_back); kfree(lcname); } else - bh = isofs_find_entry(dir,name,len, &ino, &ino_back); + bh = isofs_find_entry(dir, name->name, + name->len, &ino, &ino_back); - if (!bh) { - iput(dir); + if (!bh) return -ENOENT; - } brelse(bh); - if (!(*result = iget(dir->i_sb,ino))) { - iput(dir); + inode = iget(dir->i_sb,ino); + if (!inode) return -EACCES; - } /* We need this backlink for the ".." entry unless the name that we * are looking up traversed a mount point (in which case the inode * may not even be on an iso9660 filesystem, and writing to * u.isofs_i would only cause memory corruption). */ - if (ino_back && !(*result)->i_pipe && (*result)->i_sb == dir->i_sb) - (*result)->u.isofs_i.i_backlink = ino_back; + if (ino_back && !inode->i_pipe && inode->i_sb == dir->i_sb) + inode->u.isofs_i.i_backlink = ino_back; + + *result = inode; - iput(dir); return 0; } diff --git a/fs/minix/dir.c b/fs/minix/dir.c index 439005f4e121..ec5113c4a67f 100644 --- a/fs/minix/dir.c +++ b/fs/minix/dir.c @@ -50,6 +50,7 @@ struct inode_operations minix_dir_inode_operations = { minix_mknod, /* mknod */ minix_rename, /* rename */ NULL, /* readlink */ + NULL, /* follow_link */ NULL, /* readpage */ NULL, /* writepage */ NULL, /* bmap */ diff --git a/fs/minix/file.c b/fs/minix/file.c index 86cbca2b22eb..23aa70268a6a 100644 --- a/fs/minix/file.c +++ b/fs/minix/file.c @@ -58,6 +58,7 @@ struct inode_operations minix_file_inode_operations = { NULL, /* mknod */ NULL, /* rename */ NULL, /* readlink */ + NULL, /* follow_link */ generic_readpage, /* readpage */ NULL, /* writepage */ minix_bmap, /* bmap */ diff --git
a/fs/minix/inode.c b/fs/minix/inode.c index cbd735ef1de2..fd0b433a8642 100644 --- a/fs/minix/inode.c +++ b/fs/minix/inode.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include @@ -125,15 +126,13 @@ int minix_remount (struct super_block * sb, int * flags, char * data) * it really _is_ a minix filesystem, and to check the size * of the directory entry. */ -static const char * minix_checkroot(struct super_block *s) +static const char * minix_checkroot(struct super_block *s, struct inode *dir) { - struct inode * dir; struct buffer_head *bh; struct minix_dir_entry *de; const char * errmsg; int dirsize; - dir = s->s_mounted; if (!S_ISDIR(dir->i_mode)) return "root directory is not a directory"; @@ -172,7 +171,8 @@ struct super_block *minix_read_super(struct super_block *s,void *data, int i, block; kdev_t dev = s->s_dev; const char * errmsg; - + struct inode *root_inode; + if (32 != sizeof (struct minix_inode)) panic("bad V1 i-node size"); if (64 != sizeof(struct minix2_inode)) @@ -272,8 +272,9 @@ struct super_block *minix_read_super(struct super_block *s,void *data, /* set up enough so that it can read an inode */ s->s_dev = dev; s->s_op = &minix_sops; - s->s_mounted = iget(s,MINIX_ROOT_INO); - if (!s->s_mounted) { + root_inode = iget(s,MINIX_ROOT_INO); + s->s_root = d_alloc_root(root_inode, NULL); + if (!s->s_root) { s->s_dev = 0; brelse(bh); if (!silent) @@ -282,11 +283,11 @@ struct super_block *minix_read_super(struct super_block *s,void *data, return NULL; } - errmsg = minix_checkroot(s); + errmsg = minix_checkroot(s, root_inode); if (errmsg) { if (!silent) printk("MINIX-fs: %s\n", errmsg); - iput (s->s_mounted); + d_delete(s->s_root); /* XXX Is this enough? 
*/ s->s_dev = 0; brelse (bh); MOD_DEC_USE_COUNT; diff --git a/fs/minix/namei.c b/fs/minix/namei.c index 30d52fbe4006..6829ab334fa9 100644 --- a/fs/minix/namei.c +++ b/fs/minix/namei.c @@ -104,7 +104,7 @@ static struct buffer_head * minix_find_entry(struct inode * dir, return NULL; } -int minix_lookup(struct inode * dir,const char * name, int len, +int minix_lookup(struct inode * dir, struct qstr *name, struct inode ** result) { int ino; @@ -114,21 +114,14 @@ int minix_lookup(struct inode * dir,const char * name, int len, *result = NULL; if (!dir) return -ENOENT; - if (!S_ISDIR(dir->i_mode)) { - iput(dir); + if (!S_ISDIR(dir->i_mode)) return -ENOENT; - } - if (!(bh = minix_find_entry(dir,name,len,&de))) { - iput(dir); + if (!(bh = minix_find_entry(dir, name->name, name->len, &de))) return -ENOENT; - } ino = de->inode; brelse(bh); - if (!(*result = iget(dir->i_sb,ino))) { - iput(dir); + if (!(*result = iget(dir->i_sb,ino))) return -EACCES; - } - iput(dir); return 0; } @@ -208,42 +201,37 @@ static int minix_add_entry(struct inode * dir, return 0; } -int minix_create(struct inode * dir,const char * name, int len, int mode, - struct inode ** result) +int minix_create(struct inode * dir, struct dentry *dentry, int mode) { int error; struct inode * inode; struct buffer_head * bh; struct minix_dir_entry * de; - *result = NULL; if (!dir) return -ENOENT; inode = minix_new_inode(dir); - if (!inode) { - iput(dir); + if (!inode) return -ENOSPC; - } inode->i_op = &minix_file_inode_operations; inode->i_mode = mode; inode->i_dirt = 1; - error = minix_add_entry(dir,name,len, &bh ,&de); + error = minix_add_entry(dir, dentry->d_name.name, + dentry->d_name.len, &bh ,&de); if (error) { inode->i_nlink--; inode->i_dirt = 1; iput(inode); - iput(dir); return error; } de->inode = inode->i_ino; mark_buffer_dirty(bh, 1); brelse(bh); - iput(dir); - *result = inode; + d_instantiate(dentry, inode, 0); return 0; } -int minix_mknod(struct inode * dir, const char * name, int len, int mode, int 
rdev) +int minix_mknod(struct inode * dir, struct dentry *dentry, int mode, int rdev) { int error; struct inode * inode; @@ -252,17 +240,15 @@ int minix_mknod(struct inode * dir, const char * name, int len, int mode, int rd if (!dir) return -ENOENT; - bh = minix_find_entry(dir,name,len,&de); + bh = minix_find_entry(dir, dentry->d_name.name, + dentry->d_name.len, &de); if (bh) { brelse(bh); - iput(dir); return -EEXIST; } inode = minix_new_inode(dir); - if (!inode) { - iput(dir); + if (!inode) return -ENOSPC; - } inode->i_uid = current->fsuid; inode->i_mode = mode; inode->i_op = NULL; @@ -284,23 +270,21 @@ int minix_mknod(struct inode * dir, const char * name, int len, int mode, int rd if (S_ISBLK(mode) || S_ISCHR(mode)) inode->i_rdev = to_kdev_t(rdev); inode->i_dirt = 1; - error = minix_add_entry(dir, name, len, &bh, &de); + error = minix_add_entry(dir, dentry->d_name.name, dentry->d_name.len, &bh, &de); if (error) { inode->i_nlink--; inode->i_dirt = 1; iput(inode); - iput(dir); return error; } de->inode = inode->i_ino; mark_buffer_dirty(bh, 1); brelse(bh); - iput(dir); - iput(inode); + d_instantiate(dentry, inode, 0); return 0; } -int minix_mkdir(struct inode * dir, const char * name, int len, int mode) +int minix_mkdir(struct inode * dir, struct dentry *dentry, int mode) { int error; struct inode * inode; @@ -308,31 +292,24 @@ int minix_mkdir(struct inode * dir, const char * name, int len, int mode) struct minix_dir_entry * de; struct minix_sb_info * info; - if (!dir || !dir->i_sb) { - iput(dir); + if (!dir || !dir->i_sb) return -EINVAL; - } info = &dir->i_sb->u.minix_sb; - bh = minix_find_entry(dir,name,len,&de); + bh = minix_find_entry(dir, dentry->d_name.name, + dentry->d_name.len, &de); if (bh) { brelse(bh); - iput(dir); return -EEXIST; } - if (dir->i_nlink >= MINIX_LINK_MAX) { - iput(dir); + if (dir->i_nlink >= MINIX_LINK_MAX) return -EMLINK; - } inode = minix_new_inode(dir); - if (!inode) { - iput(dir); + if (!inode) return -ENOSPC; - } inode->i_op = 
&minix_dir_inode_operations; inode->i_size = 2 * info->s_dirsize; dir_block = minix_bread(inode,0,1); if (!dir_block) { - iput(dir); inode->i_nlink--; inode->i_dirt = 1; iput(inode); @@ -351,9 +328,9 @@ int minix_mkdir(struct inode * dir, const char * name, int len, int mode) if (dir->i_mode & S_ISGID) inode->i_mode |= S_ISGID; inode->i_dirt = 1; - error = minix_add_entry(dir, name, len, &bh, &de); + error = minix_add_entry(dir, dentry->d_name.name, + dentry->d_name.len, &bh, &de); if (error) { - iput(dir); inode->i_nlink=0; iput(inode); return error; @@ -362,9 +339,8 @@ int minix_mkdir(struct inode * dir, const char * name, int len, int mode) mark_buffer_dirty(bh, 1); dir->i_nlink++; dir->i_dirt = 1; - iput(dir); - iput(inode); brelse(bh); + d_instantiate(dentry, inode, D_DIR); return 0; } @@ -427,7 +403,7 @@ bad_dir: return 1; } -int minix_rmdir(struct inode * dir, const char * name, int len) +int minix_rmdir(struct inode * dir, struct dentry *dentry) { int retval; struct inode * inode; @@ -435,13 +411,14 @@ int minix_rmdir(struct inode * dir, const char * name, int len) struct minix_dir_entry * de; inode = NULL; - bh = minix_find_entry(dir,name,len,&de); + bh = minix_find_entry(dir, dentry->d_name.name, + dentry->d_name.len, &de); retval = -ENOENT; if (!bh) goto end_rmdir; retval = -EPERM; - if (!(inode = iget(dir->i_sb, de->inode))) - goto end_rmdir; + inode = dentry->d_inode; + if ((dir->i_mode & S_ISVTX) && !fsuser() && current->fsuid != inode->i_uid && current->fsuid != dir->i_uid) @@ -476,15 +453,14 @@ int minix_rmdir(struct inode * dir, const char * name, int len) inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME; dir->i_nlink--; dir->i_dirt=1; + d_delete(dentry); retval = 0; end_rmdir: - iput(dir); - iput(inode); brelse(bh); return retval; } -int minix_unlink(struct inode * dir, const char * name, int len) +int minix_unlink(struct inode * dir, struct dentry *dentry) { int retval; struct inode * inode; @@ -494,16 +470,16 @@ int 
minix_unlink(struct inode * dir, const char * name, int len) repeat: retval = -ENOENT; inode = NULL; - bh = minix_find_entry(dir,name,len,&de); + bh = minix_find_entry(dir, dentry->d_name.name, + dentry->d_name.len, &de); if (!bh) goto end_unlink; - if (!(inode = iget(dir->i_sb, de->inode))) - goto end_unlink; + inode = dentry->d_inode; + retval = -EPERM; if (S_ISDIR(inode->i_mode)) goto end_unlink; if (de->inode != inode->i_ino) { - iput(inode); brelse(bh); current->counter = 0; schedule(); @@ -531,15 +507,15 @@ repeat: inode->i_nlink--; inode->i_ctime = dir->i_ctime; inode->i_dirt = 1; + d_delete(dentry); /* This also frees the inode */ retval = 0; end_unlink: brelse(bh); - iput(inode); - iput(dir); return retval; } -int minix_symlink(struct inode * dir, const char * name, int len, const char * symname) +int minix_symlink(struct inode * dir, struct dentry *dentry, + const char * symname) { struct minix_dir_entry * de; struct inode * inode = NULL; @@ -547,15 +523,13 @@ int minix_symlink(struct inode * dir, const char * name, int len, const char * s int i; char c; - if (!(inode = minix_new_inode(dir))) { - iput(dir); + if (!(inode = minix_new_inode(dir))) return -ENOSPC; - } + inode->i_mode = S_IFLNK | 0777; inode->i_op = &minix_symlink_inode_operations; name_block = minix_bread(inode,0,1); if (!name_block) { - iput(dir); inode->i_nlink--; inode->i_dirt = 1; iput(inode); @@ -569,68 +543,62 @@ int minix_symlink(struct inode * dir, const char * name, int len, const char * s brelse(name_block); inode->i_size = i; inode->i_dirt = 1; - bh = minix_find_entry(dir,name,len,&de); + bh = minix_find_entry(dir, dentry->d_name.name, + dentry->d_name.len, &de); if (bh) { inode->i_nlink--; inode->i_dirt = 1; iput(inode); brelse(bh); - iput(dir); return -EEXIST; } - i = minix_add_entry(dir, name, len, &bh, &de); + i = minix_add_entry(dir, dentry->d_name.name, + dentry->d_name.len, &bh, &de); if (i) { inode->i_nlink--; inode->i_dirt = 1; iput(inode); - iput(dir); return i; } 
de->inode = inode->i_ino; mark_buffer_dirty(bh, 1); brelse(bh); - iput(dir); - iput(inode); + d_instantiate(dentry, inode, 0); return 0; } -int minix_link(struct inode * oldinode, struct inode * dir, const char * name, int len) +int minix_link(struct inode * inode, struct inode * dir, + struct dentry *dentry) { int error; struct minix_dir_entry * de; struct buffer_head * bh; - if (S_ISDIR(oldinode->i_mode)) { - iput(oldinode); - iput(dir); + if (S_ISDIR(inode->i_mode)) return -EPERM; - } - if (oldinode->i_nlink >= MINIX_LINK_MAX) { - iput(oldinode); - iput(dir); + + if (inode->i_nlink >= MINIX_LINK_MAX) return -EMLINK; - } - bh = minix_find_entry(dir,name,len,&de); + + bh = minix_find_entry(dir, dentry->d_name.name, + dentry->d_name.len, &de); if (bh) { brelse(bh); - iput(dir); - iput(oldinode); return -EEXIST; } - error = minix_add_entry(dir, name, len, &bh, &de); + error = minix_add_entry(dir, dentry->d_name.name, + dentry->d_name.len, &bh, &de); if (error) { - iput(dir); - iput(oldinode); + brelse(bh); return error; } - de->inode = oldinode->i_ino; + de->inode = inode->i_ino; mark_buffer_dirty(bh, 1); brelse(bh); - iput(dir); - oldinode->i_nlink++; - oldinode->i_ctime = CURRENT_TIME; - oldinode->i_dirt = 1; - iput(oldinode); + inode->i_nlink++; + inode->i_ctime = CURRENT_TIME; + inode->i_dirt = 1; + d_instantiate(dentry, inode, 0); return 0; } @@ -649,7 +617,9 @@ static int subdir(struct inode * new_inode, struct inode * old_inode) if (new_inode->i_dev != old_inode->i_dev) break; ino = new_inode->i_ino; - if (minix_lookup(new_inode,"..",2,&new_inode)) + if (minix_lookup(new_inode, + &(struct qstr) { "..", 2, 0 }, + &new_inode)) break; if (new_inode->i_ino == ino) break; @@ -671,8 +641,8 @@ static int subdir(struct inode * new_inode, struct inode * old_inode) * Anybody can rename anything with this: the permission checks are left to the * higher-level routines. 
*/ -static int do_minix_rename(struct inode * old_dir, const char * old_name, int old_len, - struct inode * new_dir, const char * new_name, int new_len) +static int do_minix_rename(struct inode * old_dir, struct dentry *old_dentry, + struct inode * new_dir, struct dentry *new_dentry) { struct inode * old_inode, * new_inode; struct buffer_head * old_bh, * new_bh, * dir_bh; @@ -686,28 +656,26 @@ try_again: brelse(old_bh); brelse(new_bh); brelse(dir_bh); - iput(old_inode); - iput(new_inode); current->counter = 0; schedule(); start_up: old_inode = new_inode = NULL; old_bh = new_bh = dir_bh = NULL; - old_bh = minix_find_entry(old_dir,old_name,old_len,&old_de); + old_bh = minix_find_entry(old_dir, old_dentry->d_name.name, + old_dentry->d_name.len, &old_de); retval = -ENOENT; if (!old_bh) goto end_rename; - old_inode = __iget(old_dir->i_sb, old_de->inode); - if (!old_inode) - goto end_rename; + old_inode = old_dentry->d_inode; retval = -EPERM; if ((old_dir->i_mode & S_ISVTX) && current->fsuid != old_inode->i_uid && current->fsuid != old_dir->i_uid && !fsuser()) goto end_rename; - new_bh = minix_find_entry(new_dir,new_name,new_len,&new_de); + new_inode = new_dentry->d_inode; + new_bh = minix_find_entry(new_dir, new_dentry->d_name.name, + new_dentry->d_name.len, &new_de); if (new_bh) { - new_inode = __iget(new_dir->i_sb, new_de->inode); if (!new_inode) { brelse(new_bh); new_bh = NULL; @@ -754,7 +722,10 @@ start_up: goto end_rename; } if (!new_bh) { - retval = minix_add_entry(new_dir,new_name,new_len,&new_bh,&new_de); + retval = minix_add_entry(new_dir, + new_dentry->d_name.name, + new_dentry->d_name.len, + &new_bh, &new_de); if (retval) goto end_rename; } @@ -794,15 +765,14 @@ start_up: new_dir->i_dirt = 1; } } + /* Update the dcache */ + d_move(old_dentry, new_dentry->d_parent, &new_dentry->d_name); + d_delete(new_dentry); retval = 0; end_rename: brelse(dir_bh); brelse(old_bh); brelse(new_bh); - iput(old_inode); - iput(new_inode); - iput(old_dir); - iput(new_dir); return 
retval; } @@ -815,8 +785,8 @@ end_rename: * the same device that races occur: many renames can happen at once, as long * as they are on different partitions. */ -int minix_rename(struct inode * old_dir, const char * old_name, int old_len, - struct inode * new_dir, const char * new_name, int new_len) +int minix_rename(struct inode * old_dir, struct dentry *old_dentry, + struct inode * new_dir, struct dentry *new_dentry) { static struct wait_queue * wait = NULL; static int lock = 0; @@ -825,8 +795,8 @@ int minix_rename(struct inode * old_dir, const char * old_name, int old_len, while (lock) sleep_on(&wait); lock = 1; - result = do_minix_rename(old_dir, old_name, old_len, - new_dir, new_name, new_len); + result = do_minix_rename(old_dir, old_dentry, + new_dir, new_dentry); lock = 0; wake_up(&wait); return result; diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 871f5fd4d7e1..c6162be453d2 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -363,7 +363,6 @@ nfs_notify_change(struct inode *inode, struct iattr *attr) nfs_truncate_dirty_pages(inode, sattr.size); nfs_refresh_inode(inode, &fattr); } - inode->i_dirt = 0; return error; } diff --git a/fs/open.c b/fs/open.c index 4444653d5664..436766482014 100644 --- a/fs/open.c +++ b/fs/open.c @@ -90,7 +90,6 @@ int do_truncate(struct inode *inode, unsigned long length) vmtruncate(inode, length); if (inode->i_op && inode->i_op->truncate) inode->i_op->truncate(inode); - inode->i_status |= ST_MODIFIED; } up(&inode->i_sem); return error; @@ -436,12 +435,7 @@ asmlinkage int sys_fchmod(unsigned int fd, mode_t mode) mode = inode->i_mode; newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO); newattrs.ia_valid = ATTR_MODE | ATTR_CTIME; - inode->i_dirt = 1; err = notify_change(inode, &newattrs); -#ifdef CONFIG_OMIRR - if(!err) - omirr_printall(inode, " M %ld %ld ", CURRENT_TIME, newattrs.ia_mode); -#endif out: unlock_kernel(); return err; @@ -471,12 +465,7 @@ asmlinkage int sys_chmod(const char * filename, mode_t mode) 
mode = inode->i_mode; newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO); newattrs.ia_valid = ATTR_MODE | ATTR_CTIME; - inode->i_dirt = 1; error = notify_change(inode, &newattrs); -#ifdef CONFIG_OMIRR - if(!error) - omirr_printall(inode, " M %ld %ld ", CURRENT_TIME, newattrs.ia_mode); -#endif iput_and_out: iput(inode); out: @@ -528,7 +517,6 @@ asmlinkage int sys_fchown(unsigned int fd, uid_t user, gid_t group) newattrs.ia_mode &= ~S_ISGID; newattrs.ia_valid |= ATTR_MODE; } - inode->i_dirt = 1; if (inode->i_sb && inode->i_sb->dq_op) { inode->i_sb->dq_op->initialize(inode, -1); error = -EDQUOT; @@ -539,11 +527,6 @@ asmlinkage int sys_fchown(unsigned int fd, uid_t user, gid_t group) inode->i_sb->dq_op->transfer(inode, &newattrs, 1); } else error = notify_change(inode, &newattrs); -#ifdef CONFIG_OMIRR - if(!error) - omirr_printall(inode, " O %d %d ", CURRENT_TIME, - newattrs.ia_uid, newattrs.ia_gid); -#endif out: unlock_kernel(); return error; @@ -590,7 +573,6 @@ asmlinkage int sys_chown(const char * filename, uid_t user, gid_t group) newattrs.ia_mode &= ~S_ISGID; newattrs.ia_valid |= ATTR_MODE; } - inode->i_dirt = 1; if (inode->i_sb->dq_op) { inode->i_sb->dq_op->initialize(inode, -1); error = -EDQUOT; @@ -601,11 +583,6 @@ asmlinkage int sys_chown(const char * filename, uid_t user, gid_t group) inode->i_sb->dq_op->transfer(inode, &newattrs, 1); } else error = notify_change(inode, &newattrs); -#ifdef CONFIG_OMIRR - if(!error) - omirr_printall(inode, " O %d %d ", CURRENT_TIME, - newattrs.ia_uid, newattrs.ia_gid); -#endif iput_and_out: iput(inode); out: diff --git a/fs/pipe.c b/fs/pipe.c index 732d37af50e9..260237cb43ae 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -75,10 +75,7 @@ static long pipe_read(struct inode * inode, struct file * filp, PIPE_LOCK(*inode)--; wake_up_interruptible(&PIPE_WAIT(*inode)); if (read) { - if (DO_UPDATE_ATIME(inode)) { - inode->i_atime = CURRENT_TIME; - inode->i_dirt = 1; - } + UPDATE_ATIME(inode); return read; } if 
(PIPE_WRITERS(*inode)) @@ -132,7 +129,7 @@ static long pipe_write(struct inode * inode, struct file * filp, free = 1; } inode->i_ctime = inode->i_mtime = CURRENT_TIME; - inode->i_dirt = 1; + mark_inode_dirty(inode); return written; } @@ -423,11 +420,13 @@ int do_pipe(int *fd) j = error; f1->f_inode = f2->f_inode = inode; + /* read file */ f1->f_pos = f2->f_pos = 0; f1->f_flags = O_RDONLY; f1->f_op = &read_pipe_fops; f1->f_mode = 1; + /* write file */ f2->f_flags = O_WRONLY; f2->f_op = &write_pipe_fops; diff --git a/fs/proc/inode.c b/fs/proc/inode.c index 18a40ffd85b4..0a5397830aad 100644 --- a/fs/proc/inode.c +++ b/fs/proc/inode.c @@ -201,5 +201,4 @@ void proc_read_inode(struct inode * inode) void proc_write_inode(struct inode * inode) { - inode->i_dirt=0; } diff --git a/fs/read_write.c b/fs/read_write.c index 81b19ac30a2b..eafabcfbd217 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -168,7 +168,6 @@ asmlinkage long sys_write(unsigned int fd, const char * buf, unsigned long count goto out; down(&inode->i_sem); error = write(inode,file,buf,count); - inode->i_status |= ST_MODIFIED; up(&inode->i_sem); out: fput(file, inode); @@ -264,8 +263,6 @@ static long do_readv_writev(int type, struct inode * inode, struct file * file, if (nr != len) break; } - if(fn == (IO_fn_t) file->f_op->write) - inode->i_status |= ST_MODIFIED; if (iov != iovstack) kfree(iov); return retval; diff --git a/fs/stat.c b/fs/stat.c index 0ab291256c0a..8959810a1787 100644 --- a/fs/stat.c +++ b/fs/stat.c @@ -256,10 +256,7 @@ asmlinkage int sys_readlink(const char * path, char * buf, int bufsiz) iput(inode); goto out; } - if (!IS_RDONLY(inode)) { - inode->i_atime = CURRENT_TIME; - inode->i_dirt = 1; - } + UPDATE_ATIME(inode); error = inode->i_op->readlink(inode,buf,bufsiz); iput(inode); out: diff --git a/fs/ufs/ufs_file.c b/fs/ufs/ufs_file.c index 74ae1a4702a0..b73e578769fb 100644 --- a/fs/ufs/ufs_file.c +++ b/fs/ufs/ufs_file.c @@ -41,6 +41,7 @@ struct inode_operations ufs_file_inode_operations = 
{ NULL, /* mknod */ NULL, /* rename */ NULL, /* readlink */ + NULL, /* follow_link */ generic_readpage, /* readpage */ NULL, /* writepage */ ufs_bmap, /* bmap */ diff --git a/fs/ufs/ufs_namei.c b/fs/ufs/ufs_namei.c index 03ea2dde16e0..e849aabef3e0 100644 --- a/fs/ufs/ufs_namei.c +++ b/fs/ufs/ufs_namei.c @@ -35,12 +35,14 @@ static int ufs_match (int len, const char * const name, struct ufs_direct * d) } /* XXX - this is a mess, especially for endianity */ -int ufs_lookup (struct inode * dir, const char * name, int len, +int ufs_lookup (struct inode * dir, struct qstr *qname, struct inode ** result) { unsigned long int lfragno, fragno; struct buffer_head * bh; struct ufs_direct * d; + const char *name = qname->name; + int len = qname->len; if (dir->i_sb->u.ufs_sb.s_flags & UFS_DEBUG) printk("Passed name: %s\nPassed length: %d\n", name, len); diff --git a/fs/ufs/ufs_super.c b/fs/ufs/ufs_super.c index 342722237f9f..56dbd24f29db 100644 --- a/fs/ufs/ufs_super.c +++ b/fs/ufs/ufs_super.c @@ -254,7 +254,7 @@ ufs_read_super(struct super_block * sb, void * data, int silent) sb->u.ufs_sb.s_lmask = ~((ufs_swab32(usb->fs_fmask) - ufs_swab32(usb->fs_bmask)) >> ufs_swab32(usb->fs_fshift)); sb->u.ufs_sb.s_fsfrag = ufs_swab32(usb->fs_frag); /* XXX - rename this later */ - sb->s_mounted = iget(sb, UFS_ROOTINO); + sb->s_root = d_alloc_root(iget(sb, UFS_ROOTINO), NULL); #ifdef DEBUG_UFS_SUPER printk("ufs_read_super: inopb %u\n", sb->u.ufs_sb.s_inopb); diff --git a/include/linux/fs.h b/include/linux/fs.h index 724b62b7ace0..1ba16eeb7e7e 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -15,6 +15,7 @@ #include #include #include +#include <linux/list.h> #include #include @@ -78,9 +79,6 @@ extern int max_files, nr_files; */ #define FS_IBASKET 8 /* FS does callback to free_ibasket() if space gets low.
*/ -/* public flags for i_status */ -#define ST_MODIFIED 1024 - /* * These are the fs-independent mount-flags: up to 16 flags are supported */ @@ -126,7 +124,12 @@ extern int max_files, nr_files; #define IS_APPEND(inode) ((inode)->i_flags & S_APPEND) #define IS_IMMUTABLE(inode) ((inode)->i_flags & S_IMMUTABLE) #define IS_NOATIME(inode) ((inode)->i_flags & MS_NOATIME) -#define DO_UPDATE_ATIME(inode) (!IS_NOATIME(inode) && !IS_RDONLY(inode)) + +#define UPDATE_ATIME(inode) \ + if (!IS_NOATIME(inode) && !IS_RDONLY(inode)) { \ + (inode)->i_atime = CURRENT_TIME; \ + mark_inode_dirty(inode); \ + } /* the read-only stuff doesn't really belong here, but any other place is probably as bad and I don't want to create yet another include file. */ @@ -300,10 +303,8 @@ struct iattr { #include struct inode { - struct inode *i_hash_next; - struct inode *i_hash_prev; - struct inode *i_next; - struct inode *i_prev; + struct list_head i_hash; + struct list_head i_list; unsigned long i_ino; kdev_t i_dev; @@ -330,25 +331,14 @@ struct inode { struct page *i_pages; struct dquot *i_dquot[MAXQUOTAS]; - struct inode *i_lru_next; - struct inode *i_lru_prev; - - struct inode *i_basket_next; - struct inode *i_basket_prev; struct dentry *i_dentry; - unsigned short i_status; - unsigned short i_reuse_count; + unsigned int i_state; unsigned int i_flags; - unsigned char i_lock; - unsigned char i_dirt; unsigned char i_pipe; unsigned char i_sock; - unsigned char i_level; - unsigned short i_fill; - int i_writecount; unsigned int i_attr_flags; union { @@ -369,6 +359,17 @@ struct inode { } u; }; +/* Inode state bits..
*/ +#define I_DIRTY 0 +#define I_LOCK 1 + +extern void __mark_inode_dirty(struct inode *); +static inline void mark_inode_dirty(struct inode *inode) +{ + if (!test_and_set_bit(I_DIRTY, &inode->i_state)) + __mark_inode_dirty(inode); +} + struct file { struct file *f_next, **f_pprev; struct inode *f_inode; @@ -735,42 +736,12 @@ extern inline void vfs_unlock(void) /* Not to be used by ordinary vfs users */ extern void _get_inode(struct inode * inode); -extern blocking void __iput(struct inode * inode); - -extern blocking void _iput(struct inode * inode); -extern inline blocking void iput(struct inode * inode) -{ - if (inode) { - extern void wake_up_interruptible(struct wait_queue **q); - - if (inode->i_pipe) - wake_up_interruptible(&inode->u.pipe_i.wait); - - /* It does not matter if somebody re-increments it in between, - * only the _last_ user needs to call _iput(). - */ - if (atomic_dec_and_test(&inode->i_count)) - _iput(inode); - } -} +extern void iput(struct inode * inode); extern blocking struct inode * iget(struct super_block * sb, unsigned long nr); -extern blocking void _clear_inode(struct inode * inode, int external, int verbose); -extern blocking inline void clear_inode(struct inode * inode) -{ - vfs_lock(); - _clear_inode(inode, 1, 1); - vfs_unlock(); -} -extern blocking struct inode * _get_empty_inode(void); -extern inline blocking struct inode * get_empty_inode(void) -{ - struct inode * inode; - vfs_lock(); - inode = _get_empty_inode(); - vfs_unlock(); - return inode; -} +extern blocking void clear_inode(struct inode * inode); +extern blocking struct inode * get_empty_inode(void); + /* Please prefer to use this function in future, instead of using * a get_empty_inode()/insert_inode_hash() combination. * It allows for better checking and less race conditions. 
diff --git a/include/linux/iso_fs.h b/include/linux/iso_fs.h index 03239cfc3ceb..3d65cb3e160c 100644 --- a/include/linux/iso_fs.h +++ b/include/linux/iso_fs.h @@ -152,7 +152,7 @@ extern int find_rock_ridge_relocation(struct iso_directory_record *, struct inod extern int isofs_open(struct inode * inode, struct file * filp); extern void isofs_release(struct inode * inode, struct file * filp); -extern int isofs_lookup(struct inode * dir,const char * name, int len, +extern int isofs_lookup(struct inode * dir, struct qstr *dentry, struct inode ** result); extern unsigned long isofs_count_free_inodes(struct super_block *sb); extern int isofs_new_block(int dev); diff --git a/include/linux/list.h b/include/linux/list.h new file mode 100644 index 000000000000..9c3c4fef57f1 --- /dev/null +++ b/include/linux/list.h @@ -0,0 +1,39 @@ +#ifndef _LINUX_LIST_H +#define _LINUX_LIST_H + +/* + * Simple doubly linked list implementation. + */ +struct list_head { + struct list_head *next, *prev; +}; + +#define LIST_HEAD(name) \ + struct list_head name = { &name, &name } + +#define INIT_LIST_HEAD(ptr) do { \ + (ptr)->next = (ptr); (ptr)->prev = (ptr); \ +} while (0) + +static inline void list_add(struct list_head *new, struct list_head *head) +{ + struct list_head *next = head->next; + next->prev = new; + new->next = next; + new->prev = head; + head->next = new; +} + +static inline void list_del(struct list_head *entry) +{ + struct list_head *next, *prev; + next = entry->next; + prev = entry->prev; + next->prev = prev; + prev->next = next; +} + +#define list_entry(ptr, type, member) \ + ((type *)((char *)(ptr)-(unsigned long)(&((type *)0)->member))) + +#endif diff --git a/include/linux/minix_fs.h b/include/linux/minix_fs.h index 069686fffa3f..4c9410691f52 100644 --- a/include/linux/minix_fs.h +++ b/include/linux/minix_fs.h @@ -88,19 +88,18 @@ struct minix_dir_entry { #ifdef __KERNEL__ -extern int minix_lookup(struct inode * dir,const char * name, int len, +extern int minix_lookup(struct 
inode * dir, struct qstr *name, struct inode ** result); -extern int minix_create(struct inode * dir,const char * name, int len, int mode, - struct inode ** result); -extern int minix_mkdir(struct inode * dir, const char * name, int len, int mode); -extern int minix_rmdir(struct inode * dir, const char * name, int len); -extern int minix_unlink(struct inode * dir, const char * name, int len); -extern int minix_symlink(struct inode * inode, const char * name, int len, +extern int minix_create(struct inode * dir, struct dentry *dentry, int mode); +extern int minix_mkdir(struct inode * dir, struct dentry *dentry, int mode); +extern int minix_rmdir(struct inode * dir, struct dentry *dentry); +extern int minix_unlink(struct inode * dir, struct dentry *dentry); +extern int minix_symlink(struct inode * inode, struct dentry *dentry, const char * symname); -extern int minix_link(struct inode * oldinode, struct inode * dir, const char * name, int len); -extern int minix_mknod(struct inode * dir, const char * name, int len, int mode, int rdev); -extern int minix_rename(struct inode * old_dir, const char * old_name, int old_len, - struct inode * new_dir, const char * new_name, int new_len); +extern int minix_link(struct inode * oldinode, struct inode * dir, struct dentry *dentry); +extern int minix_mknod(struct inode * dir, struct dentry *dentry, int mode, int rdev); +extern int minix_rename(struct inode * old_dir, struct dentry *old_dentry, + struct inode * new_dir, struct dentry *new_dentry); extern struct inode * minix_new_inode(const struct inode * dir); extern void minix_free_inode(struct inode * inode); extern unsigned long minix_count_free_inodes(struct super_block *sb); diff --git a/include/linux/ufs_fs.h b/include/linux/ufs_fs.h index af856645dfd4..36aa722b16f7 100644 --- a/include/linux/ufs_fs.h +++ b/include/linux/ufs_fs.h @@ -225,7 +225,7 @@ extern void ufs_put_inode(struct inode * inode); extern void ufs_print_inode (struct inode *); /* ufs_namei.c */ -extern int 
ufs_lookup (struct inode *, const char *, int, struct inode **); +extern int ufs_lookup (struct inode *, struct qstr *, struct inode **); /* ufs_super.c */ extern void ufs_warning (struct super_block *, const char *, const char *, ...) diff --git a/kernel/ksyms.c b/kernel/ksyms.c index b5c2b4d0abb9..3e66053b71aa 100644 --- a/kernel/ksyms.c +++ b/kernel/ksyms.c @@ -142,7 +142,7 @@ EXPORT_SYMBOL(getname); EXPORT_SYMBOL(putname); EXPORT_SYMBOL(__fput); EXPORT_SYMBOL(iget); -EXPORT_SYMBOL(_iput); +EXPORT_SYMBOL(iput); EXPORT_SYMBOL(__namei); EXPORT_SYMBOL(lookup_dentry); EXPORT_SYMBOL(open_namei); @@ -342,7 +342,7 @@ EXPORT_SYMBOL(set_writetime); EXPORT_SYMBOL(sys_tz); EXPORT_SYMBOL(__wait_on_super); EXPORT_SYMBOL(file_fsync); -EXPORT_SYMBOL(_clear_inode); +EXPORT_SYMBOL(clear_inode); EXPORT_SYMBOL(refile_buffer); EXPORT_SYMBOL(nr_async_pages); EXPORT_SYMBOL(___strtok); @@ -353,7 +353,7 @@ EXPORT_SYMBOL(chrdev_inode_operations); EXPORT_SYMBOL(blkdev_inode_operations); EXPORT_SYMBOL(read_ahead); EXPORT_SYMBOL(get_hash_table); -EXPORT_SYMBOL(_get_empty_inode); +EXPORT_SYMBOL(get_empty_inode); EXPORT_SYMBOL(insert_inode_hash); EXPORT_SYMBOL(event); EXPORT_SYMBOL(__down); diff --git a/kernel/sys.c b/kernel/sys.c index 691d2b10bc30..b06a4fd27b78 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -399,8 +399,6 @@ int acct_process(long exitcode) acct_file.f_op->write(acct_file.f_inode, &acct_file, (char *)&ac, sizeof(struct acct)); - /* inode->i_status |= ST_MODIFIED is willingly *not* done here */ - set_fs(fs); } return 0; diff --git a/mm/filemap.c b/mm/filemap.c index 71bcb982f6aa..b4d2fb814a64 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -753,10 +753,7 @@ page_read_error: filp->f_reada = 1; if (page_cache) free_page(page_cache); - if (DO_UPDATE_ATIME(inode)) { - inode->i_atime = CURRENT_TIME; - inode->i_dirt = 1; - } + UPDATE_ATIME(inode); if (!read) read = error; return read; @@ -919,7 +916,6 @@ static inline int do_write_page(struct inode * inode, struct file * file,
retval = -EIO; if (size == file->f_op->write(inode, file, (const char *) page, size)) retval = 0; - /* inode->i_status |= ST_MODIFIED is willingly *not* done here */ set_fs(old_fs); return retval; } @@ -1189,10 +1185,7 @@ int generic_file_mmap(struct inode * inode, struct file * file, struct vm_area_s return -EACCES; if (!inode->i_op || !inode->i_op->readpage) return -ENOEXEC; - if (DO_UPDATE_ATIME(inode)) { - inode->i_atime = CURRENT_TIME; - inode->i_dirt = 1; - } + UPDATE_ATIME(inode); vma->vm_inode = inode; atomic_inc(&inode->i_count); vma->vm_ops = ops; -- 2.39.5