KERNELHDRS =/usr/src/linux/include
.c.s:
- $(CC) $(CFLAGS) -S $<
+ $(CC) $(CFLAGS) -S -o $*.s $<
.s.o:
$(AS) -c -o $*.o $<
.c.o:
all: Version Image
linuxsubdirs: dummy
- @for i in $(SUBDIRS); do (cd $$i; echo $$i; $(MAKE)) || exit; done
+ @for i in $(SUBDIRS); do (cd $$i && echo $$i && $(MAKE)) || exit; done
Version:
@./makever.sh
- @echo \#define UTS_RELEASE \"0.97-`cat .version`\" > include/linux/config_rel.h
+ @echo \#define UTS_RELEASE \"0.97.pl2-`cat .version`\" > include/linux/config_rel.h
@echo \#define UTS_VERSION \"`date +%D`\" > include/linux/config_ver.h
touch include/linux/config.h
boot/head.o: boot/head.s
+init/main.o: init/main.c
+ $(CC) $(CFLAGS) $(PROFILING) -c -o $*.o $<
+
tools/system: boot/head.o init/main.o linuxsubdirs
$(LD) $(LDFLAGS) -M boot/head.o init/main.o \
$(ARCHIVES) \
rm -f Image System.map tmp_make core boot/bootsect boot/setup \
boot/bootsect.s boot/setup.s init/main.s
rm -f init/*.o tools/system tools/build boot/*.o
- for i in $(SUBDIRS); do (cd $$i; $(MAKE) clean); done
+ for i in $(SUBDIRS); do (cd $$i && $(MAKE) clean); done
backup: clean
- cd .. ; tar cf - linux | compress - > backup.Z
+ cd .. && tar cf - linux | compress - > backup.Z
sync
depend dep:
sed '/\#\#\# Dependencies/q' < Makefile > tmp_make
for i in init/*.c;do echo -n "init/";$(CPP) -M $$i;done >> tmp_make
cp tmp_make Makefile
- for i in $(SUBDIRS); do (cd $$i; $(MAKE) dep) || exit; done
+ for i in $(SUBDIRS); do (cd $$i && $(MAKE) dep) || exit; done
dummy:
* the page directory.
*/
.text
-.globl _idt,_gdt,_pg_dir,_tmp_floppy_area,_floppy_track_buffer
+.globl _idt,_gdt,_swapper_pg_dir,_tmp_floppy_area,_floppy_track_buffer
/*
- * pg_dir is the main page directory, address 0x00000000
+ * swapper_pg_dir is the main page directory, address 0x00000000
*/
-_pg_dir:
+_swapper_pg_dir:
startup_32:
cld
movl $0x10,%eax
mov %ax,%gs
lss _stack_start,%esp
call setup_idt
- call setup_gdt
- movl $0x10,%eax # reload all the segment registers
- mov %ax,%ds # after changing gdt. CS was already
- mov %ax,%es # reloaded in 'setup_gdt'
- mov %ax,%fs
- mov %ax,%gs
- lss _stack_start,%esp
xorl %eax,%eax
1: incl %eax # check that A20 really IS enabled
movl %eax,0x000000 # loop forever if it isn't
* setup_idt
*
* sets up a idt with 256 entries pointing to
- * ignore_int, interrupt gates. It then loads
- * idt. Everything that wants to install itself
- * in the idt-table may do so themselves. Interrupts
+ * ignore_int, interrupt gates. It doesn't actually load
+ * idt - that can be done only after paging has been enabled
+ * and the kernel moved to 0xC0000000. Interrupts
* are enabled elsewhere, when we can be relatively
* sure everything is ok. This routine will be over-
* written by the page tables.
addl $8,%edi
dec %ecx
jne rp_sidt
- lidt idt_descr
- ret
-
-/*
- * setup_gdt
- *
- * This routines sets up a new gdt and loads it.
- * Only two entries are currently built, the same
- * ones that were built in init.s. The routine
- * is VERY complicated at two whole lines, so this
- * rather long comment is certainly needed :-).
- * This routine will beoverwritten by the page tables.
- */
-setup_gdt:
- lgdt gdt_descr
ret
/*
after_page_tables:
call setup_paging
+ lgdt gdt_descr
+ lidt idt_descr
+ ljmp $0x08,$1f
+1: movl $0x10,%eax # reload all the segment registers
+ mov %ax,%ds # after changing gdt.
+ mov %ax,%es
+ mov %ax,%fs
+ mov %ax,%gs
+ lss _stack_start,%esp
pushl $0 # These are the parameters to main :-)
pushl $0
pushl $0
*/
.align 2
setup_paging:
- movl $1024*5,%ecx /* 5 pages - pg_dir+4 page tables */
+ movl $1024*5,%ecx /* 5 pages - swapper_pg_dir+4 page tables */
xorl %eax,%eax
- xorl %edi,%edi /* pg_dir is at 0x000 */
+ xorl %edi,%edi /* swapper_pg_dir is at 0x000 */
cld;rep;stosl
- movl $pg0+7,_pg_dir /* set present bit/user r/w */
- movl $pg1+7,_pg_dir+4 /* --------- " " --------- */
- movl $pg2+7,_pg_dir+8 /* --------- " " --------- */
- movl $pg3+7,_pg_dir+12 /* --------- " " --------- */
+/* Identity-map the kernel in low 4MB memory for ease of transition */
+ movl $pg0+7,_swapper_pg_dir /* set present bit/user r/w */
+/* But the real place is at 0xC0000000 */
+ movl $pg0+7,_swapper_pg_dir+3072 /* set present bit/user r/w */
+ movl $pg1+7,_swapper_pg_dir+3076 /* --------- " " --------- */
+ movl $pg2+7,_swapper_pg_dir+3080 /* --------- " " --------- */
+ movl $pg3+7,_swapper_pg_dir+3084 /* --------- " " --------- */
movl $pg3+4092,%edi
movl $0xfff007,%eax /* 16Mb - 4096 + 7 (r/w user,p) */
std
subl $0x1000,%eax
jge 1b
cld
- xorl %eax,%eax /* pg_dir is at 0x0000 */
+ xorl %eax,%eax /* swapper_pg_dir is at 0x0000 */
movl %eax,%cr3 /* cr3 - page directory start */
movl %cr0,%eax
orl $0x80000000,%eax
movl %eax,%cr0 /* set paging (PG) bit */
ret /* this also flushes prefetch-queue */
-.align 2
+/*
+ * The interrupt descriptor table has room for 256 entries
+ */
+.align 4
.word 0
idt_descr:
.word 256*8-1 # idt contains 256 entries
- .long _idt
-.align 2
+ .long 0xc0000000+_idt
+
+.align 4
+_idt:
+ .fill 256,8,0 # idt is uninitialized
+
+/*
+ * The real GDT is also 256 entries long - no real reason
+ */
+.align 4
.word 0
gdt_descr:
- .word 256*8-1 # so does gdt (not that that's any
- .long _gdt # magic number, but it works for me :^)
-
- .align 3
-_idt: .fill 256,8,0 # idt is uninitialized
+ .word 256*8-1
+ .long 0xc0000000+_gdt
-_gdt: .quad 0x0000000000000000 /* NULL descriptor */
- .quad 0x00c09a0000000fff /* 16Mb */
- .quad 0x00c0920000000fff /* 16Mb */
+.align 4
+_gdt:
+ .quad 0x0000000000000000 /* NULL descriptor */
+ .quad 0xc0c09a0000000fff /* 16Mb at 0xC0000000 */
+ .quad 0xc0c0920000000fff /* 16Mb */
.quad 0x0000000000000000 /* TEMPORARY - don't use */
.fill 252,8,0 /* space for LDT's and TSS's etc */
OBJS= open.o read_write.o inode.o file_table.o buffer.o super.o \
block_dev.o stat.o exec.o pipe.o namei.o fcntl.o ioctl.o \
- select.o fifo.o
+ select.o fifo.o locks.o
all: fs.o fssubdirs
$(LD) -r -o fs.o $(OBJS)
fssubdirs: dummy
- @for i in $(SUBDIRS); do (cd $$i; echo $$i; $(MAKE)) || exit; done
+ @for i in $(SUBDIRS); do (cd $$i && echo $$i && $(MAKE)) || exit; done
clean:
rm -f core *.o *.a tmp_make
for i in *.c; do rm -f `basename $$i .c`.s;done
- for i in $(SUBDIRS); do (cd $$i; $(MAKE) clean); done
+ for i in $(SUBDIRS); do (cd $$i && $(MAKE) clean); done
depend dep:
sed '/\#\#\# Dependencies/q' < Makefile > tmp_make
for i in *.c;do $(CPP) -M $$i;done >> tmp_make
cp tmp_make Makefile
- for i in $(SUBDIRS); do (cd $$i; $(MAKE) dep) || exit; done
+ for i in $(SUBDIRS); do (cd $$i && $(MAKE) dep) || exit; done
dummy:
tmp = bh;
bh->b_data = (char * ) (page+i);
bh->b_size = size;
- i += size;
}
tmp = bh;
while (1) {
if(current->rlim[RLIMIT_CORE].rlim_cur < PAGE_SIZE/1024) return 0;
__asm__("mov %%fs,%0":"=r" (fs));
__asm__("mov %0,%%fs"::"r" ((unsigned short) 0x10));
- if (open_namei("core",O_CREAT | O_WRONLY | O_TRUNC,0600,&inode))
+ if (open_namei("core",O_CREAT | O_WRONLY | O_TRUNC,0600,&inode,NULL))
goto end_coredump;
if (!S_ISREG(inode->i_mode))
goto end_coredump;
struct inode * inode;
struct buffer_head * bh;
struct exec ex;
+ int error;
- if (get_limit(0x17) != TASK_SIZE)
+ if (!library || get_limit(0x17) != TASK_SIZE)
return -EINVAL;
if ((libnum >= MAX_SHARED_LIBS) || (libnum < 0))
return -EINVAL;
- if (library)
- inode = namei(library);
- else
- inode = NULL;
- if (!inode)
- return -ENOENT;
+ error = namei(library,&inode);
+ if (error)
+ return error;
if (!inode->i_sb || !S_ISREG(inode->i_mode) || !permission(inode,MAY_READ)) {
iput(inode);
return -EACCES;
current->libraries[libnum].library = inode;
current->libraries[libnum].start = ex.a_entry;
current->libraries[libnum].length = (ex.a_data+ex.a_text+0xfff) & 0xfffff000;
+ current->libraries[libnum].bss = (ex.a_bss+0xfff) & 0xfffff000;
#if 0
printk("Loaded library %d at %08x, length %08x\n",
libnum,
code_limit = TASK_SIZE;
data_limit = TASK_SIZE;
- code_base = get_base(current->ldt[1]);
- data_base = code_base;
+ code_base = data_base = 0;
+ current->start_code = code_base;
set_base(current->ldt[1],code_base);
set_limit(current->ldt[1],code_limit);
set_base(current->ldt[2],data_base);
set_limit(current->ldt[2],data_limit);
/* make sure fs points to the NEW data segment */
__asm__("pushl $0x17\n\tpop %%fs"::);
- data_base += data_limit - LIBRARY_SIZE;
+ data_base += data_limit;
for (i=MAX_ARG_PAGES-1 ; i>=0 ; i--) {
data_base -= PAGE_SIZE;
if (page[i])
- put_dirty_page(page[i],data_base);
+ put_dirty_page(current,page[i],data_base);
}
return data_limit;
}
panic("execve called from supervisor mode");
for (i=0 ; i<MAX_ARG_PAGES ; i++) /* clear page-table */
page[i]=0;
- if (!(inode=namei(filename))) /* get executables inode */
- return -ENOENT;
+ retval = namei(filename,&inode); /* get executable inode */
+ if (retval)
+ return retval;
argc = count(argv);
envc = count(envp);
*/
old_fs = get_fs();
set_fs(get_ds());
- if (!(inode=namei(interp))) { /* get executables inode */
- set_fs(old_fs);
- retval = -ENOENT;
- goto exec_error1;
- }
+ retval = namei(interp,&inode);
set_fs(old_fs);
+ if (retval)
+ goto exec_error1;
goto restart_interp;
}
brelse(bh);
if ((current->close_on_exec>>i)&1)
sys_close(i);
current->close_on_exec = 0;
- free_page_tables(get_base(current->ldt[1]),get_limit(0x0f));
- free_page_tables(get_base(current->ldt[2]),get_limit(0x17));
+ clear_page_tables(current);
if (last_task_used_math == current)
last_task_used_math = NULL;
current->used_math = 0;
p += change_ldt(ex.a_text,page);
- p -= LIBRARY_SIZE + MAX_ARG_PAGES*PAGE_SIZE;
+ p -= MAX_ARG_PAGES*PAGE_SIZE;
p = (unsigned long) create_tables((char *)p,argc,envc);
current->brk = ex.a_bss +
(current->end_data = ex.a_data +
(current->end_code = ex.a_text));
current->start_stack = p;
- current->rss = (LIBRARY_OFFSET - p + PAGE_SIZE-1) / PAGE_SIZE;
+ current->rss = (TASK_SIZE - p + PAGE_SIZE-1) / PAGE_SIZE;
current->suid = current->euid = e_uid;
current->sgid = current->egid = e_gid;
if (N_MAGIC(ex) == OMAGIC)
inode->i_ino = j;
inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
inode->i_op = NULL;
+ inode->i_blocks = inode->i_blksize = 0;
#ifdef EXTFS_DEBUG
printk("ext_new_inode : allocating inode %d\n", inode->i_ino);
#endif
inode->i_nlink = raw_inode->i_nlinks;
inode->i_size = raw_inode->i_size;
inode->i_mtime = inode->i_atime = inode->i_ctime = raw_inode->i_time;
+ inode->i_blocks = inode->i_blksize = 0;
if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode))
inode->i_rdev = raw_inode->i_zone[0];
else for (block = 0; block < 12; block++)
#include <linux/stat.h>
static int ext_readlink(struct inode *, char *, int);
-static struct inode * ext_follow_link(struct inode *, struct inode *);
+static int ext_follow_link(struct inode *, struct inode *, int, int, struct inode **);
/*
* symlinks can't do much...
NULL /* truncate */
};
-static struct inode * ext_follow_link(struct inode * dir, struct inode * inode)
+static int ext_follow_link(struct inode * dir, struct inode * inode,
+ int flag, int mode, struct inode ** res_inode)
{
+ int error;
unsigned short fs;
struct buffer_head * bh;
}
if (!inode) {
iput(dir);
- return NULL;
+ *res_inode = NULL;
+ return -ENOENT;
}
if (!S_ISLNK(inode->i_mode)) {
iput(dir);
- return inode;
+ *res_inode = inode;
+ return 0;
}
__asm__("mov %%fs,%0":"=r" (fs));
if ((current->link_count > 5) || !inode->i_data[0] ||
!(bh = bread(inode->i_dev, inode->i_data[0], BLOCK_SIZE))) {
iput(dir);
iput(inode);
- return NULL;
+ *res_inode = NULL;
+ return -ELOOP;
}
iput(inode);
__asm__("mov %0,%%fs"::"r" ((unsigned short) 0x10));
current->link_count++;
- inode = _namei(bh->b_data,dir,1);
+ error = open_namei(bh->b_data,flag,mode,res_inode,dir);
current->link_count--;
__asm__("mov %0,%%fs"::"r" (fs));
brelse(bh);
- return inode;
+ return error;
}
static int ext_readlink(struct inode * inode, char * buffer, int buflen)
#include <linux/string.h>
extern int sys_close(int fd);
+extern int fcntl_getlk(unsigned int, struct flock *);
+extern int fcntl_setlk(unsigned int, unsigned int, struct flock *);
static int dupfd(unsigned int fd, unsigned int arg)
{
filp->f_flags &= ~(O_APPEND | O_NONBLOCK);
filp->f_flags |= arg & (O_APPEND | O_NONBLOCK);
return 0;
- case F_GETLK: case F_SETLK: case F_SETLKW:
- return -ENOSYS;
+ case F_GETLK:
+ return fcntl_getlk(fd, (struct flock *) arg);
+ case F_SETLK:
+ return fcntl_setlk(fd, cmd, (struct flock *) arg);
+ case F_SETLKW:
+ return fcntl_setlk(fd, cmd, (struct flock *) arg);
default:
/* sockets need a few special fcntls. */
if (S_ISSOCK (filp->f_inode->i_mode))
--- /dev/null
+/*
+ * linux/fs/locks.c
+ *
+ * Provide support for fcntl()'s F_GETLK, F_SETLK, and F_SETLKW calls.
+ * Doug Evans, 92Aug07, dje@sspiff.uucp.
+ *
+ * FIXME: two things aren't handled yet:
+ * - deadlock detection/avoidance (of dubious merit, but since it's in
+ * the definition, I guess it should be provided eventually)
+ * - mandatory locks (requires lots of changes elsewhere)
+ */
+
+#include <asm/segment.h>
+
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/stat.h>
+#include <linux/fcntl.h>
+
+#define OFFSET_MAX 0x7fffffff /* FIXME: move elsewhere? */
+
+static int copy_flock(struct file *filp, struct file_lock *fl, struct flock *l);
+static int conflict(struct file_lock *caller_fl, struct file_lock *sys_fl);
+static int overlap(struct file_lock *fl1, struct file_lock *fl2);
+static int lock_it(struct file *filp, struct file_lock *caller);
+static int unlock_it(struct file *filp, struct file_lock *caller);
+static struct file_lock *alloc_lock(struct file *filp, struct file_lock *template);
+static void free_lock(struct file *filp, struct file_lock *fl);
+
+static struct file_lock file_lock_table[NR_FILE_LOCKS];
+static struct file_lock *file_lock_free_list;
+
+/*
+ * Called at boot time to initialize the lock table ...
+ *
+ * Every entry of file_lock_table is chained through {fl_next} into the
+ * free list and has {fl_owner} cleared; alloc_lock() and free_lock()
+ * both sanity-check {fl_owner}, expecting NULL on free entries and
+ * non-NULL on entries that are in use.
+ */
+
+void fcntl_init_locks(void)
+{
+ struct file_lock *fl;
+
+ for (fl = &file_lock_table[0]; fl < file_lock_table + NR_FILE_LOCKS - 1; fl++) {
+ fl->fl_next = fl + 1;
+ fl->fl_owner = NULL;
+ }
+ file_lock_table[NR_FILE_LOCKS - 1].fl_next = NULL; /* last entry ends the free list */
+ file_lock_table[NR_FILE_LOCKS - 1].fl_owner = NULL;
+ file_lock_free_list = &file_lock_table[0];
+}
+/*
+ * Handle F_GETLK: look for a lock on the file that would block the lock
+ * described by the user-space "struct flock" at {l}.
+ *
+ * Result is 0 for success. If a blocking lock was found, {l} is rewritten
+ * to describe the first one in the inode's lock list; otherwise {l} is
+ * written back with {l_type} changed to F_UNLCK. -EBADF is returned for a
+ * bad descriptor and -EINVAL if the request is F_UNLCK or is rejected by
+ * copy_flock(). Locks owned by the caller never count as blocking (see
+ * conflict()).
+ */
+int fcntl_getlk(unsigned int fd, struct flock *l)
+{
+ struct flock flock;
+ struct file *filp;
+ struct file_lock *fl,file_lock;
+
+ if (fd >= NR_OPEN || !(filp = current->filp[fd]))
+ return -EBADF;
+ verify_area(l, sizeof(*l));
+ memcpy_fromfs(&flock, l, sizeof(flock));
+ if (flock.l_type == F_UNLCK)
+ return -EINVAL;
+ if (!copy_flock(filp, &file_lock, &flock))
+ return -EINVAL;
+
+ for (fl = filp->f_inode->i_flock; fl != NULL; fl = fl->fl_next) {
+ if (conflict(&file_lock, fl)) {
+ flock.l_pid = fl->fl_owner->pid;
+ flock.l_start = fl->fl_start;
+ flock.l_len = fl->fl_end == OFFSET_MAX ? 0 : /* copy_flock() stores l_len == 0 as OFFSET_MAX */
+ fl->fl_end - fl->fl_start + 1;
+ flock.l_whence = fl->fl_whence; /* copy_flock() always records 0 here */
+ flock.l_type = fl->fl_type;
+ memcpy_tofs(l, &flock, sizeof(flock));
+ return 0;
+ }
+ }
+
+ flock.l_type = F_UNLCK; /* no conflict found */
+ memcpy_tofs(l, &flock, sizeof(flock));
+ return 0;
+}
+
+/*
+ * This function implements both F_SETLK and F_SETLKW.
+ *
+ * {fd} is the caller's descriptor, {cmd} is F_SETLK or F_SETLKW and {l}
+ * points to the caller's "struct flock" in user space.
+ *
+ * Result is 0 for success, -EBADF for a bad descriptor or one whose
+ * {f_mode} lacks the bit the lock type requires, -EINVAL if copy_flock()
+ * rejects the request, -EAGAIN if F_SETLK finds a conflicting lock,
+ * -EINTR if a signal is pending while F_SETLKW has to wait, or whatever
+ * lock_it() / unlock_it() return.
+ */
+
+int fcntl_setlk(unsigned int fd, unsigned int cmd, struct flock *l)
+{
+	struct file *filp;
+	struct file_lock *fl,file_lock;
+	struct flock flock;
+
+	/*
+	 * Get arguments and validate them ...
+	 */
+
+	if (fd >= NR_OPEN || !(filp = current->filp[fd]))
+		return -EBADF;
+	verify_area(l, sizeof(*l));
+	memcpy_fromfs(&flock, l, sizeof(flock));
+	if (!copy_flock(filp, &file_lock, &flock))
+		return -EINVAL;
+	switch (file_lock.fl_type) {
+	case F_RDLCK :
+		if (!(filp->f_mode & 1))
+			return -EBADF;
+		break;
+	case F_WRLCK :
+		if (!(filp->f_mode & 2))
+			return -EBADF;
+		break;
+	case F_UNLCK :
+		break;
+	}
+
+	/*
+	 * F_UNLCK needs to be handled differently ...
+	 */
+
+	if (file_lock.fl_type == F_UNLCK)
+		return unlock_it(filp, &file_lock);
+
+	/*
+	 * Scan for a conflicting lock ...
+	 */
+
+repeat:
+	for (fl = filp->f_inode->i_flock; fl != NULL; fl = fl->fl_next) {
+		if (!conflict(&file_lock, fl))
+			continue;
+		/*
+		 * File is locked by another process. If this is F_SETLKW
+		 * wait for the lock to be released.
+		 * FIXME: We need to check for deadlocks here.
+		 */
+		if (cmd == F_SETLKW) {
+			/*
+			 * The sleep is interruptible, so it can end because
+			 * of a signal rather than because the lock was freed.
+			 * Give up in that case instead of rescanning and
+			 * sleeping again, otherwise the caller could not be
+			 * interrupted while the conflicting lock is held.
+			 */
+			if (current->signal & ~current->blocked)
+				return -EINTR;
+			interruptible_sleep_on(&fl->fl_wait);
+			if (current->signal & ~current->blocked)
+				return -EINTR;
+			goto repeat;
+		}
+		return -EAGAIN;
+	}
+
+	/*
+	 * Lock doesn't conflict with any other lock ...
+	 */
+
+	return lock_it(filp, &file_lock);
+}
+
+/*
+ * This function is called when the file is closed.
+ *
+ * Every lock on the file's inode whose owner is {task} is released with
+ * free_lock(); locks owned by other tasks are left in place. The locks
+ * hang off the inode, not off {filp}, so all of the task's locks on that
+ * inode go, whichever descriptor they were set through.
+ */
+
+void fcntl_remove_locks(struct task_struct *task, struct file *filp)
+{
+ struct file_lock *fl,*next;
+
+ for (fl = filp->f_inode->i_flock; fl != NULL; ) {
+ /*
+ * If this one is freed, {fl_next} gets clobbered when the
+ * entry is moved to the free list, so grab it now ...
+ */
+ next = fl->fl_next;
+ if (fl->fl_owner == task)
+ free_lock(filp, fl);
+ fl = next;
+ }
+}
+
+/*
+ * Verify a "struct flock" and copy it to a "struct file_lock" ...
+ * Result is a boolean indicating success.
+ *
+ * {l} must already be in kernel space. The request is refused (result 0)
+ * if the file has no inode or is not a regular file, if {l_type} is not
+ * one of F_RDLCK, F_WRLCK, F_UNLCK, if {l_whence} is not 0, 1 or 2, or if
+ * the resulting start offset or {l_len} is negative.
+ *
+ * On success {fl} holds the absolute byte range [fl_start, fl_end], both
+ * ends inclusive, and is owned by the current task. An {l_len} of 0, or
+ * an end offset that overflows to a negative value, is stored as
+ * {fl_end} == OFFSET_MAX.
+ */
+
+static int copy_flock(struct file *filp, struct file_lock *fl, struct flock *l)
+{
+ off_t start;
+
+ if (!filp->f_inode) /* just in case */
+ return 0;
+ if (!S_ISREG(filp->f_inode->i_mode))
+ return 0;
+ if (l->l_type != F_UNLCK && l->l_type != F_RDLCK && l->l_type != F_WRLCK)
+ return 0;
+ switch (l->l_whence) {
+ case 0 /*SEEK_SET*/ : start = 0; break;
+ case 1 /*SEEK_CUR*/ : start = filp->f_pos; break;
+ case 2 /*SEEK_END*/ : start = filp->f_inode->i_size; break;
+ default : return 0;
+ }
+ if ((start += l->l_start) < 0 || l->l_len < 0)
+ return 0;
+ fl->fl_type = l->l_type;
+ fl->fl_start = start; /* we record the absolute position */
+ fl->fl_whence = 0; /* FIXME: do we record {l_start} as passed? */
+ if (l->l_len == 0 || (fl->fl_end = start + l->l_len - 1) < 0)
+ fl->fl_end = OFFSET_MAX;
+ fl->fl_owner = current;
+ fl->fl_wait = NULL; /* just for cleanliness */
+ return 1;
+}
+
+/*
+ * Determine if lock {sys_fl} blocks lock {caller_fl} ...
+ *
+ * Result is non-zero if it does. Two locks with the same owner never
+ * conflict, nor do locks whose ranges do not overlap. Otherwise a
+ * requested F_RDLCK is blocked by anything that is not an F_RDLCK, and a
+ * requested F_WRLCK is blocked by any overlapping lock. Any other
+ * requested type yields 0.
+ */
+
+static int conflict(struct file_lock *caller_fl, struct file_lock *sys_fl)
+{
+ if (caller_fl->fl_owner == sys_fl->fl_owner)
+ return 0;
+ if (!overlap(caller_fl, sys_fl))
+ return 0;
+ switch (caller_fl->fl_type) {
+ case F_RDLCK :
+ return sys_fl->fl_type != F_RDLCK;
+ case F_WRLCK :
+ return 1; /* overlapping region not owned by caller */
+ }
+ return 0; /* shouldn't get here, but just in case */
+}
+/*
+ * Result is non-zero if the byte ranges [fl_start, fl_end] of the two
+ * locks have at least one byte in common. Both ends are inclusive, and
+ * only the ranges are compared - owner and lock type are ignored.
+ */
+static int overlap(struct file_lock *fl1, struct file_lock *fl2)
+{
+ if (fl1->fl_start <= fl2->fl_start) {
+ return fl1->fl_end >= fl2->fl_start;
+ } else {
+ return fl2->fl_end >= fl1->fl_start;
+ }
+}
+
+/*
+ * Add a lock to a file ...
+ * Result is 0 for success or -ENOLCK.
+ *
+ * We try to be real clever here and always minimize the number of table
+ * entries we use. For example we merge adjacent locks whenever possible. This
+ * consumes a bit of cpu and code space, is it really worth it? Beats me.
+ *
+ * I've tried to keep the following as small and simple as possible. If you can
+ * make it smaller or simpler, please do. /dje 92Aug11
+ *
+ * WARNING: We assume the lock doesn't conflict with any other lock.
+ *
+ * {caller} is first copied into a table entry of its own; only entries
+ * with the same owner are then examined. For each such entry:
+ *  - same type, adjacent or overlapping: the ranges are merged into the
+ *    existing entry and the new entry is released again;
+ *  - different type, overlapping: the existing entry is split, replaced
+ *    or trimmed so that the new lock covers the contested bytes.
+ *
+ * NOTE(review): the "split" and "completely replaces" cases return as
+ * soon as one existing entry has been dealt with, and a merged entry is
+ * only compared against entries that follow it in the list - confirm
+ * that no overlapping entries of the same owner can be left behind.
+ */
+
+static int lock_it(struct file *filp, struct file_lock *caller)
+{
+ struct file_lock *fl,*new;
+
+ /*
+ * It's easier if we allocate a slot for the lock first, and then
+ * release it later if we have to (IE: if it can be merged with
+ * another). This way the for() loop always knows that {caller} is an
+ * existing entry. This will cause the routine to fail unnecessarily
+ * in rare cases, but perfection can be pushed too far. :-)
+ */
+
+ if ((caller = alloc_lock(filp, caller)) == NULL)
+ return -ENOLCK;
+
+ /*
+ * First scan to see if we are changing/augmenting an existing lock ...
+ */
+
+ for (fl = filp->f_inode->i_flock; fl != NULL; fl = fl->fl_next) {
+ if (caller->fl_owner != fl->fl_owner)
+ continue;
+ if (caller == fl)
+ continue;
+ if (!overlap(caller, fl)) {
+ /*
+ * Detect adjacent regions (if same lock type) ...
+ */
+ if (caller->fl_type != fl->fl_type)
+ continue;
+ if (caller->fl_end + 1 == fl->fl_start) {
+ fl->fl_start = caller->fl_start;
+ free_lock(filp, caller);
+ caller = fl;
+ /* must continue, may overlap others now */
+ } else if (caller->fl_start - 1 == fl->fl_end) {
+ fl->fl_end = caller->fl_end;
+ free_lock(filp, caller);
+ caller = fl;
+ /* must continue, may overlap others now */
+ }
+ continue;
+ }
+ /*
+ * We've found an overlapping region. Is it a change of lock
+ * type, or are we changing the size of the locked space?
+ */
+ if (caller->fl_type != fl->fl_type) {
+ if (caller->fl_start > fl->fl_start && caller->fl_end < fl->fl_end) {
+ /*
+ * The new lock splits the old one in two ...
+ * {fl} is the bottom piece, {caller} is the
+ * new lock, and {new} is the top piece.
+ */
+ if ((new = alloc_lock(filp, fl)) == NULL) {
+ free_lock(filp, caller);
+ return -ENOLCK;
+ }
+ fl->fl_end = caller->fl_start - 1;
+ new->fl_start = caller->fl_end + 1;
+ return 0;
+ }
+ if (caller->fl_start <= fl->fl_start && caller->fl_end >= fl->fl_end) {
+ /*
+ * The new lock completely replaces old one ...
+ */
+ free_lock(filp, fl);
+ return 0;
+ }
+ if (caller->fl_end < fl->fl_end) {
+ fl->fl_start = caller->fl_end + 1;
+ /* must continue, may be more overlaps */
+ } else if (caller->fl_start > fl->fl_start) {
+ fl->fl_end = caller->fl_start - 1;
+ /* must continue, may be more overlaps */
+ } else {
+ printk("lock_it: program bug: unanticipated overlap\n");
+ free_lock(filp, caller);
+ return -ENOLCK;
+ }
+ } else { /* The new lock augments an existing lock ... */
+ int grew = 0;
+
+ if (caller->fl_start < fl->fl_start) {
+ fl->fl_start = caller->fl_start;
+ grew = 1;
+ }
+ if (caller->fl_end > fl->fl_end) {
+ fl->fl_end = caller->fl_end;
+ grew = 1;
+ }
+ free_lock(filp, caller);
+ caller = fl;
+ if (!grew)
+ return 0;
+ /* must continue, may be more overlaps */
+ }
+ }
+
+ /*
+ * New lock doesn't overlap any regions ...
+ * alloc_lock() has already been called, so we're done!
+ */
+
+ return 0;
+}
+
+/*
+ * Handle F_UNLCK ...
+ * Result is 0 for success, or -EINVAL or -ENOLCK.
+ * ENOLCK can happen when a lock is split into two.
+ *
+ * Only locks with the same owner as {caller} that overlap its range are
+ * touched: each one is freed, trimmed at one end, or split in two around
+ * the unlocked bytes. -EINVAL is returned if no such lock exists. When a
+ * split fails for want of a free table entry, the lock that needed
+ * splitting is left unchanged.
+ */
+
+static int unlock_it(struct file *filp, struct file_lock *caller)
+{
+ int one_unlocked = 0;
+ struct file_lock *fl,*next;
+
+ for (fl = filp->f_inode->i_flock; fl != NULL; ) {
+ if (caller->fl_owner != fl->fl_owner || !overlap(caller, fl)) {
+ fl = fl->fl_next;
+ continue;
+ }
+ one_unlocked = 1;
+ if (caller->fl_start > fl->fl_start && caller->fl_end < fl->fl_end) {
+ /*
+ * Lock is split in two ...
+ * {fl} is the bottom piece, {next} is the top piece.
+ */
+ if ((next = alloc_lock(filp, fl)) == NULL)
+ return -ENOLCK;
+ fl->fl_end = caller->fl_start - 1;
+ next->fl_start = caller->fl_end + 1;
+ return 0;
+ }
+ /*
+ * At this point we know there is an overlap and we know the
+ * lock isn't split into two ...
+ *
+ * Unless the lock table is broken, entries will not overlap.
+ * IE: User X won't have an entry locking bytes 1-3 and another
+ * entry locking bytes 3-5. Therefore, if the area being
+ * unlocked is a subset of the total area, we don't need to
+ * traverse any more of the list. The code is a tad more
+ * complicated by this optimization. Perhaps it's not worth it.
+ *
+ * WARNING: We assume free_lock() does not alter
+ * {fl_start, fl_end}.
+ *
+ * {fl_next} gets clobbered when the entry is moved to
+ * the free list, so grab it now ...
+ */
+ next = fl->fl_next;
+ if (caller->fl_start <= fl->fl_start && caller->fl_end >= fl->fl_end) {
+ free_lock(filp, fl);
+ } else if (caller->fl_start > fl->fl_start) {
+ fl->fl_end = caller->fl_start - 1;
+ } else {
+ /* caller->fl_end < fl->fl_end */
+ fl->fl_start = caller->fl_end + 1;
+ }
+ if (caller->fl_start >= fl->fl_start && caller->fl_end <= fl->fl_end)
+ return 0; /* no more to be found */
+ fl = next;
+ /* must continue, there may be more to unlock */
+ }
+
+ return one_unlocked ? 0 : -EINVAL;
+}
+/*
+ * Take an entry off the free list, fill it with a copy of {template} and
+ * put it at the head of the lock list of the file's inode.
+ *
+ * Result is the new entry, or NULL if the free list is empty. The new
+ * entry is always owned by the current task and starts with no waiters,
+ * whatever {template} said. A free-list entry that still has an owner
+ * means the table is corrupt and causes a panic.
+ */
+static struct file_lock *alloc_lock(struct file *filp, struct file_lock *template)
+{
+ struct file_lock *new;
+
+ if (file_lock_free_list == NULL)
+ return NULL; /* no available entry */
+ if (file_lock_free_list->fl_owner != NULL)
+ panic("alloc_lock: broken free list\n");
+
+ new = file_lock_free_list; /* remove from free list */
+ file_lock_free_list = file_lock_free_list->fl_next;
+
+ *new = *template;
+
+ new->fl_next = filp->f_inode->i_flock; /* insert into file's list */
+ filp->f_inode->i_flock = new;
+
+ new->fl_owner = current; /* FIXME: needed? */
+ new->fl_wait = NULL;
+ return new;
+}
+
+/*
+ * Add a lock to the free list ...
+ *
+ * WARNING: We must not alter {fl_start, fl_end}. See unlock_it().
+ *
+ * The entry is unlinked from the lock list of the file's inode, pushed
+ * onto the free list with its owner cleared, and anything sleeping on
+ * {fl_wait} is woken. An entry that already has no owner causes a panic.
+ *
+ * NOTE(review): if the entry is not found in the inode's list a message
+ * is printed, but the entry is still put on the free list - confirm
+ * that this is the intended recovery.
+ */
+
+static void free_lock(struct file *filp, struct file_lock *fl)
+{
+ struct file_lock **fl_p;
+
+ if (fl->fl_owner == NULL) /* sanity check */
+ panic("free_lock: broken lock list\n");
+
+ /*
+ * We only use a singly linked list to save some memory space
+ * (the only place we'd use a doubly linked list is here).
+ */
+
+ for (fl_p = &filp->f_inode->i_flock; *fl_p != NULL; fl_p = &(*fl_p)->fl_next) {
+ if (*fl_p == fl)
+ break;
+ }
+ if (*fl_p == NULL) {
+ printk("free_lock: lock is not in file's lock list\n");
+ } else {
+ *fl_p = (*fl_p)->fl_next;
+ }
+
+ fl->fl_next = file_lock_free_list; /* add to free list */
+ file_lock_free_list = fl;
+ fl->fl_owner = NULL; /* for sanity checks */
+
+ wake_up(&fl->fl_wait);
+}
inode->i_ino = j + i*8192;
inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
inode->i_op = NULL;
+ inode->i_blocks = inode->i_blksize = 0;
return inode;
}
inode->i_nlink = raw_inode->i_nlinks;
inode->i_size = raw_inode->i_size;
inode->i_mtime = inode->i_atime = inode->i_ctime = raw_inode->i_time;
+ inode->i_blocks = inode->i_blksize = 0;
if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode))
inode->i_rdev = raw_inode->i_zone[0];
else for (block = 0; block < 9; block++)
#include <linux/stat.h>
static int minix_readlink(struct inode *, char *, int);
-static struct inode * minix_follow_link(struct inode *, struct inode *);
+static int minix_follow_link(struct inode *, struct inode *, int, int, struct inode **);
/*
* symlinks can't do much...
NULL /* truncate */
};
-static struct inode * minix_follow_link(struct inode * dir, struct inode * inode)
+static int minix_follow_link(struct inode * dir, struct inode * inode,
+ int flag, int mode, struct inode ** res_inode)
{
+ int error;
unsigned short fs;
struct buffer_head * bh;
}
if (!inode) {
iput(dir);
- return NULL;
+ *res_inode = NULL;
+ return -ENOENT;
}
if (!S_ISLNK(inode->i_mode)) {
iput(dir);
- return inode;
+ *res_inode = inode;
+ return 0;
}
__asm__("mov %%fs,%0":"=r" (fs));
if ((current->link_count > 5) || !inode->i_data[0] ||
!(bh = bread(inode->i_dev, inode->i_data[0], BLOCK_SIZE))) {
iput(dir);
iput(inode);
- return NULL;
+ *res_inode = NULL;
+ return -ELOOP;
}
iput(inode);
__asm__("mov %0,%%fs"::"r" ((unsigned short) 0x10));
current->link_count++;
- inode = _namei(bh->b_data,dir,1);
+ error = open_namei(bh->b_data,flag,mode,res_inode,dir);
current->link_count--;
__asm__("mov %0,%%fs"::"r" (fs));
brelse(bh);
- return inode;
+ return error;
}
static int minix_readlink(struct inode * inode, char * buffer, int buflen)
# Note 2! The CFLAGS definitions are now in the main makefile...
.c.s:
- $(CC) $(CFLAGS) \
- -S -o $*.s $<
+ $(CC) $(CFLAGS) -S $<
.c.o:
- $(CC) $(CFLAGS) -c -o $*.o $<
+ $(CC) $(CFLAGS) -c $<
.s.o:
$(AS) -o $*.o $<
#include <linux/fcntl.h>
#include <linux/stat.h>
-struct inode * _namei(const char * filename, struct inode * base,
- int follow_links);
-
#define ACC_MODE(x) ("\004\002\006\377"[(x)&O_ACCMODE])
/*
return dir->i_op->lookup(dir,name,len,result);
}
-struct inode * follow_link(struct inode * dir, struct inode * inode)
+int follow_link(struct inode * dir, struct inode * inode,
+ int flag, int mode, struct inode ** res_inode)
{
if (!dir || !inode) {
iput(dir);
iput(inode);
- return NULL;
+ *res_inode = NULL;
+ return -ENOENT;
}
if (!inode->i_op || !inode->i_op->follow_link) {
iput(dir);
- return inode;
+ *res_inode = inode;
+ return 0;
}
- return inode->i_op->follow_link(dir,inode);
+ return inode->i_op->follow_link(dir,inode,flag,mode,res_inode);
}
/*
* dir_namei() returns the inode of the directory of the
* specified name, and the name within that directory.
*/
-static struct inode * dir_namei(const char * pathname,
- int * namelen, const char ** name, struct inode * base)
+static int dir_namei(const char * pathname, int * namelen, const char ** name,
+ struct inode * base, struct inode ** res_inode)
{
char c;
const char * thisname;
int len,error;
struct inode * inode;
+ *res_inode = NULL;
if (!base) {
base = current->pwd;
base->i_count++;
error = lookup(base,thisname,len,&inode);
if (error) {
iput(base);
- return NULL;
+ return error;
}
- if (!(base = follow_link(base,inode)))
- return NULL;
+ error = follow_link(base,inode,0,0,&base);
+ if (error)
+ return error;
}
*name = thisname;
*namelen = len;
- return base;
+ *res_inode = base;
+ return 0;
}
-struct inode * _namei(const char * pathname, struct inode * base,
- int follow_links)
+static int _namei(const char * pathname, struct inode * base,
+ int follow_links, struct inode ** res_inode)
{
const char * basename;
int namelen,error;
struct inode * inode;
- if (!(base = dir_namei(pathname,&namelen,&basename,base)))
- return NULL;
+ *res_inode = NULL;
+ error = dir_namei(pathname,&namelen,&basename,base,&base);
+ if (error)
+ return error;
base->i_count++; /* lookup uses up base */
error = lookup(base,basename,namelen,&inode);
if (error) {
iput(base);
- return NULL;
+ return error;
}
- if (follow_links)
- inode = follow_link(base,inode);
- else
+ if (follow_links) {
+ error = follow_link(base,inode,0,0,&inode);
+ if (error)
+ return error;
+ } else
iput(base);
- return inode;
+ *res_inode = inode;
+ return 0;
}
-struct inode * lnamei(const char * pathname)
+int lnamei(const char * pathname, struct inode ** res_inode)
{
- return _namei(pathname, NULL, 0);
+ return _namei(pathname,NULL,0,res_inode);
}
/*
* Open, link etc use their own routines, but this is enough for things
* like 'chmod' etc.
*/
-struct inode * namei(const char * pathname)
+int namei(const char * pathname, struct inode ** res_inode)
{
- return _namei(pathname,NULL,1);
+ return _namei(pathname,NULL,1,res_inode);
}
/*
* namei for open - this is in fact almost the whole open-routine.
*/
int open_namei(const char * pathname, int flag, int mode,
- struct inode ** res_inode)
+ struct inode ** res_inode, struct inode * base)
{
const char * basename;
int namelen,error,i;
flag |= O_WRONLY;
mode &= 07777 & ~current->umask;
mode |= I_REGULAR;
- if (!(dir = dir_namei(pathname,&namelen,&basename,NULL)))
- return -ENOENT;
+ error = dir_namei(pathname,&namelen,&basename,base,&dir);
+ if (error)
+ return error;
if (!namelen) { /* special case: '/usr/' etc */
if (!(flag & (O_ACCMODE|O_CREAT|O_TRUNC))) {
*res_inode=dir;
iput(inode);
return -EEXIST;
}
- if (!(inode = follow_link(dir,inode)))
- return -ELOOP;
+ if (error = follow_link(dir,inode,flag,mode,&inode))
+ return error;
if (S_ISBLK(inode->i_mode) || S_ISCHR(inode->i_mode)) {
if (IS_NODEV(inode)) {
iput(inode);
int do_mknod(const char * filename, int mode, int dev)
{
const char * basename;
- int namelen;
+ int namelen, error;
struct inode * dir;
-
- if (!(dir = dir_namei(filename,&namelen,&basename, NULL)))
- return -ENOENT;
+
+ error = dir_namei(filename,&namelen,&basename, NULL, &dir);
+ if (error)
+ return error;
if (!namelen) {
iput(dir);
return -ENOENT;
int sys_mkdir(const char * pathname, int mode)
{
const char * basename;
- int namelen;
+ int namelen, error;
struct inode * dir;
- if (!(dir = dir_namei(pathname,&namelen,&basename, NULL)))
- return -ENOENT;
+ error = dir_namei(pathname,&namelen,&basename,NULL,&dir);
+ if (error)
+ return error;
if (!namelen) {
iput(dir);
return -ENOENT;
int sys_rmdir(const char * name)
{
const char * basename;
- int namelen;
+ int namelen, error;
struct inode * dir;
- if (!(dir = dir_namei(name,&namelen,&basename, NULL)))
- return -ENOENT;
+ error = dir_namei(name,&namelen,&basename,NULL,&dir);
+ if (error)
+ return error;
if (!namelen) {
iput(dir);
return -ENOENT;
int sys_unlink(const char * name)
{
const char * basename;
- int namelen;
+ int namelen, error;
struct inode * dir;
- if (!(dir = dir_namei(name,&namelen,&basename, NULL)))
- return -ENOENT;
+ error = dir_namei(name,&namelen,&basename,NULL,&dir);
+ if (error)
+ return error;
if (!namelen) {
iput(dir);
return -EPERM;
{
struct inode * dir;
const char * basename;
- int namelen;
+ int namelen, error;
- dir = dir_namei(newname,&namelen,&basename, NULL);
- if (!dir)
- return -ENOENT;
+ error = dir_namei(newname,&namelen,&basename,NULL,&dir);
+ if (error)
+ return error;
if (!namelen) {
iput(dir);
return -ENOENT;
{
struct inode * oldinode, * dir;
const char * basename;
- int namelen;
+ int namelen, error;
- oldinode = namei(oldname);
- if (!oldinode)
- return -ENOENT;
- dir = dir_namei(newname,&namelen,&basename, NULL);
- if (!dir) {
+ error = namei(oldname, &oldinode);
+ if (error)
+ return error;
+ error = dir_namei(newname,&namelen,&basename,NULL,&dir);
+ if (error) {
iput(oldinode);
- return -EACCES;
+ return error;
}
if (!namelen) {
iput(oldinode);
{
struct inode * old_dir, * new_dir;
const char * old_base, * new_base;
- int old_len, new_len;
+ int old_len, new_len, error;
- old_dir = dir_namei(oldname,&old_len,&old_base, NULL);
- if (!old_dir)
- return -ENOENT;
+ error = dir_namei(oldname,&old_len,&old_base,NULL,&old_dir);
+ if (error)
+ return error;
if (!permission(old_dir,MAY_WRITE)) {
iput(old_dir);
return -EACCES;
iput(old_dir);
return -EPERM;
}
- new_dir = dir_namei(newname,&new_len,&new_base, NULL);
- if (!new_dir) {
+ error = dir_namei(newname,&new_len,&new_base,NULL,&new_dir);
+ if (error) {
iput(old_dir);
- return -ENOENT;
+ return error;
}
if (!permission(new_dir,MAY_WRITE)) {
iput(old_dir);
#include <linux/tty.h>
#include <asm/segment.h>
+extern void fcntl_remove_locks(struct task_struct *, struct file *);
+
struct file_operations * chrdev_fops[MAX_CHRDEV] = {
NULL,
};
int sys_statfs(const char * path, struct statfs * buf)
{
struct inode * inode;
+ int error;
verify_area(buf, sizeof(struct statfs));
- if (!(inode = namei(path)))
- return -ENOENT;
+ error = namei(path,&inode);
+ if (error)
+ return error;
if (!inode->i_sb->s_op->statfs) {
iput(inode);
return -ENOSYS;
int sys_truncate(const char * path, unsigned int length)
{
struct inode * inode;
+ int error;
- if (!(inode = namei(path)))
- return -ENOENT;
+ error = namei(path,&inode);
+ if (error)
+ return error;
if (S_ISDIR(inode->i_mode) || !permission(inode,MAY_WRITE)) {
iput(inode);
return -EACCES;
{
struct inode * inode;
long actime,modtime;
+ int error;
- if (!(inode=namei(filename)))
- return -ENOENT;
+ error = namei(filename,&inode);
+ if (error)
+ return error;
if (IS_RDONLY(inode)) {
iput(inode);
return -EROFS;
int res, i_mode;
mode &= 0007;
- if (!(inode=namei(filename)))
- return -EACCES;
+ res = namei(filename,&inode);
+ if (res)
+ return res;
i_mode = res = inode->i_mode & 0777;
iput(inode);
if (current->uid == inode->i_uid)
int sys_chdir(const char * filename)
{
struct inode * inode;
+ int error;
- if (!(inode = namei(filename)))
- return -ENOENT;
+ error = namei(filename,&inode);
+ if (error)
+ return error;
if (!S_ISDIR(inode->i_mode)) {
iput(inode);
return -ENOTDIR;
int sys_chroot(const char * filename)
{
struct inode * inode;
+ int error;
- if (!(inode=namei(filename)))
- return -ENOENT;
+ error = namei(filename,&inode);
+ if (error)
+ return error;
if (!S_ISDIR(inode->i_mode)) {
iput(inode);
return -ENOTDIR;
int sys_chmod(const char * filename, mode_t mode)
{
struct inode * inode;
+ int error;
- if (!(inode = namei(filename)))
- return -ENOENT;
+ error = namei(filename,&inode);
+ if (error)
+ return error;
if ((current->euid != inode->i_uid) && !suser()) {
iput(inode);
return -EPERM;
int sys_chown(const char * filename, uid_t user, gid_t group)
{
struct inode * inode;
+ int error;
- if (!(inode = lnamei(filename)))
- return -ENOENT;
+ error = lnamei(filename,&inode);
+ if (error)
+ return error;
if (IS_RDONLY(inode)) {
iput(inode);
return -EROFS;
if (!f)
return -ENFILE;
current->filp[fd] = f;
- if ((i = open_namei(filename,flag,mode,&inode))<0) {
+ if ((i = open_namei(filename,flag,mode,&inode,NULL))<0) {
current->filp[fd]=NULL;
f->f_count--;
return i;
return sys_open(pathname, O_CREAT | O_WRONLY | O_TRUNC, mode);
}
-static int
-close_fp (struct file *filp)
+static int close_fp(struct file *filp)
{
- struct inode *inode;
+ struct inode *inode;
if (filp->f_count == 0) {
printk("Close: file count is 0\n");
return 0;
}
-
+ inode = filp->f_inode;
+ if (S_ISREG(inode->i_mode))
+ fcntl_remove_locks(current, filp);
if (filp->f_count > 1) {
filp->f_count--;
return 0;
}
-
- inode = filp->f_inode;
if (filp->f_op && filp->f_op->release)
filp->f_op->release(inode,filp);
-
filp->f_count--;
filp->f_inode = NULL;
iput(inode);
return (close_fp (filp));
}
-/* This routine looks through all the process's and closes any
- references to the current processes tty. To avoid problems with
- process sleeping on an inode which has already been iput, anyprocess
- which is sleeping on the tty is sent a sigkill (It's probably a rogue
- process.) Also no process should ever have /dev/console as it's
- controlling tty, or have it open for reading. So we don't have to
- worry about messing with all the daemons abilities to write messages
- to the console. (Besides they should be using syslog.) */
-
-int
-sys_vhangup(void)
+/*
+ * This routine looks through all the processes and closes any
+ * references to the current process's tty. To avoid problems with
+ * a process sleeping on an inode which has already been iput, any process
+ * which is sleeping on the tty is sent a SIGKILL (it's probably a rogue
+ * process). Also no process should ever have /dev/console as its
+ * controlling tty, or have it open for reading. So we don't have to
+ * worry about messing with all the daemons' abilities to write messages
+ * to the console. (Besides they should be using syslog.)
+ *
+ * Only the super-user may call this: returns -EPERM otherwise, 0 on
+ * success (including the case where there is no controlling tty).
+ */
+int sys_vhangup(void)
{
- int i;
- int j;
- struct file *filep;
- struct tty_struct *tty;
- extern void kill_wait (struct wait_queue **q, int signal);
- extern int kill_pg (int pgrp, int sig, int priv);
-
- if (!suser()) return (-EPERM);
-
- /* send the SIGHUP signal. */
- kill_pg (current->pgrp, SIGHUP, 0);
+ int i,j;
+ struct file *filep;
+ struct tty_struct *tty;
+ extern void kill_wait (struct wait_queue **q, int signal);
+ extern int kill_pg (int pgrp, int sig, int priv);
- /* See if there is a controlling tty. */
- if (current->tty < 0) return (0);
-
- for (i = 0; i < NR_TASKS; i++)
- {
- if (task[i] == NULL) continue;
- for (j = 0; j < NR_OPEN; j++)
- {
- filep = task[i]->filp[j];
-
- if (filep == NULL) continue;
-
- /* now we need to check to see if this file points to the
- device we are trying to close. */
-
- if (!S_ISCHR (filep->f_inode->i_mode)) continue;
-
- /* This will catch both /dev/tty and the explicit terminal
- device. However, we must make sure that f_rdev is
- defined and correct. */
-
- if ((MAJOR(filep->f_inode->i_rdev) == 5 ||
- MAJOR(filep->f_inode->i_rdev) == 4 ) &&
- (MAJOR(filep->f_rdev) == 4 &&
- MINOR(filep->f_rdev) == MINOR (current->tty)))
- {
- task[i]->filp[j] = NULL;
+ if (!suser())
+ return -EPERM;
+ /* send the SIGHUP signal. */
+ kill_pg(current->pgrp, SIGHUP, 0);
+ /* See if there is a controlling tty. */
+ if (current->tty < 0)
+ return 0;
+ for (i = 0; i < NR_TASKS; i++) {
+ if (task[i] == NULL)
+ continue;
+ for (j = 0; j < NR_OPEN; j++) {
+ filep = task[i]->filp[j];
+ if (!filep)
+ continue;
+ if (!S_ISCHR(filep->f_inode->i_mode))
+ continue;
+ if ((MAJOR(filep->f_inode->i_rdev) == 5 ||
+ MAJOR(filep->f_inode->i_rdev) == 4 ) &&
+ (MAJOR(filep->f_rdev) == 4 &&
+ MINOR(filep->f_rdev) == MINOR (current->tty))) {
/* so now we have found something to close. We
need to kill every process waiting on the
inode. */
-
- kill_wait (&filep->f_inode->i_wait, SIGKILL);
+ task[i]->filp[j] = NULL;
+ kill_wait (&filep->f_inode->i_wait, SIGKILL);
/* now make sure they are awake before we close the
file. */
- wake_up (&filep->f_inode->i_wait);
+ wake_up (&filep->f_inode->i_wait);
/* finally close the file. */
- current->close_on_exec &= ~(1<<j);
- close_fp (filep);
- }
-
- }
-
+ /* descriptor j belongs to task[i], so the close-on-exec bit
+ must be cleared in that task's mask, not in our own. */
+ task[i]->close_on_exec &= ~(1<<j);
+ close_fp (filep);
+ }
+ }
/* can't let them keep a reference to it around.
But we can't touch current->tty until after the
loop is complete. */
- if (task[i]->tty == current->tty && task[i] != current)
- {
- task[i]->tty = -1;
- }
- }
-
+ if (task[i]->tty == current->tty && task[i] != current) {
+ task[i]->tty = -1;
+ }
+ }
/* need to do tty->session = 0 */
- tty = TTY_TABLE(MINOR(current->tty));
- tty->session = 0;
- tty->pgrp = -1;
- current->tty = -1;
-
-
- return (0);
+ tty = TTY_TABLE(MINOR(current->tty));
+ tty->session = 0;
+ tty->pgrp = -1;
+ current->tty = -1;
+ return 0;
}
{
if (file->f_op && file->f_op->select)
return file->f_op->select(inode,file,SEL_IN,wait);
+ if (inode && S_ISREG(inode->i_mode))
+ return 1;
return 0;
}
{
if (file->f_op && file->f_op->select)
return file->f_op->select(inode,file,SEL_OUT,wait);
+ if (inode && S_ISREG(inode->i_mode))
+ return 1;
return 0;
}
{
if (file->f_op && file->f_op->select)
return file->f_op->select(inode,file,SEL_EX,wait);
+ if (inode && S_ISREG(inode->i_mode))
+ return 1;
return 0;
}
return -EBADF;
if (!current->filp[i]->f_inode)
return -EBADF;
- if (current->filp[i]->f_inode->i_pipe)
- continue;
- if (S_ISCHR(current->filp[i]->f_inode->i_mode))
- continue;
- if (S_ISFIFO(current->filp[i]->f_inode->i_mode))
- continue;
- if (S_ISSOCK(current->filp[i]->f_inode->i_mode))
- continue;
- return -EBADF;
}
repeat:
wait_table.nr = 0;
tmp.st_mtime = inode->i_mtime;
tmp.st_ctime = inode->i_ctime;
/*
- * Right now we fake the st_blocks numbers: we'll eventually have to
- * add st_blocks to the inode, and let the vfs routines keep track of
- * it all. This algorithm doesn't guarantee correct block numbers, but
- * at least it tries to come up with a plausible answer...
- *
- * In fact, the minix fs doesn't use these numbers (it uses 7 and 512
- * instead of 10 and 256), but who cares... It's not that exact anyway.
+ * st_blocks and st_blksize are approximated with a simple algorithm if
+ * they aren't supported directly by the filesystem. The minix and msdos
+ * filesystems don't keep track of blocks, so the numbers would either
+ * have to be counted explicitly (by delving into the file itself), or
+ * approximated with this simple algorithm to get a reasonable (although
+ * not 100% accurate) value.
*/
- blocks = (tmp.st_size + 1023) / 1024;
- if (blocks > 10) {
- indirect = (blocks - 11)/256+1;
- if (blocks > 10+256) {
- indirect += (blocks - 267)/(256*256)+1;
- if (blocks > 10+256+256*256)
- indirect++;
+ if (!inode->i_blksize) {
+ blocks = (tmp.st_size + 511) / 512;
+ if (blocks > 10) {
+ indirect = (blocks - 11)/256+1;
+ if (blocks > 10+256) {
+ indirect += (blocks - 267)/(256*256)+1;
+ if (blocks > 10+256+256*256)
+ indirect++;
+ }
+ blocks += indirect;
}
- blocks += indirect;
+ tmp.st_blksize = 512;
+ tmp.st_blocks = blocks;
+ } else {
+ tmp.st_blksize = inode->i_blksize;
+ tmp.st_blocks = inode->i_blocks;
}
- tmp.st_blksize = 1024;
- tmp.st_blocks = blocks;
memcpy_tofs(statbuf,&tmp,sizeof(tmp));
}
int sys_stat(char * filename, struct old_stat * statbuf)
{
struct inode * inode;
+ int error;
- if (!(inode=namei(filename)))
- return -ENOENT;
+ error = namei(filename,&inode);
+ if (error)
+ return error;
cp_old_stat(inode,statbuf);
iput(inode);
return 0;
int sys_newstat(char * filename, struct new_stat * statbuf)
{
struct inode * inode;
+ int error;
- if (!(inode=namei(filename)))
- return -ENOENT;
+ error = namei(filename,&inode);
+ if (error)
+ return error;
cp_new_stat(inode,statbuf);
iput(inode);
return 0;
int sys_lstat(char * filename, struct old_stat * statbuf)
{
struct inode * inode;
+ int error;
- if (!(inode = lnamei(filename)))
- return -ENOENT;
+ error = lnamei(filename,&inode);
+ if (error)
+ return error;
cp_old_stat(inode,statbuf);
iput(inode);
return 0;
int sys_newlstat(char * filename, struct new_stat * statbuf)
{
struct inode * inode;
+ int error;
- if (!(inode = lnamei(filename)))
- return -ENOENT;
+ error = lnamei(filename,&inode);
+ if (error)
+ return error;
cp_new_stat(inode,statbuf);
iput(inode);
return 0;
int sys_readlink(const char * path, char * buf, int bufsiz)
{
struct inode * inode;
+ int error;
if (bufsiz <= 0)
return -EINVAL;
verify_area(buf,bufsiz);
- if (!(inode = lnamei(path)))
- return -ENOENT;
+ error = lnamei(path,&inode);
+ if (error)
+ return error;
if (!inode->i_op || !inode->i_op->readlink) {
iput(inode);
return -EINVAL;
int sync_dev(int dev);
void wait_for_keypress(void);
+void fcntl_init_locks(void);
/* set_bit uses setb, as gas doesn't recognize setc */
#define set_bit(bitnr,addr) ({ \
if (!suser())
return -EPERM;
- if (!(inode = namei(dev_name)))
- return -ENOENT;
+ retval = namei(dev_name,&inode);
+ if (retval)
+ return retval;
dev = inode->i_rdev;
if (!S_ISBLK(inode->i_mode)) {
iput(inode);
{
struct inode * inode, * dir_i;
struct super_block * sb;
+ int error;
- if (!(dir_i = namei(dir)))
- return -ENOENT;
+ error = namei(dir,&dir_i);
+ if (error)
+ return error;
if (dir_i->i_count != 1 || dir_i->i_mount) {
iput(dir_i);
return -EBUSY;
{
struct inode * inode;
int dev;
- int retval = 0;
+ int retval;
char tmp[100],*t;
int i;
unsigned long flags = 0;
if (!suser())
return -EPERM;
- if (!(inode = namei(dev_name)))
- return -ENOENT;
+ retval = namei(dev_name,&inode);
+ if (retval)
+ return retval;
dev = inode->i_rdev;
if (!S_ISBLK(inode->i_mode))
retval = -EPERM;
panic("bad i-node size");
for(i=0;i<NR_FILE;i++)
file_table[i].f_count=0;
+ fcntl_init_locks();
if (MAJOR(ROOT_DEV) == 2) {
printk("Insert root floppy and press ENTER");
wait_for_keypress();
*((gate_addr)+1) = (((base) & 0x0000ffff)<<16) | \
((limit) & 0x0ffff); }
-#define _set_tssldt_desc(n,addr,type) \
-__asm__ __volatile__ ("movw $232,%1\n\t" \
+#define _set_tssldt_desc(n,addr,limit,type) \
+__asm__ __volatile__ ("movw $" #limit ",%1\n\t" \
"movw %%ax,%2\n\t" \
"rorl $16,%%eax\n\t" \
"movb %%al,%3\n\t" \
"movb $0x00,%5\n\t" \
"movb %%ah,%6\n\t" \
"rorl $16,%%eax" \
- ::"a" (addr), "m" (*(n)), "m" (*(n+2)), "m" (*(n+4)), \
+ ::"a" (addr+0xc0000000), "m" (*(n)), "m" (*(n+2)), "m" (*(n+4)), \
"m" (*(n+5)), "m" (*(n+6)), "m" (*(n+7)) \
)
-#define set_tss_desc(n,addr) _set_tssldt_desc(((char *) (n)),addr,"0x89")
-#define set_ldt_desc(n,addr) _set_tssldt_desc(((char *) (n)),addr,"0x82")
+#define set_tss_desc(n,addr) _set_tssldt_desc(((char *) (n)),((int)(addr)),231,"0x89")
+#define set_ldt_desc(n,addr) _set_tssldt_desc(((char *) (n)),((int)(addr)),23,"0x82")
--- /dev/null
+#ifndef _EXT_FS_I
+#define _EXT_FS_I
+
+/*
+ * extended file system inode data in memory
+ */
+struct ext_inode_info {
+ /* no ext-specific in-core fields yet: placeholder for the union in struct inode */
+};
+
+#endif
struct buffer_head * b_reqnext; /* request queue */
};
+#include <linux/minix_fs_i.h>
+#include <linux/ext_fs_i.h>
+#include <linux/msdos_fs_i.h>
+
struct inode {
dev_t i_dev;
unsigned long i_ino;
time_t i_atime;
time_t i_mtime;
time_t i_ctime;
+ unsigned long i_blksize;
+ unsigned long i_blocks;
unsigned long i_data[16];
struct inode_operations * i_op;
struct super_block * i_sb;
struct wait_queue * i_wait;
struct wait_queue * i_wait2; /* for pipes */
+ struct file_lock *i_flock;
unsigned short i_count;
unsigned short i_flags;
unsigned char i_lock;
unsigned char i_mount;
unsigned char i_seek;
unsigned char i_update;
+ union {
+ struct minix_inode_info minix_i;
+ struct ext_inode_info ext_i;
+ struct msdos_inode_info msdos_i;
+ } u;
};
struct file {
off_t f_pos;
};
+/*
+ * One file-lock record. Records are chained through fl_next; struct inode
+ * holds a pointer of this type in its i_flock member.
+ * NOTE(review): fl_type/fl_whence/fl_start/fl_end presumably mirror the
+ * fields of the fcntl "struct flock" - confirm against the locking code.
+ */
+struct file_lock {
+ struct file_lock *fl_next; /* singly linked list */
+ struct task_struct *fl_owner; /* NULL if on free list, for sanity checks */
+ struct wait_queue *fl_wait;
+ char fl_type;
+ char fl_whence;
+ off_t fl_start;
+ off_t fl_end;
+};
+
#include <linux/minix_fs_sb.h>
#include <linux/ext_fs_sb.h>
#include <linux/msdos_fs_sb.h>
int (*mknod) (struct inode *,const char *,int,int,int);
int (*rename) (struct inode *,const char *,int,struct inode *,const char *,int);
int (*readlink) (struct inode *,char *,int);
- struct inode * (*follow_link) (struct inode *, struct inode *);
+ int (*follow_link) (struct inode *, struct inode *, int flag, int mode, struct inode ** res_inode);
int (*bmap) (struct inode *,int);
void (*truncate) (struct inode *);
};
extern void sync_inodes(void);
extern void wait_on(struct inode * inode);
extern int bmap(struct inode * inode,int block);
-extern struct inode * namei(const char * pathname);
-extern struct inode * lnamei(const char * pathname);
+extern int namei(const char * pathname, struct inode ** res_inode);
+extern int lnamei(const char * pathname, struct inode ** res_inode);
extern int permission(struct inode * inode,int mask);
-extern struct inode * _namei(const char * filename, struct inode * base,
- int follow_links);
extern int open_namei(const char * pathname, int flag, int mode,
- struct inode ** res_inode);
+ struct inode ** res_inode, struct inode * base);
extern int do_mknod(const char * filename, int mode, int dev);
extern void iput(struct inode * inode);
extern struct inode * iget(int dev,int nr);
unsigned long a,b;
} desc_table[256];
-extern unsigned long pg_dir[1024];
+extern unsigned long swapper_pg_dir[1024];
extern desc_table idt,gdt;
#define GDT_NUL 0
#define NR_FILE 128
#define NR_SUPER 8
#define NR_HASH 997
+#define NR_FILE_LOCKS 32
#define BLOCK_SIZE 1024
#define BLOCK_SIZE_BITS 10
#define MAX_CHRDEV 16
--- /dev/null
+#ifndef _MINIX_FS_I
+#define _MINIX_FS_I
+
+/*
+ * minix fs inode data in memory
+ */
+struct minix_inode_info {
+ /* no minix-specific in-core fields yet: placeholder for the union in struct inode */
+};
+
+#endif
/* memory.c */
extern unsigned long get_free_page(int priority);
-extern unsigned long put_dirty_page(unsigned long page,unsigned long address);
+extern unsigned long put_dirty_page(struct task_struct * tsk,unsigned long page,
+ unsigned long address);
extern void free_page(unsigned long addr);
-extern int free_page_tables(unsigned long from,unsigned long size);
-extern int copy_page_tables(unsigned long from,unsigned long to,long size);
+extern void free_page_tables(struct task_struct * tsk);
+extern void clear_page_tables(struct task_struct * tsk);
+extern int copy_page_tables(struct task_struct * new);
extern int unmap_page_range(unsigned long from, unsigned long size);
extern int remap_page_range(unsigned long from, unsigned long to, unsigned long size,
int permiss);
extern void swap_in(unsigned long *table_ptr);
#define invalidate() \
-__asm__("movl %%eax,%%cr3"::"a" (0))
+__asm__ __volatile__("movl %%cr3,%%eax\n\tmovl %%eax,%%cr3":::"ax")
extern unsigned long low_memory;
extern unsigned long high_memory;
--- /dev/null
+#ifndef _MSDOS_FS_I
+#define _MSDOS_FS_I
+
+/*
+ * msdos file system inode data in memory
+ */
+struct msdos_inode_info {
+ /* no msdos-specific in-core fields yet: placeholder for the union in struct inode */
+};
+
+#endif
#define HZ 100
+/*
+ * This is the maximum nr of tasks - change it if you need to
+ */
#define NR_TASKS 64
-#define TASK_SIZE 0x04000000
-#define LIBRARY_SIZE 0x00400000
+
+/*
+ * User space process size: 3GB. This is hardcoded into a few places,
+ * so don't change it unless you know what you are doing.
+ */
+#define TASK_SIZE 0xc0000000
/*
* Size of io_bitmap in longwords: 32 is ports 0-0x3ff.
*/
#define IO_BITMAP_SIZE 32
-#if (TASK_SIZE & 0x3fffff)
-#error "TASK_SIZE must be multiple of 4M"
-#endif
-
-#if (LIBRARY_SIZE & 0x3fffff)
-#error "LIBRARY_SIZE must be a multiple of 4M"
-#endif
-
-#if (LIBRARY_SIZE >= (TASK_SIZE/2))
-#error "LIBRARY_SIZE too damn big!"
-#endif
-
-#if (((TASK_SIZE>>16)*NR_TASKS) != 0x10000)
-#error "TASK_SIZE*NR_TASKS must be 4GB"
-#endif
-
-#define LIBRARY_OFFSET (TASK_SIZE - LIBRARY_SIZE)
-
#define CT_TO_SECS(x) ((x) / HZ)
#define CT_TO_USECS(x) (((x) % HZ) * 1000000/HZ)
#include <linux/time.h>
#include <linux/param.h>
#include <linux/resource.h>
+#include <linux/vm86.h>
#if (NR_OPEN > 32)
#error "Currently the close-on-exec-flags and select masks are in one long, max 32 files/proc"
long signal;
struct sigaction sigaction[32];
long blocked; /* bitmap of masked signals */
+ unsigned long saved_kernel_stack;
/* various fields */
int exit_code;
int dumpable:1;
unsigned short used_math;
unsigned short rss; /* number of resident pages */
char comm[8];
+ struct vm86_struct * vm86_info;
/* file system info */
int link_count;
int tty; /* -1 if no tty, so it must be signed */
struct inode * library;
unsigned long start;
unsigned long length;
+ unsigned long bss;
} libraries[MAX_SHARED_LIBS];
int numlibraries;
struct file * filp[NR_OPEN];
#define PF_ALIGNWARN 0x00000001 /* Print alignment warning msgs */
/* Not implemented yet, only for 486*/
#define PF_PTRACED 0x00000010 /* set if ptrace (0) has been called. */
-#define PF_VM86 0x00000020 /* set if process can execute a vm86 */
- /* task. */
- /* not impelmented. */
/*
* INIT_TASK is used to set up the first task table, touch at
*/
#define INIT_TASK \
/* state etc */ { 0,15,15, \
-/* signals */ 0,{{},},0, \
+/* signals */ 0,{{},},0,0, \
/* ec,brk... */ 0,0,0,0,0,0,0,0, \
/* pid etc.. */ 0,0,0,0, \
/* suppl grps*/ {NOGROUP,}, \
/* math */ 0, \
/* rss */ 2, \
/* comm */ "swapper", \
+/* vm86_info */ NULL, \
/* fs info */ 0,-1,0022,NULL,NULL,NULL, \
/* libraries */ { { NULL, 0, 0}, }, 0, \
/* filp */ {NULL,}, 0, \
{ \
{0,0}, \
-/* ldt */ {0x9f,0xc0fa00}, \
- {0x9f,0xc0f200} \
+/* ldt */ {0x9f,0xc0c0fa00}, \
+ {0x9f,0xc0c0f200} \
}, \
-/*tss*/ {0,PAGE_SIZE+(long)&init_task,0x10,0,0,0,0,(long)&pg_dir,\
+/*tss*/ {0,PAGE_SIZE+(long)&init_task,0x10,0,0,0,0,(long)&swapper_pg_dir,\
0,0,0,0,0,0,0,0, \
0,0,0x17,0x17,0x17,0x17,0x17,0x17, \
_LDT(0),0x80000000,{0xffffffff}, \
extern int sys_iopl();
extern int sys_vhangup();
extern int sys_idle();
+extern int sys_vm86();
fn_ptr sys_call_table[] = { sys_setup, sys_exit, sys_fork, sys_read,
sys_write, sys_open, sys_close, sys_waitpid, sys_creat, sys_link,
sys_setpriority, sys_profil, sys_statfs, sys_fstatfs, sys_ioperm,
sys_socketcall, sys_syslog, sys_setitimer, sys_getitimer, sys_newstat,
sys_newlstat, sys_newfstat, sys_newuname, sys_iopl, sys_vhangup,
-sys_idle };
+sys_idle, sys_vm86 };
/* So we don't have to do any more manual updating.... */
int NR_syscalls = sizeof(sys_call_table)/sizeof(fn_ptr);
#define __NR_iopl 110
#define __NR_vhangup 111
#define __NR_idle 112
+#define __NR_vm86 113
extern int errno;
--- /dev/null
+#ifndef _LINUX_VM86_H
+#define _LINUX_VM86_H
+
+#define VM_MASK 0x00020000
+
+/*
+ * This is the stack-layout when we have done a "SAVE_ALL" from vm86
+ * mode - the main change is that the old segment descriptors aren't
+ * useful any more and are forced to be zero by the kernel (and the
+ * hardware when a trap occurs), and the real segment descriptors are
+ * at the end of the structure. Look at ptrace.h to see the "normal"
+ * setup.
+ */
+
+struct vm86_regs {
+/*
+ * normal regs, with special meaning for the segment descriptors..
+ */
+ long ebx;
+ long ecx;
+ long edx;
+ long esi;
+ long edi;
+ long ebp;
+ long eax;
+ long __null_ds;
+ long __null_es;
+ long __null_fs;
+ long __null_gs;
+ long orig_eax;
+ long eip;
+ long cs;
+ long eflags;
+ long esp;
+ long ss;
+/*
+ * these are specific to v86 mode:
+ */
+ long es;
+ long ds;
+ long fs;
+ long gs;
+};
+
+/*
+ * flags isn't even used yet: it's just there as an example of
+ * what kind of information we might want to give sys_vm86() (or
+ * want it to return to us).
+ */
+struct vm86_struct {
+ struct vm86_regs regs;
+ unsigned long flags;
+};
+
+#endif
sync
kernelsubdirs: dummy
- @for i in $(SUBDIRS); do (cd $$i; echo $$i; $(MAKE)) || exit; done
+ @for i in $(SUBDIRS); do (cd $$i && echo $$i && $(MAKE)) || exit; done
sys_call.s: sys_call.S
sys_call.o: sys_call.s
sched.o: sched.c
- $(CC) $(CFLAGS) -fno-omit-frame-pointer -c $<
+ $(CC) $(CFLAGS) $(PROFILING) -fno-omit-frame-pointer -c $<
clean:
rm -f core *.o *.a tmp_make sys_call.s
for i in *.c;do rm -f `basename $$i .c`.s;done
- for i in $(SUBDIRS); do (cd $$i; $(MAKE) clean); done
+ for i in $(SUBDIRS); do (cd $$i && $(MAKE) clean); done
dep:
sed '/\#\#\# Dependencies/q' < Makefile > tmp_make
for i in *.c;do $(CPP) -M $$i;done >> tmp_make
cp tmp_make Makefile
- for i in $(SUBDIRS); do (cd $$i; $(MAKE) dep) || exit; done
+ for i in $(SUBDIRS); do (cd $$i && $(MAKE) dep) || exit; done
dummy:
sync
scsisubdirs: dummy
- @for i in $(SUBDIRS); do (cd $$i; echo $$i; $(MAKE)) || exit; done
+ @for i in $(SUBDIRS); do (cd $$i && echo $$i && $(MAKE)) || exit; done
clean:
rm -f core *.o *.a tmp_make
for i in *.c;do rm -f `basename $$i .c`.s;done
- for i in $(SUBDIRS); do (cd $$i; $(MAKE) clean); done
+ for i in $(SUBDIRS); do (cd $$i && $(MAKE) clean); done
+# Regenerate the dependency section of this Makefile, then recurse into
+# the sub-directories; stop at the first sub-make that fails.
dep:
sed '/\#\#\# Dependencies/q' < Makefile > tmp_make
for i in *.c;do $(CPP) -M $$i;done >> tmp_make
cp tmp_make Makefile
- for i in $(SUBDIRS); do (cd $$i; $(MAKE) dep); done
+ for i in $(SUBDIRS); do (cd $$i && $(MAKE) dep) || exit; done
dummy:
extern int * blk_size[NR_BLK_DEV];
+extern unsigned long hd_init(unsigned long mem_start, unsigned long mem_end);
extern int is_read_only(int dev);
extern void set_device_ro(int dev,int flag);
static void recal_intr(void);
static void bad_rw_intr(void);
-static int recalibrate = 0;
+static char recalibrate[ MAX_HD ] = { 0, };
+
static int reset = 0;
#if (HD_DELAY > 0)
static void bad_rw_intr(void)
{
+ int i;
+
if (!CURRENT)
return;
if (++CURRENT->errors >= MAX_ERRORS)
else if (CURRENT->errors > MAX_ERRORS/2)
reset = 1;
else
- recalibrate = 1;
+ for (i=0; i < NR_HD; i++)
+ recalibrate[i] = 1;
}
static inline int wait_DRQ(void)
static void do_hd_request(void)
{
unsigned int block,dev;
- unsigned int sec,head,cyl;
+ unsigned int sec,head,cyl,track;
unsigned int nsect;
repeat:
}
block += hd[dev].start_sect;
dev >>= 6;
- sec = block % hd_info[dev].sect;
- block /= hd_info[dev].sect;
- head = block % hd_info[dev].head;
- cyl = block / hd_info[dev].head;
- sec++;
+ sec = block % hd_info[dev].sect + 1;
+ track = block / hd_info[dev].sect;
+ head = track % hd_info[dev].head;
+ cyl = track / hd_info[dev].head;
#ifdef DEBUG
printk("hd%d : cyl = %d, head = %d, sector = %d, buffer = %08x\n",
dev, cyl, head, sec, CURRENT->buffer);
#endif
cli();
if (reset) {
- recalibrate = 1;
+ int i;
+
+ for (i=0; i < NR_HD; i++)
+ recalibrate[i] = 1;
reset_hd();
sti();
return;
}
- if (recalibrate) {
- recalibrate = 0;
+ if (recalibrate[dev]) {
+ recalibrate[dev] = 0;
hd_out(dev,hd_info[dev].sect,0,0,0,WIN_RESTORE,&recal_intr);
if (reset)
goto repeat;
}
port_write(HD_DATA,CURRENT->buffer,256);
sti();
- } else if (CURRENT->cmd == READ) {
+ return;
+ }
+ if (CURRENT->cmd == READ) {
hd_out(dev,nsect,sec,head,cyl,WIN_READ,&read_intr);
if (reset)
goto repeat;
sti();
- } else
- panic("unknown hd-command");
+ return;
+ }
+ panic("unknown hd-command");
}
static int hd_ioctl(struct inode * inode, struct file * file,
sync_dev(inode->i_rdev);
}
-
static void hd_geninit();
static struct gendisk hd_gendisk = {
static void hd_geninit(void)
{
- int drive;
+ int drive, i;
#ifndef HD_TYPE
extern struct drive_info drive_info;
void *BIOS = (void *) &drive_info;
- int cmos_disks, i;
+ int cmos_disks;
for (drive=0 ; drive<2 ; drive++) {
hd_info[drive].cyl = *(unsigned short *) BIOS;
NULL
};
-unsigned long hd_init(unsigned long mem_start)
+unsigned long hd_init(unsigned long mem_start, unsigned long mem_end)
{
blk_dev[MAJOR_NR].request_fn = DEVICE_REQUEST;
blkdev_fops[MAJOR_NR] = &hd_fops;
* add-request adds a request to the linked list.
* It disables interrupts so that it can muck with the
* request-lists in peace.
- *
- * Note that swapping requests always go before other requests,
- * and are done in the order they appear.
*/
static void add_request(struct blk_dev_struct * dev, struct request * req)
{
return;
}
for ( ; tmp->next ; tmp = tmp->next) {
- if (!req->bh)
- if (tmp->next->bh)
- break;
- else
- continue;
if ((IN_ORDER(tmp,req) ||
!IN_ORDER(tmp,tmp->next)) &&
IN_ORDER(req,tmp->next))
sti();
goto repeat;
-found: sti();
+found:
/* fill up the request-info, and add it to the queue */
req->dev = bh->b_dev;
+ sti();
req->cmd = rw;
req->errors = 0;
req->sector = sector;
while (count > 0) {
if (current->signal & ~current->blocked)
break;
- pde = (unsigned long) pg_dir + (addr >> 20 & 0xffc);
+ pde = current->tss.cr3 + (addr >> 20 & 0xffc);
pte = *(unsigned long *) pde;
if (!(pte & PAGE_PRESENT))
break;
while (count > 0) {
if (current->signal & ~current->blocked)
break;
- pde = (unsigned long) pg_dir + (addr >> 20 & 0xffc);
+ pde = current->tss.cr3 + (addr >> 20 & 0xffc);
pte = *(unsigned long *) pde;
if (!(pte & PAGE_PRESENT))
break;
int i;
fake_volatile:
- free_page_tables(get_base(current->ldt[1]),get_limit(0x0f));
- free_page_tables(get_base(current->ldt[2]),get_limit(0x17));
+ free_page_tables(current);
for (i=0 ; i<NR_OPEN ; i++)
if (current->filp[i])
sys_close(i);
}
if (data_limit < code_limit)
panic("Bad data_limit");
- new_data_base = new_code_base = nr * TASK_SIZE;
+ new_data_base = old_data_base;
+ new_code_base = old_code_base;
p->start_code = new_code_base;
set_base(p->ldt[1],new_code_base);
set_base(p->ldt[2],new_data_base);
- if (copy_page_tables(old_data_base,new_data_base,data_limit)) {
- free_page_tables(new_data_base,data_limit);
- return -ENOMEM;
- }
- return 0;
+ return copy_page_tables(p);
}
static int find_empty_process(void)
else
I387.swd &= 0x7fff;
ORIG_EIP = EIP;
+/* We cannot handle emulation in v86-mode */
+ if (EFLAGS & 0x00020000)
+ math_abort(info,SIGILL);
/* 0x0007 means user code space */
if (CS != 0x000F) {
printk("math_emulate: %04x:%08x\n\r",CS,EIP);
{
unsigned long page;
- addr += tsk->start_code;
repeat:
page = tsk->tss.cr3 + ((addr >> 20) & 0xffc);
page = *(unsigned long *) page;
{
unsigned long page;
- addr += tsk->start_code;
repeat:
page = tsk->tss.cr3 + ((addr >> 20) & 0xffc);
page = *(unsigned long *) page;
* irq uses this to decide if it should update the user or system
* times.
*/
-static void do_timer(int regs)
+static void do_timer(struct pt_regs * regs)
{
unsigned long mask;
struct timer_struct *tp = timer_table+0;
static int avg_cnt = 0;
jiffies++;
- if (3 & ((struct pt_regs *) regs)->cs) {
+ if ((VM_MASK & regs->eflags) || (3 & regs->cs)) {
current->utime++;
/* Update ITIMER_VIRT for current task if not in a system call */
if (current->it_virt_value && !(--current->it_virt_value)) {
current->stime++;
#ifdef PROFILE_SHIFT
if (prof_buffer && current != task[0]) {
- unsigned long eip = ((struct pt_regs *) regs)->eip;
+ unsigned long eip = regs->eip;
eip >>= PROFILE_SHIFT;
if (eip < prof_len)
prof_buffer[eip]++;
outb_p(0x36,0x43); /* binary, mode 3, LSB/MSB, ch 0 */
outb_p(LATCH & 0xff , 0x40); /* LSB */
outb(LATCH >> 8 , 0x40); /* MSB */
- request_irq(TIMER_IRQ,do_timer);
+ request_irq(TIMER_IRQ,(void (*)(int)) do_timer);
}
int longs;
unsigned long * tmp_esp;
-#ifdef notdef
- printk("pid: %d, signr: %x, eax=%d, oeax = %d, int=%d\n",
- current->pid, signr, regs->eax, regs->orig_eax,
- sa->sa_flags & SA_INTERRUPT);
-#endif
sa_handler = (unsigned long) sa->sa_handler;
if ((regs->orig_eax != -1) &&
((regs->eax == -ERESTARTSYS) || (regs->eax == -ERESTARTNOINTR))) {
#include <linux/utsname.h>
#include <linux/param.h>
#include <linux/resource.h>
+#include <linux/signal.h>
#include <linux/string.h>
+#include <linux/ptrace.h>
#include <asm/segment.h>
/*
- * this indicates wether you can reboot with ctrl-alt-del: the deault is yes
+ * this indicates whether you can reboot with ctrl-alt-del: the default is yes
*/
static int C_A_D = 1;
return -ENOSYS;
}
+unsigned long save_v86_state(int signr,struct vm86_regs * regs)
+{ /*
+ * Leave vm86 mode for the current task: copy the register frame 'regs'
+ * to the vm86_info area that sys_vm86() recorded, put the saved kernel
+ * stack pointer back into tss.esp0, and return the esp0 value that was
+ * in use on entry (the ret_from_sys_call signal path loads it into
+ * %esp). 'signr' is accepted but not referenced in the body.
+ */
+ unsigned long stack;
+
+ if (!current->vm86_info) { /* nowhere to save the registers: kill the task */
+ printk("no vm86_info: BAD\n");
+ do_exit(SIGSEGV);
+ }
+ memcpy_tofs(&(current->vm86_info->regs),regs,sizeof(*regs));
+ stack = current->tss.esp0;
+ current->tss.esp0 = current->saved_kernel_stack;
+ current->saved_kernel_stack = 0; /* zero again, so sys_vm86() no longer returns -EPERM */
+ return stack;
+}
+
+int sys_vm86(struct vm86_struct * v86)
+{ /*
+ * Enter vm86 mode: read the caller's vm86_struct, zero its __null_*
+ * fields, rebuild eflags from a mix of caller and saved bits, remember
+ * the current tss.esp0 in saved_kernel_stack, and jump to
+ * ret_from_sys_call with %esp pointing at the local register copy.
+ * Returns -EPERM when saved_kernel_stack is already non-zero.
+ */
+ struct vm86_struct info;
+ struct pt_regs * pt_regs = (struct pt_regs *) &v86; /* NOTE(review): assumes the saved register frame starts at the first argument - confirm against the syscall entry code */
+
+ if (current->saved_kernel_stack) /* set below, cleared by save_v86_state(): non-zero means vm86 is already active */
+ return -EPERM;
+ memcpy_fromfs(&info,v86,sizeof(info));
+/*
+ * make sure the vm86() system call doesn't try to do anything silly
+ */
+ info.regs.__null_ds = 0;
+ info.regs.__null_es = 0;
+ info.regs.__null_fs = 0;
+ info.regs.__null_gs = 0;
+/*
+ * The eflags register is also special: we cannot trust that the user
+ * has set it up safely, so this makes sure interrupt etc flags are
+ * inherited from protected mode.
+ */
+ info.regs.eflags &= 0x00000dd5; /* caller keeps only CF, PF, AF, ZF, SF, TF, DF and OF */
+ info.regs.eflags |= 0xfffff22a & pt_regs->eflags; /* every other bit comes from the saved eflags */
+ info.regs.eflags |= VM_MASK; /* force the VM flag on */
+ current->saved_kernel_stack = current->tss.esp0; /* put back by save_v86_state() */
+ current->tss.esp0 = (unsigned long) pt_regs;
+ current->vm86_info = v86; /* where save_v86_state() writes the registers back */
+ __asm__ __volatile__("movl %0,%%esp\n\t"
+ "pushl $ret_from_sys_call\n\t"
+ "ret"::"g" ((long) &(info.regs)),"a" (info.regs.eax)); /* %esp = &info.regs, then jump to ret_from_sys_call via push/ret with %eax preloaded */
+ return 0; /* NOTE(review): presumably never reached - the asm above does not fall through */
+}
+
extern void hard_reset_now(void);
/*
OLDESP = 0x3C
OLDSS = 0x40
+IF_MASK = 0x00000200
+NT_MASK = 0x00004000
+VM_MASK = 0x00020000
+
/*
* these are offsets into the task-struct.
*/
signal = 12
sigaction = 16 # MUST be 16 (=len of sigaction)
blocked = (33*16)
+saved_kernel_stack = ((33*16)+4)
/*
* offsets within sigaction
movl %eax,EAX(%esp) # save the return value
.align 4,0x90
ret_from_sys_call:
+ movl EFLAGS(%esp),%eax
+ testl $VM_MASK,%eax
+ jne 1f
cmpw $0x0f,CS(%esp) # was old code segment supervisor ?
jne 2f
cmpw $0x17,OLDSS(%esp) # was stack segment = 0x17 ?
jne 2f
-1: cmpl $0,_need_resched
+1: orl $IF_MASK,%eax # these just try to make sure
+ andl $~NT_MASK,%eax # the program doesn't do anything
+ movl %eax,EFLAGS(%esp) # stupid
+ cmpl $0,_need_resched
jne reschedule
movl _current,%eax
cmpl _task,%eax # task[0] cannot have signals
bsfl %ecx,%ecx
je 2f
btrl %ecx,%ebx
+ incl %ecx
movl %ebx,signal(%eax)
movl %esp,%ebx
+ testl $VM_MASK,EFLAGS(%esp)
+ je 3f
pushl %ebx
- incl %ecx
+ pushl %ecx
+ call _save_v86_state
+ popl %ecx
+ movl %eax,%ebx
+ movl %eax,%esp
+3: pushl %ebx
pushl %ecx
call _do_signal
popl %ecx
long * esp = (long *) esp_ptr;
int i;
- if ((0xffff & esp[1]) == 0xf)
+ if ((esp[2] & VM_MASK) || ((0xffff & esp[1]) == 0xf))
return;
printk("%s: %04x\n\r",str,nr&0xffff);
printk("EIP: %04x:%p\nEFLAGS: %p\n", 0xffff & esp[1],esp[0],esp[2]);
#include <linux/sched.h>
#include <linux/head.h>
#include <linux/kernel.h>
+#include <linux/errno.h>
#include <linux/string.h>
-#define CODE_SPACE(addr) ((((addr)+4095)&~4095) < \
-current->start_code + current->end_code)
-
unsigned long low_memory = 0;
unsigned long high_memory = 0;
unsigned long free_page_list = 0;
printk("trying to free free page (%08x): memory probably corrupted\n",addr);
}
+static void free_one_table(unsigned long * page_dir)
+{ /*
+ * Release everything reachable through one page-directory slot:
+ * clear the slot, then free each present page, release each swap
+ * entry, and finally free the page table itself. Empty slots are
+ * ignored; slots whose value lies below low_memory are cleared but
+ * the table they point at is left untouched.
+ */
+ int j;
+ unsigned long pg_table = *page_dir;
+ unsigned long * page_table;
+
+ if (!pg_table)
+ return;
+ if (!(pg_table & 1)) { /* non-zero but bit 0 clear: not a usable table entry */
+ printk("Bad page table: [%08x]=%08x\n",page_dir,pg_table);
+ *page_dir = 0;
+ return;
+ }
+ *page_dir = 0; /* unhook the table before tearing it down */
+ if (pg_table < low_memory) /* copy_page_tables() shares such entries by value, so the table is not ours to free */
+ return;
+ page_table = (unsigned long *) (pg_table & 0xfffff000);
+ for (j = 0 ; j < 1024 ; j++,page_table++) {
+ unsigned long pg = *page_table;
+
+ if (!pg)
+ continue;
+ *page_table = 0;
+ if (1 & pg) /* bit 0 set: entry maps a page */
+ free_page(0xfffff000 & pg);
+ else /* bit 0 clear: remaining bits are handed to swap_free() */
+ swap_free(pg >> 1);
+ }
+ free_page(0xfffff000 & pg_table);
+}
+
/*
- * This function frees a continuos block of page tables, as needed
- * by 'exit()'. As does copy_page_tables(), this handles only 4Mb blocks.
+ * This function clears all user-level page tables of a process - this
+ * is needed by execve(), so that old pages aren't in the way. Note that
+ * unlike 'free_page_tables()', this function still leaves a valid
+ * page-table-tree in memory: it just removes the user pages. The two
+ * functions are similar, but there is a fundamental difference.
*/
-int free_page_tables(unsigned long from,unsigned long size)
+void clear_page_tables(struct task_struct * tsk)
{
- unsigned long page;
- unsigned long page_dir;
- unsigned long *pg_table;
- unsigned long * dir, nr;
+ int i;
+ unsigned long * page_dir;
- if (from & 0x3fffff)
- panic("free_page_tables called with wrong alignment");
- if (!from)
+ if (!tsk)
+ return;
+ if (tsk == task[0])
+ panic("task[0] (swapper) doesn't support exec() yet\n");
+ page_dir = (unsigned long *) tsk->tss.cr3; /* the task's own page directory */
+ if (!page_dir) { /* cr3 == 0 is the swapper directory at address 0 */
+ printk("Trying to clear kernel page-directory: not good\n");
+ return;
+ }
+ for (i = 0 ; i < 768 ; i++,page_dir++) /* first 768 of the 1024 slots only; the directory page itself is kept */
+ free_one_table(page_dir);
+ invalidate();
+ return;
+}
+
+/*
+ * This function frees up all page tables of a process when it exits.
+ */
+void free_page_tables(struct task_struct * tsk)
+{
+ int i;
+ unsigned long pg_dir;
+ unsigned long * page_dir;
+
+ if (!tsk)
+ return;
+ if (tsk == task[0]) {
+ printk("task[0] (swapper) killed: unable to recover\n");
panic("Trying to free up swapper memory space");
- size = (size + 0x3fffff) >> 22;
- dir = (unsigned long *) ((from>>20) & 0xffc); /* _pg_dir = 0 */
- for ( ; size-->0 ; dir++) {
- if (!(page_dir = *dir))
- continue;
- *dir = 0;
- if (!(page_dir & 1)) {
- printk("free_page_tables: bad page directory.");
- continue;
- }
- pg_table = (unsigned long *) (0xfffff000 & page_dir);
- for (nr=0 ; nr<1024 ; nr++,pg_table++) {
- if (!(page = *pg_table))
- continue;
- *pg_table = 0;
- if (1 & page)
- free_page(0xfffff000 & page);
- else
- swap_free(page >> 1);
- }
- free_page(0xfffff000 & page_dir);
}
+ pg_dir = tsk->tss.cr3;
+ if (!pg_dir) { /* cr3 == 0 is the swapper directory at address 0 */
+ printk("Trying to free kernel page-directory: not good\n");
+ return;
+ }
+ tsk->tss.cr3 = (unsigned long) swapper_pg_dir; /* repoint the task before its own directory is torn down */
+ if (tsk == current) /* running on the directory being freed: reload %cr3 right away */
+ __asm__ __volatile__("movl %0,%%cr3"::"a" (tsk->tss.cr3));
+ page_dir = (unsigned long *) pg_dir;
+ for (i = 0 ; i < 1024 ; i++,page_dir++) /* all 1024 slots, unlike clear_page_tables() */
+ free_one_table(page_dir);
+ free_page(pg_dir); /* finally the directory page itself */
invalidate();
- return 0;
}
/*
* 1 Mb-range, so the pages can be shared with the kernel. Thus the
* special case for nr=xxxx.
*/
-int copy_page_tables(unsigned long from,unsigned long to,long size)
+int copy_page_tables(struct task_struct * tsk) /* give tsk a new page directory copied from current's; returns 0 or -ENOMEM */
{
- unsigned long * from_page_table;
- unsigned long * to_page_table;
- unsigned long this_page;
- unsigned long * from_dir, * to_dir;
- unsigned long new_page;
- unsigned long nr;
-
- if ((from&0x3fffff) || (to&0x3fffff))
- panic("copy_page_tables called with wrong alignment");
- from_dir = (unsigned long *) ((from>>20) & 0xffc); /* _pg_dir = 0 */
- to_dir = (unsigned long *) ((to>>20) & 0xffc);
- size = ((unsigned) (size+0x3fffff)) >> 22;
- for( ; size-->0 ; from_dir++,to_dir++) {
- if (*to_dir)
- printk("copy_page_tables: already exist, "
- "probable memory corruption\n");
- if (!*from_dir)
+ int i;
+ unsigned long temp_page = 0; /* spare page held for reading a swapped-out entry back in; 0 = none held */
+ unsigned long old_pg_dir, *old_page_dir;
+ unsigned long new_pg_dir, *new_page_dir;
+
+ old_pg_dir = current->tss.cr3; /* the source is always the calling task's directory */
+ new_pg_dir = get_free_page(GFP_KERNEL);
+ if (!new_pg_dir)
+ return -ENOMEM;
+ tsk->tss.cr3 = new_pg_dir; /* set early so free_page_tables(tsk) can undo a partial copy */
+ old_page_dir = (unsigned long *) old_pg_dir;
+ new_page_dir = (unsigned long *) new_pg_dir;
+ for (i = 0 ; i < 1024 ; i++,old_page_dir++,new_page_dir++) {
+ int j;
+ unsigned long old_pg_table, *old_page_table;
+ unsigned long new_pg_table, *new_page_table;
+
+ old_pg_table = *old_page_dir;
+ if (!old_pg_table)
continue;
- if (!(1 & *from_dir)) {
+ if (!(1 & old_pg_table)) {
printk("copy_page_tables: page table swapped out, "
"probable memory corruption");
- *from_dir = 0;
+ *old_page_dir = 0;
+ continue;
+ }
+ if (old_pg_table < low_memory) { /* table below low_memory: share the entry instead of duplicating the table */
+ *new_page_dir = old_pg_table;
continue;
}
- from_page_table = (unsigned long *) (0xfffff000 & *from_dir);
- if (!(to_page_table = (unsigned long *) get_free_page(GFP_KERNEL)))
- return -1; /* Out of memory, see freeing */
- *to_dir = ((unsigned long) to_page_table) | PAGE_ACCESSED | 7;
- nr = (from==0)?0xA0:1024;
- for ( ; nr-- > 0 ; from_page_table++,to_page_table++) {
+ new_pg_table = get_free_page(GFP_KERNEL);
+ if (!new_pg_table) {
+ free_page_tables(tsk); /* drop everything copied so far, including the new directory */
+ free_page(temp_page);
+ return -ENOMEM;
+ }
+ *new_page_dir = new_pg_table | PAGE_ACCESSED | 7;
+ old_page_table = (unsigned long *) (0xfffff000 & old_pg_table);
+ new_page_table = (unsigned long *) (0xfffff000 & new_pg_table);
+ for (j = 0 ; j < 1024 ; j++,old_page_table++,new_page_table++) {
+ unsigned long pg;
repeat:
- this_page = *from_page_table;
- if (!this_page)
+ pg = *old_page_table;
+ if (!pg)
continue;
- if (!(1 & this_page)) {
- if (!(new_page = get_free_page(GFP_KERNEL)))
- return -1;
- ++current->rss;
- read_swap_page(this_page>>1, (char *) new_page);
- if (*from_page_table != this_page) {
- free_page(new_page);
- goto repeat;
- }
- *to_page_table = this_page;
- *from_page_table = new_page | (PAGE_DIRTY | PAGE_ACCESSED | 7);
+ if (pg & 1) { /* bit 0 set: the entry maps a page */
+ pg &= ~2; /* clear bit 1 (the x86 R/W bit) before storing the entry */
+ *new_page_table = pg;
+ if (pg < low_memory) /* pages below low_memory: the source entry is left as it was and mem_map is not touched */
+ continue;
+ *old_page_table = pg; /* the source entry loses bit 1 as well */
+ mem_map[(pg-low_memory)>>12]++; /* one more mapping of this page */
continue;
}
- this_page &= ~2;
- *to_page_table = this_page;
- if (this_page > low_memory) {
- *from_page_table = this_page;
- this_page -= low_memory;
- this_page >>= 12;
- if (!mem_map[this_page]++)
- --nr_free_pages;
+ if (!temp_page) { /* swapped-out entry and no spare page in hand yet */
+ temp_page = get_free_page(GFP_KERNEL);
+ if (!temp_page) {
+ free_page_tables(tsk);
+ return -ENOMEM;
+ }
+ goto repeat; /* re-read the entry - NOTE(review): presumably because the allocation may have slept */
}
+ ++current->rss;
+ read_swap_page(pg>>1, (char *) temp_page);
+ if (*old_page_table != pg) /* entry changed while reading: start this slot over, keeping temp_page */
+ goto repeat;
+ *new_page_table = pg; /* the new task gets the swap entry */
+ *old_page_table = temp_page | (PAGE_DIRTY | PAGE_ACCESSED | 7); /* current gets the page just read in */
+ temp_page = 0; /* spare consumed */
}
}
+ free_page(temp_page); /* hand back the spare; temp_page may be 0 here */
invalidate();
return 0;
}
if (!from)
panic("unmap_page_range trying to free swapper memory space");
size = (size + 0xfff) >> 12;
- dir = (unsigned long *) ((from >> 20) & 0xffc); /* _pg_dir = 0 */
+ dir = (unsigned long *) (current->tss.cr3 + ((from >> 20) & 0xffc));
poff = (from >> 12) & 0x3ff;
if ((pcnt = 1024 - poff) > size)
pcnt = size;
if ((from & 0xfff) || (to & 0xfff))
panic("remap_page_range called with wrong alignment");
- dir = (unsigned long *) ((from >> 20) & 0xffc); /* _pg_dir = 0 */
+ dir = (unsigned long *) (current->tss.cr3 + ((from >> 20) & 0xffc));
size = (size + 0xfff) >> 12;
poff = (from >> 12) & 0x3ff;
if ((pcnt = 1024 - poff) > size)
* out of memory (either when trying to access page-table or
* page.)
*/
-static unsigned long put_page(unsigned long page,unsigned long address)
+static unsigned long put_page(struct task_struct * tsk,unsigned long page,unsigned long address)
{
unsigned long tmp, *page_table;
printk("put_page: mem_map disagrees with %p at %p\n",page,address);
return 0;
}
- page_table = (unsigned long *) ((address>>20) & 0xffc);
+ page_table = (unsigned long *) (tsk->tss.cr3 + ((address>>20) & 0xffc));
if ((*page_table)&1)
page_table = (unsigned long *) (0xfffff000 & *page_table);
else {
tmp = get_free_page(GFP_KERNEL);
if (!tmp) {
- oom(current);
+ oom(tsk);
tmp = BAD_PAGETABLE;
}
*page_table = tmp | PAGE_ACCESSED | 7;
* and we want the dirty-status to be correct (for VM). Thus the same
* routine, but this time we mark it dirty too.
*/
-unsigned long put_dirty_page(unsigned long page, unsigned long address)
+unsigned long put_dirty_page(struct task_struct * tsk, unsigned long page, unsigned long address)
{
unsigned long tmp, *page_table;
printk("put_dirty_page: trying to put page %p at %p\n",page,address);
if (mem_map[(page-low_memory)>>12] != 1)
printk("mem_map disagrees with %p at %p\n",page,address);
- page_table = (unsigned long *) ((address>>20) & 0xffc);
+ page_table = (unsigned long *) (tsk->tss.cr3 + ((address>>20) & 0xffc));
if ((*page_table)&1)
page_table = (unsigned long *) (0xfffff000 & *page_table);
else {
{
unsigned long pde, pte, page;
- pde = (address>>20) & 0xffc;
+ pde = tsk->tss.cr3 + ((address>>20) & 0xffc);
pte = *(unsigned long *) pde;
if ((pte & 3) != 3) {
printk("do_wp_page: bogus page-table at address %08x (%08x)\n",address,pte);
send_sig(SIGSEGV, tsk, 1);
return;
}
- if (address < TASK_SIZE) {
- printk("do_wp_page: kernel WP error at address %08x (%08x)\n",address,pte);
- *(unsigned long *) pde = BAD_PAGETABLE | 7;
- send_sig(SIGSEGV, tsk, 1);
- return;
- }
pte &= 0xfffff000;
pte += (address>>10) & 0xffc;
page = *(unsigned long *) pte;
send_sig(SIGSEGV, tsk, 1);
return;
}
- ++current->min_flt;
+ tsk->min_flt++;
un_wp_page((unsigned long *) pte, tsk);
}
{
unsigned long page;
- page = *(unsigned long *) ((address>>20) & 0xffc);
+ page = *(unsigned long *) (current->tss.cr3 + ((address>>20) & 0xffc));
if (!(page & PAGE_PRESENT))
return;
page &= 0xfffff000;
return;
}
-static void get_empty_page(unsigned long address)
+static void get_empty_page(struct task_struct * tsk, unsigned long address)
{
unsigned long tmp;
tmp = get_free_page(GFP_KERNEL);
if (!tmp) {
- oom(current);
+ oom(tsk);
tmp = BAD_PAGE;
}
- if (!put_page(tmp,address))
+ if (!put_page(tsk,tmp,address))
free_page(tmp);
}
* NOTE! This assumes we have checked that p != current, and that they
* share the same executable or library.
*/
-static int try_to_share(unsigned long address, struct task_struct * p)
+static int try_to_share(unsigned long address, struct task_struct * tsk,
+ struct task_struct * p)
{
unsigned long from;
unsigned long to;
unsigned long to_page;
unsigned long phys_addr;
- from_page = to_page = ((address>>20) & 0xffc);
- from_page += ((p->start_code>>20) & 0xffc);
- to_page += ((current->start_code>>20) & 0xffc);
+ from_page = p->tss.cr3 + ((address>>20) & 0xffc);
+ to_page = tsk->tss.cr3 + ((address>>20) & 0xffc);
/* is there a page-directory at from? */
from = *(unsigned long *) from_page;
if (!(from & 1))
* We first check if it is at all feasible by checking executable->i_count.
* It should be >1 if there are other tasks sharing this inode.
*/
-static int share_page(struct inode * inode, unsigned long address)
+static int share_page(struct task_struct * tsk, struct inode * inode, unsigned long address)
{
struct task_struct ** p;
int i;
for (p = &LAST_TASK ; p > &FIRST_TASK ; --p) {
if (!*p)
continue;
- if (current == *p)
+ if (tsk == *p)
continue;
- if (address < LIBRARY_OFFSET) {
- if (inode != (*p)->executable)
- continue;
- } else {
+ if (inode != (*p)->executable) {
for (i=0; i < (*p)->numlibraries; i++)
if (inode == (*p)->libraries[i].library)
break;
if (i >= (*p)->numlibraries)
continue;
}
- if (try_to_share(address,*p))
+ if (try_to_share(address,tsk,*p))
return 1;
}
return 0;
unsigned int block,i;
struct inode * inode;
- if (address < TASK_SIZE) {
- printk("\n\rBAD!! KERNEL PAGE MISSING\n\r");
- do_exit(SIGSEGV);
- }
- if (address - tsk->start_code >= TASK_SIZE) {
- printk("Bad things happen: nonexistent page error in do_no_page\n\r");
- do_exit(SIGSEGV);
- }
- page = get_empty_pgtable((unsigned long *) ((address >> 20) & 0xffc));
+ page = get_empty_pgtable((unsigned long *) (tsk->tss.cr3 + ((address >> 20) & 0xffc)));
if (!page)
return;
page &= 0xfffff000;
return;
}
address &= 0xfffff000;
- tmp = address - tsk->start_code;
inode = NULL;
block = 0;
- if (tmp < tsk->end_data) {
+ if (address < tsk->end_data) {
inode = tsk->executable;
- block = 1 + tmp / BLOCK_SIZE;
+ block = 1 + address / BLOCK_SIZE;
} else {
i = tsk->numlibraries;
while (i-- > 0) {
- if (tmp < tsk->libraries[i].start)
+ if (address < tsk->libraries[i].start)
continue;
- block = tmp - tsk->libraries[i].start;
- if (block >= tsk->libraries[i].length)
+ block = address - tsk->libraries[i].start;
+ if (block >= tsk->libraries[i].length + tsk->libraries[i].bss)
continue;
inode = tsk->libraries[i].library;
- block = 1 + block / BLOCK_SIZE;
+ if (block < tsk->libraries[i].length)
+ block = 1 + block / BLOCK_SIZE;
+ else
+ block = 0;
break;
}
}
if (!inode) {
++tsk->min_flt;
- get_empty_page(address);
+ get_empty_page(tsk,address);
if (tsk != current)
return;
- if (tmp >= LIBRARY_OFFSET || tmp < tsk->brk)
+ if (address < tsk->brk)
return;
- if (tmp+8192 >= (user_esp & 0xfffff000))
+ if (address+8192 >= (user_esp & 0xfffff000))
return;
send_sig(SIGSEGV,tsk,1);
return;
}
- if (tsk == current)
- if (share_page(inode,tmp)) {
- ++tsk->min_flt;
- return;
- }
+ if (share_page(tsk,inode,address)) {
+ ++tsk->min_flt;
+ return;
+ }
++tsk->maj_flt;
page = get_free_page(GFP_KERNEL);
if (!page) {
oom(current);
- put_page(BAD_PAGE,address);
+ put_page(tsk,BAD_PAGE,address);
return;
}
- for (i=0 ; i<4 ; block++,i++)
- nr[i] = bmap(inode,block);
- bread_page(page,inode->i_dev,nr);
- i = tmp + 4096 - tsk->end_data;
+ if (block) {
+ for (i=0 ; i<4 ; block++,i++)
+ nr[i] = bmap(inode,block);
+ bread_page(page,inode->i_dev,nr);
+ }
+ i = address + 4096 - tsk->end_data;
if (i>4095)
i = 0;
tmp = page + 4096;
tmp--;
*(char *)tmp = 0;
}
- if (put_page(page,address))
+ if (put_page(tsk,page,address))
return;
free_page(page);
oom(current);
void show_mem(void)
{
- int i,j,k,free=0,total=0;
+ int i,free=0,total=0;
int shared = 0;
- unsigned long * pg_tbl;
printk("Mem-info:\n\r");
printk("Free pages: %6d\n",nr_free_pages);
}
printk("%d free pages of %d\n\r",free,total);
printk("%d pages shared\n\r",shared);
- printk("%d free pages via nr_free_pages\n\r", nr_free_pages);
- k = 0;
- for(i=4 ; i<1024 ;) {
- if (1&pg_dir[i]) {
- if (pg_dir[i]>high_memory) {
- printk("page directory[%d]: %08X\n\r",
- i,pg_dir[i]);
- i++;
- continue;
- }
- if (pg_dir[i]>low_memory)
- free++,k++;
- pg_tbl=(unsigned long *) (0xfffff000 & pg_dir[i]);
- for(j=0 ; j<1024 ; j++)
- if ((pg_tbl[j]&1) && pg_tbl[j]>low_memory)
- if (pg_tbl[j]>high_memory)
- printk("page_dir[%d][%d]: %08X\n\r",
- i,j, pg_tbl[j]);
- else
- k++,free++;
- }
- i++;
- if (!(i&15) && k) {
- k++,free++; /* one page/process for task_struct */
- printk("Process %d: %d pages\n\r",(i>>4)-1,k);
- k = 0;
- }
- }
- printk("Memory found: %d (%d)\n\r",free-shared,total);
}
-/* This routine handles page faults. It determines the address,
- and the problem then passes it off to one of the appropriate
- routines. */
+/*
+ * This routine handles page faults. It determines the address,
+ * and the problem, and then passes it off to one of the appropriate
+ * routines.
+ */
void do_page_fault(unsigned long *esp, unsigned long error_code)
{
unsigned long address;
user_esp = 0;
/* get the address */
__asm__("movl %%cr2,%0":"=r" (address));
- if (!(error_code & 1)) {
+ if (!(error_code & 1))
do_no_page(error_code, address, current, user_esp);
- return;
- } else {
+ else
do_wp_page(error_code, address, current, user_esp);
- return;
- }
}
unsigned long mem_init(unsigned long start_mem, unsigned long end_mem)
return 1;
}
-/*
- * We never page the pages in task[0] - kernel memory.
- * We page all other pages.
- */
-#define FIRST_VM_PAGE (TASK_SIZE>>12)
-#define LAST_VM_PAGE (1024*1024)
-#define VM_PAGES (LAST_VM_PAGE - FIRST_VM_PAGE)
-
-static unsigned int dir_entry = 1024;
-static unsigned int page_entry = 0;
+static int swap_task = 1;
+static int swap_table = 0;
+static int swap_page = 0;
/*
* sys_idle() does nothing much: it just searches for likely candidates for
unsigned long page;
need_resched = 1;
- if (dir_entry >= 1024)
- dir_entry = FIRST_VM_PAGE>>10;
- p = task[dir_entry >> 4];
- page = pg_dir[dir_entry];
- if (!(page & 1) || !p || !p->swappable) {
- dir_entry++;
+ if (swap_task >= NR_TASKS)
+ swap_task = 1;
+ p = task[swap_task];
+ if (!p || !p->swappable) {
+ swap_task++;
+ return 0;
+ }
+ if (swap_table >= 1024) {
+ swap_task++;
+ swap_table = 0;
+ return 0;
+ }
+ page = ((unsigned long *) p->tss.cr3)[swap_table];
+ if (!(page & 1) || (page < low_memory)) {
+ swap_table++;
return 0;
}
page &= 0xfffff000;
- if (page_entry >= 1024) {
- page_entry = 0;
- dir_entry++;
+ if (swap_page >= 1024) {
+ swap_page = 0;
+ swap_table++;
return 0;
}
- page = *(page_entry + (unsigned long *) page);
+ page = *(swap_page + (unsigned long *) page);
if ((page < low_memory) || !(page & PAGE_PRESENT) || (page & PAGE_ACCESSED))
- page_entry++;
+ swap_page++;
return 0;
}
*/
int swap_out(unsigned int priority)
{
- int counter = VM_PAGES / 2;
+ int counter = NR_TASKS;
int pg_table;
struct task_struct * p;
+ counter <<= priority; /* search budget: NR_TASKS << priority passes through check_task */
+check_task:
+ if (counter-- < 0) /* budget used up without swapping anything */
+ return 0;
+ if (swap_task >= NR_TASKS) { /* wrap around to task 1; slot 0 is never scanned */
+ swap_task = 1;
+ goto check_task;
+ }
+ p = task[swap_task];
+ if (!p || !p->swappable) {
+ swap_task++;
+ goto check_task;
+ }
check_dir:
- if (counter < 0)
- goto no_swap;
- if (dir_entry >= 1024)
- dir_entry = FIRST_VM_PAGE>>10;
- if (!(p = task[dir_entry >> 4]) || !p->swappable) {
- counter -= 1024;
- dir_entry++;
+ if (swap_table >= 1024) { /* past the last directory slot: move on to the next task */
+ swap_table = 0;
+ swap_task++;
+ goto check_task;
+ }
+ pg_table = ((unsigned long *) p->tss.cr3)[swap_table]; /* directory entry taken from the task's own directory */
+ if (pg_table < low_memory) { /* empty entry, or a table below low_memory: skip it */
+ swap_table++;
goto check_dir;
}
- if (!(1 & (pg_table = pg_dir[dir_entry]))) {
- if (pg_table) {
- printk("bad page-table at pg_dir[%d]: %08x\n\r",
- dir_entry,pg_table);
- pg_dir[dir_entry] = 0;
- }
- counter -= 1024;
- dir_entry++;
+ if (!(1 & pg_table)) { /* non-zero entry with bit 0 clear: report it and zero the slot */
+ printk("bad page-table at pg_dir[%d]: %08x\n\r",
+ swap_table,pg_table);
+ ((unsigned long *) p->tss.cr3)[swap_table] = 0;
+ swap_table++;
goto check_dir;
}
pg_table &= 0xfffff000;
check_table:
- if (counter < 0)
- goto no_swap;
- if (page_entry >= 1024) {
- page_entry = 0;
- dir_entry++;
+ if (swap_page >= 1024) { /* finished this table: continue with the next directory slot */
+ swap_page = 0;
+ swap_table++;
goto check_dir;
}
- if (try_to_swap_out(page_entry + (unsigned long *) pg_table)) {
+ if (try_to_swap_out(swap_page + (unsigned long *) pg_table)) {
p->rss--;
return 1;
}
- page_entry++;
- counter--;
+ swap_page++; /* counter is no longer charged per page, only per check_task pass */
goto check_table;
-no_swap:
- return 0;
}
static int try_to_free_page(void)
}
if (priority <= GFP_BUFFER)
return 0;
- if (try_to_free_page()) {
- schedule();
+ if (try_to_free_page())
goto repeat;
- }
return 0;
}
if (!suser())
return -EPERM;
- if (!(swap_inode = namei(specialfile)))
- return -ENOENT;
+ i = namei(specialfile,&swap_inode);
+ if (i)
+ return i;
if (swap_file || swap_device || swap_bitmap || swap_lockmap) {
iput(swap_inode);
return -EBUSY;
subdirs: dummy
- for i in $(SUBDIRS); do (cd $$i; echo $$i; $(MAKE)) || exit; done
+ for i in $(SUBDIRS); do (cd $$i && echo $$i && $(MAKE)) || exit; done
clean:
rm -f core *.o *.a tmp_make
sed '/\#\#\# Dependencies/q' < Makefile > tmp_make
for i in *.c;do $(CPP) -M $$i;done >> tmp_make
cp tmp_make Makefile
- @for i in $(SUBDIRS); do (cd $$i; echo $$i; $(MAKE) dep || exit; done
+ @for i in $(SUBDIRS); do (cd $$i && echo $$i && $(MAKE) dep) || exit; done
dummy:
set_fs(get_ds());
i = do_mknod(fname, S_IFSOCK | 0777, 0);
if (i == 0)
- i = open_namei(fname, 0, S_IFSOCK, &upd->inode);
+ i = open_namei(fname, 0, S_IFSOCK, &upd->inode, NULL);
set_fs(old_fs);
if (i < 0) {
printk("unix_proto_bind: can't open socket %s\n", fname);