[PATCH] Linux-0.97.2 (August 23, 1992)

author Linus Torvalds <torvalds@linuxfoundation.org>
Fri, 23 Nov 2007 20:09:04 +0000 (15:09 -0500)
committer Linus Torvalds <torvalds@linuxfoundation.org>
Fri, 23 Nov 2007 20:09:04 +0000 (15:09 -0500)
diff --git a/Makefile b/Makefile

index d143eacc53319bb8ca611ac30038f9048d29065a..879e95cc5e87f53458c476342d57416df30f2e4e 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -94,7 +94,7 @@ SUBDIRS               =kernel mm fs net lib
  KERNELHDRS     =/usr/src/linux/include
  
  .c.s:
-       $(CC) $(CFLAGS) -S $<
+       $(CC) $(CFLAGS) -S -o $*.s $<
  .s.o:
         $(AS) -c -o $*.o $<
  .c.o:
@@ -103,11 +103,11 @@ KERNELHDRS        =/usr/src/linux/include
  all:   Version Image
  
  linuxsubdirs: dummy
-       @for i in $(SUBDIRS); do (cd $$i; echo $$i; $(MAKE)) || exit; done
+       @for i in $(SUBDIRS); do (cd $$i && echo $$i && $(MAKE)) || exit; done
  
  Version:
         @./makever.sh
-       @echo \#define UTS_RELEASE \"0.97-`cat .version`\" > include/linux/config_rel.h
+       @echo \#define UTS_RELEASE \"0.97.pl2-`cat .version`\" > include/linux/config_rel.h
         @echo \#define UTS_VERSION \"`date +%D`\" > include/linux/config_ver.h
         touch include/linux/config.h
  
@@ -127,6 +127,9 @@ tools/build: tools/build.c
  
  boot/head.o: boot/head.s
  
+init/main.o: init/main.c
+       $(CC) $(CFLAGS) $(PROFILING) -c -o $*.o $<
+
  tools/system:  boot/head.o init/main.o linuxsubdirs
         $(LD) $(LDFLAGS) -M boot/head.o init/main.o \
                 $(ARCHIVES) \
@@ -157,17 +160,17 @@ clean:
         rm -f Image System.map tmp_make core boot/bootsect boot/setup \
                 boot/bootsect.s boot/setup.s init/main.s
         rm -f init/*.o tools/system tools/build boot/*.o
-       for i in $(SUBDIRS); do (cd $$i; $(MAKE) clean); done
+       for i in $(SUBDIRS); do (cd $$i && $(MAKE) clean); done
  
  backup: clean
-       cd .. ; tar cf - linux | compress - > backup.Z
+       cd .. && tar cf - linux | compress - > backup.Z
         sync
  
  depend dep:
         sed '/\#\#\# Dependencies/q' < Makefile > tmp_make
         for i in init/*.c;do echo -n "init/";$(CPP) -M $$i;done >> tmp_make
         cp tmp_make Makefile
-       for i in $(SUBDIRS); do (cd $$i; $(MAKE) dep) || exit; done
+       for i in $(SUBDIRS); do (cd $$i && $(MAKE) dep) || exit; done
  
  dummy:
  
diff --git a/boot/head.s b/boot/head.s

index 30ef2d861c5ecf739abd408dcc5a1cea2740e6c7..2444502a08a0d844be51d76926cd051870e1a5ba 100644 (file)
--- a/boot/head.s
+++ b/boot/head.s
@@ -12,11 +12,11 @@
   * the page directory.
   */
  .text
-.globl _idt,_gdt,_pg_dir,_tmp_floppy_area,_floppy_track_buffer
+.globl _idt,_gdt,_swapper_pg_dir,_tmp_floppy_area,_floppy_track_buffer
  /*
- * pg_dir is the main page directory, address 0x00000000
+ * swapper_pg_dir is the main page directory, address 0x00000000
   */
-_pg_dir:
+_swapper_pg_dir:
  startup_32:
         cld
         movl $0x10,%eax
@@ -26,13 +26,6 @@ startup_32:
         mov %ax,%gs
         lss _stack_start,%esp
         call setup_idt
-       call setup_gdt
-       movl $0x10,%eax         # reload all the segment registers
-       mov %ax,%ds             # after changing gdt. CS was already
-       mov %ax,%es             # reloaded in 'setup_gdt'
-       mov %ax,%fs
-       mov %ax,%gs
-       lss _stack_start,%esp
         xorl %eax,%eax
  1:     incl %eax               # check that A20 really IS enabled
         movl %eax,0x000000      # loop forever if it isn't
@@ -94,9 +87,9 @@ check_x87:
   *  setup_idt
   *
   *  sets up a idt with 256 entries pointing to
- *  ignore_int, interrupt gates. It then loads
- *  idt. Everything that wants to install itself
- *  in the idt-table may do so themselves. Interrupts
+ *  ignore_int, interrupt gates. It doesn't actually load
+ *  idt - that can be done only after paging has been enabled
+ *  and the kernel moved to 0xC0000000. Interrupts
   *  are enabled elsewhere, when we can be relatively
   *  sure everything is ok. This routine will be over-
   *  written by the page tables.
@@ -115,21 +108,6 @@ rp_sidt:
         addl $8,%edi
         dec %ecx
         jne rp_sidt
-       lidt idt_descr
-       ret
-
-/*
- *  setup_gdt
- *
- *  This routines sets up a new gdt and loads it.
- *  Only two entries are currently built, the same
- *  ones that were built in init.s. The routine
- *  is VERY complicated at two whole lines, so this
- *  rather long comment is certainly needed :-).
- *  This routine will beoverwritten by the page tables.
- */
-setup_gdt:
-       lgdt gdt_descr
         ret
  
  /*
@@ -185,6 +163,15 @@ _floppy_track_buffer:
  
  after_page_tables:
         call setup_paging
+       lgdt gdt_descr
+       lidt idt_descr
+       ljmp $0x08,$1f
+1:     movl $0x10,%eax         # reload all the segment registers
+       mov %ax,%ds             # after changing gdt.
+       mov %ax,%es
+       mov %ax,%fs
+       mov %ax,%gs
+       lss _stack_start,%esp
         pushl $0                # These are the parameters to main :-)
         pushl $0
         pushl $0
@@ -248,14 +235,17 @@ ignore_int:
   */
  .align 2
  setup_paging:
-       movl $1024*5,%ecx               /* 5 pages - pg_dir+4 page tables */
+       movl $1024*5,%ecx               /* 5 pages - swapper_pg_dir+4 page tables */
         xorl %eax,%eax
-       xorl %edi,%edi                  /* pg_dir is at 0x000 */
+       xorl %edi,%edi                  /* swapper_pg_dir is at 0x000 */
         cld;rep;stosl
-       movl $pg0+7,_pg_dir             /* set present bit/user r/w */
-       movl $pg1+7,_pg_dir+4           /*  --------- " " --------- */
-       movl $pg2+7,_pg_dir+8           /*  --------- " " --------- */
-       movl $pg3+7,_pg_dir+12          /*  --------- " " --------- */
+/* Identity-map the kernel in low 4MB memory for ease of transition */
+       movl $pg0+7,_swapper_pg_dir             /* set present bit/user r/w */
+/* But the real place is at 0xC0000000 */
+       movl $pg0+7,_swapper_pg_dir+3072        /* set present bit/user r/w */
+       movl $pg1+7,_swapper_pg_dir+3076        /*  --------- " " --------- */
+       movl $pg2+7,_swapper_pg_dir+3080        /*  --------- " " --------- */
+       movl $pg3+7,_swapper_pg_dir+3084        /*  --------- " " --------- */
         movl $pg3+4092,%edi
         movl $0xfff007,%eax             /*  16Mb - 4096 + 7 (r/w user,p) */
         std
@@ -263,29 +253,39 @@ setup_paging:
         subl $0x1000,%eax
         jge 1b
         cld
-       xorl %eax,%eax          /* pg_dir is at 0x0000 */
+       xorl %eax,%eax          /* swapper_pg_dir is at 0x0000 */
         movl %eax,%cr3          /* cr3 - page directory start */
         movl %cr0,%eax
         orl $0x80000000,%eax
         movl %eax,%cr0          /* set paging (PG) bit */
         ret                     /* this also flushes prefetch-queue */
  
-.align 2
+/*
+ * The interrupt descriptor table has room for 256 entries
+ */
+.align 4
  .word 0
  idt_descr:
         .word 256*8-1           # idt contains 256 entries
-       .long _idt
-.align 2
+       .long 0xc0000000+_idt
+
+.align 4
+_idt:
+       .fill 256,8,0           # idt is uninitialized
+
+/*
+ * The real GDT is also 256 entries long - no real reason
+ */
+.align 4
  .word 0
  gdt_descr:
-       .word 256*8-1           # so does gdt (not that that's any
-       .long _gdt              # magic number, but it works for me :^)
-
-       .align 3
-_idt:  .fill 256,8,0           # idt is uninitialized
+       .word 256*8-1
+       .long 0xc0000000+_gdt
  
-_gdt:  .quad 0x0000000000000000        /* NULL descriptor */
-       .quad 0x00c09a0000000fff        /* 16Mb */
-       .quad 0x00c0920000000fff        /* 16Mb */
+.align 4
+_gdt:
+       .quad 0x0000000000000000        /* NULL descriptor */
+       .quad 0xc0c09a0000000fff        /* 16Mb at 0xC0000000 */
+       .quad 0xc0c0920000000fff        /* 16Mb */
         .quad 0x0000000000000000        /* TEMPORARY - don't use */
         .fill 252,8,0                   /* space for LDT's and TSS's etc */
diff --git a/fs/Makefile b/fs/Makefile

index bfe604b37e05ccb119ad6031f661fc027008f636..84591324a2c1cca2dc6ebcdb7a21404b9f0119c0 100644 (file)
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -18,7 +18,7 @@ SUBDIRS       =minix ext msdos
  
  OBJS=  open.o read_write.o inode.o file_table.o buffer.o super.o \
         block_dev.o stat.o exec.o pipe.o namei.o fcntl.o ioctl.o \
-       select.o fifo.o
+       select.o fifo.o locks.o
  
  all: fs.o fssubdirs
  
@@ -26,18 +26,18 @@ fs.o: $(OBJS)
         $(LD) -r -o fs.o $(OBJS)
  
  fssubdirs: dummy
-       @for i in $(SUBDIRS); do (cd $$i; echo $$i; $(MAKE)) || exit; done
+       @for i in $(SUBDIRS); do (cd $$i && echo $$i && $(MAKE)) || exit; done
  
  clean:
         rm -f core *.o *.a tmp_make
         for i in *.c; do rm -f `basename $$i .c`.s;done
-       for i in $(SUBDIRS); do (cd $$i; $(MAKE) clean); done
+       for i in $(SUBDIRS); do (cd $$i && $(MAKE) clean); done
  
  depend dep:
         sed '/\#\#\# Dependencies/q' < Makefile > tmp_make
         for i in *.c;do $(CPP) -M $$i;done >> tmp_make
         cp tmp_make Makefile
-       for i in $(SUBDIRS); do (cd $$i; $(MAKE) dep) || exit; done
+       for i in $(SUBDIRS); do (cd $$i && $(MAKE) dep) || exit; done
  
  dummy:
  
diff --git a/fs/buffer.c b/fs/buffer.c

index b8604bebc3a9a382521d3b821b05a6764e81d933..2c5b95378bffffae043887ed38ac7139542dde4c 100644 (file)
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -494,7 +494,6 @@ void grow_buffers(int size)
                 tmp = bh;
                 bh->b_data = (char * ) (page+i);
                 bh->b_size = size;
-               i += size;
         }
         tmp = bh;
         while (1) {
diff --git a/fs/exec.c b/fs/exec.c

index 2be1aa3580c8d37f955e92f233d1d3ca4bea0957..c0f1c21bbbfd197993432bcf991aeaa6ae46470b 100644 (file)
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -80,7 +80,7 @@ int core_dump(long signr, struct pt_regs * regs)
         if(current->rlim[RLIMIT_CORE].rlim_cur < PAGE_SIZE/1024) return 0;
         __asm__("mov %%fs,%0":"=r" (fs));
         __asm__("mov %0,%%fs"::"r" ((unsigned short) 0x10));
-       if (open_namei("core",O_CREAT | O_WRONLY | O_TRUNC,0600,&inode))
+       if (open_namei("core",O_CREAT | O_WRONLY | O_TRUNC,0600,&inode,NULL))
                 goto end_coredump;
         if (!S_ISREG(inode->i_mode))
                 goto end_coredump;
@@ -169,17 +169,15 @@ int sys_uselib(const char * library)
         struct inode * inode;
         struct buffer_head * bh;
         struct exec ex;
+       int error;
  
-       if (get_limit(0x17) != TASK_SIZE)
+       if (!library || get_limit(0x17) != TASK_SIZE)
                 return -EINVAL;
         if ((libnum >= MAX_SHARED_LIBS) || (libnum < 0))
                 return -EINVAL;
-       if (library)
-               inode = namei(library);
-       else
-               inode = NULL;
-       if (!inode)
-               return -ENOENT;
+       error = namei(library,&inode);
+       if (error)
+               return error;
         if (!inode->i_sb || !S_ISREG(inode->i_mode) || !permission(inode,MAY_READ)) {
                 iput(inode);
                 return -EACCES;
@@ -203,6 +201,7 @@ int sys_uselib(const char * library)
         current->libraries[libnum].library = inode;
         current->libraries[libnum].start = ex.a_entry;
         current->libraries[libnum].length = (ex.a_data+ex.a_text+0xfff) & 0xfffff000;
+       current->libraries[libnum].bss = (ex.a_bss+0xfff) & 0xfffff000;
  #if 0
         printk("Loaded library %d at %08x, length %08x\n",
                 libnum,
@@ -334,19 +333,19 @@ static unsigned long change_ldt(unsigned long text_size,unsigned long * page)
  
         code_limit = TASK_SIZE;
         data_limit = TASK_SIZE;
-       code_base = get_base(current->ldt[1]);
-       data_base = code_base;
+       code_base = data_base = 0;
+       current->start_code = code_base;
         set_base(current->ldt[1],code_base);
         set_limit(current->ldt[1],code_limit);
         set_base(current->ldt[2],data_base);
         set_limit(current->ldt[2],data_limit);
  /* make sure fs points to the NEW data segment */
         __asm__("pushl $0x17\n\tpop %%fs"::);
-       data_base += data_limit - LIBRARY_SIZE;
+       data_base += data_limit;
         for (i=MAX_ARG_PAGES-1 ; i>=0 ; i--) {
                 data_base -= PAGE_SIZE;
                 if (page[i])
-                       put_dirty_page(page[i],data_base);
+                       put_dirty_page(current,page[i],data_base);
         }
         return data_limit;
  }
@@ -405,8 +404,9 @@ int do_execve(unsigned long * eip,long tmp,char * filename,
                 panic("execve called from supervisor mode");
         for (i=0 ; i<MAX_ARG_PAGES ; i++)       /* clear page-table */
                 page[i]=0;
-       if (!(inode=namei(filename)))           /* get executables inode */
-               return -ENOENT;
+       retval = namei(filename,&inode);        /* get executable inode */
+       if (retval)
+               return retval;
         argc = count(argv);
         envc = count(envp);
         
@@ -520,12 +520,10 @@ restart_interp:
                  */
                 old_fs = get_fs();
                 set_fs(get_ds());
-               if (!(inode=namei(interp))) { /* get executables inode */
-                       set_fs(old_fs);
-                       retval = -ENOENT;
-                       goto exec_error1;
-               }
+               retval = namei(interp,&inode);
                 set_fs(old_fs);
+               if (retval)
+                       goto exec_error1;
                 goto restart_interp;
         }
         brelse(bh);
@@ -582,19 +580,18 @@ restart_interp:
                 if ((current->close_on_exec>>i)&1)
                         sys_close(i);
         current->close_on_exec = 0;
-       free_page_tables(get_base(current->ldt[1]),get_limit(0x0f));
-       free_page_tables(get_base(current->ldt[2]),get_limit(0x17));
+       clear_page_tables(current);
         if (last_task_used_math == current)
                 last_task_used_math = NULL;
         current->used_math = 0;
         p += change_ldt(ex.a_text,page);
-       p -= LIBRARY_SIZE + MAX_ARG_PAGES*PAGE_SIZE;
+       p -= MAX_ARG_PAGES*PAGE_SIZE;
         p = (unsigned long) create_tables((char *)p,argc,envc);
         current->brk = ex.a_bss +
                 (current->end_data = ex.a_data +
                 (current->end_code = ex.a_text));
         current->start_stack = p;
-       current->rss = (LIBRARY_OFFSET - p + PAGE_SIZE-1) / PAGE_SIZE;
+       current->rss = (TASK_SIZE - p + PAGE_SIZE-1) / PAGE_SIZE;
         current->suid = current->euid = e_uid;
         current->sgid = current->egid = e_gid;
         if (N_MAGIC(ex) == OMAGIC)
diff --git a/fs/ext/freelists.c b/fs/ext/freelists.c

index 454796ca982bc01c08b6748341b4f290b465a60d..a2f6ed372a1d3a108042ca67a0dae411899c4d97 100644 (file)
--- a/fs/ext/freelists.c
+++ b/fs/ext/freelists.c
@@ -290,6 +290,7 @@ printk("ext_free_inode: inode empty, skipping to %d\n", efi->next);
         inode->i_ino = j;
         inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
         inode->i_op = NULL;
+       inode->i_blocks = inode->i_blksize = 0;
  #ifdef EXTFS_DEBUG
  printk("ext_new_inode : allocating inode %d\n", inode->i_ino);
  #endif
diff --git a/fs/ext/inode.c b/fs/ext/inode.c

index 67ed5233a12f3a169ab33dc08cfc474be7d4142a..e8874e53f9e0cfa5f734467c3b1f2e802f4ac3ac 100644 (file)
--- a/fs/ext/inode.c
+++ b/fs/ext/inode.c
@@ -295,6 +295,7 @@ void ext_read_inode(struct inode * inode)
         inode->i_nlink = raw_inode->i_nlinks;
         inode->i_size = raw_inode->i_size;
         inode->i_mtime = inode->i_atime = inode->i_ctime = raw_inode->i_time;
+       inode->i_blocks = inode->i_blksize = 0;
         if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode))
                 inode->i_rdev = raw_inode->i_zone[0];
         else for (block = 0; block < 12; block++)
diff --git a/fs/ext/symlink.c b/fs/ext/symlink.c

index 1daf1cb0a23ee9dd1c4eb66979e202ed97c7ee61..96b3b6e1695bfe8c671eea841da39bc7305c7791 100644 (file)
--- a/fs/ext/symlink.c
+++ b/fs/ext/symlink.c
@@ -21,7 +21,7 @@
  #include <linux/stat.h>
  
  static int ext_readlink(struct inode *, char *, int);
-static struct inode * ext_follow_link(struct inode *, struct inode *);
+static int ext_follow_link(struct inode *, struct inode *, int, int, struct inode **);
  
  /*
   * symlinks can't do much...
@@ -43,8 +43,10 @@ struct inode_operations ext_symlink_inode_operations = {
         NULL                    /* truncate */
  };
  
-static struct inode * ext_follow_link(struct inode * dir, struct inode * inode)
+static int ext_follow_link(struct inode * dir, struct inode * inode,
+       int flag, int mode, struct inode ** res_inode)
  {
+       int error;
         unsigned short fs;
         struct buffer_head * bh;
  
@@ -54,27 +56,30 @@ static struct inode * ext_follow_link(struct inode * dir, struct inode * inode)
         }
         if (!inode) {
                 iput(dir);
-               return NULL;
+               *res_inode = NULL;
+               return -ENOENT;
         }
         if (!S_ISLNK(inode->i_mode)) {
                 iput(dir);
-               return inode;
+               *res_inode = inode;
+               return 0;
         }
         __asm__("mov %%fs,%0":"=r" (fs));
         if ((current->link_count > 5) || !inode->i_data[0] ||
            !(bh = bread(inode->i_dev, inode->i_data[0], BLOCK_SIZE))) {
                 iput(dir);
                 iput(inode);
-               return NULL;
+               *res_inode = NULL;
+               return -ELOOP;
         }
         iput(inode);
         __asm__("mov %0,%%fs"::"r" ((unsigned short) 0x10));
         current->link_count++;
-       inode = _namei(bh->b_data,dir,1);
+       error = open_namei(bh->b_data,flag,mode,res_inode,dir);
         current->link_count--;
         __asm__("mov %0,%%fs"::"r" (fs));
         brelse(bh);
-       return inode;
+       return error;
  }
  
  static int ext_readlink(struct inode * inode, char * buffer, int buflen)
diff --git a/fs/fcntl.c b/fs/fcntl.c

index ed2decff70c074d5511a75711f15ffc2ef39a083..fa0d23bd0e79fc3d7557d4d9a5b01134ed672ee4 100644 (file)
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -14,6 +14,8 @@
  #include <linux/string.h>
  
  extern int sys_close(int fd);
+extern int fcntl_getlk(unsigned int, struct flock *);
+extern int fcntl_setlk(unsigned int, unsigned int, struct flock *);
  
  static int dupfd(unsigned int fd, unsigned int arg)
  {
@@ -72,8 +74,12 @@ int sys_fcntl(unsigned int fd, unsigned int cmd, unsigned long arg)
                         filp->f_flags &= ~(O_APPEND | O_NONBLOCK);
                         filp->f_flags |= arg & (O_APPEND | O_NONBLOCK);
                         return 0;
-               case F_GETLK:   case F_SETLK:   case F_SETLKW:
-                       return -ENOSYS;
+               case F_GETLK:
+                       return fcntl_getlk(fd, (struct flock *) arg);
+               case F_SETLK:
+                       return fcntl_setlk(fd, cmd, (struct flock *) arg);
+               case F_SETLKW:
+                       return fcntl_setlk(fd, cmd, (struct flock *) arg);
                 default:
                         /* sockets need a few special fcntls. */
                         if (S_ISSOCK (filp->f_inode->i_mode))
diff --git a/fs/locks.c b/fs/locks.c

new file mode 100644 (file)

index 0000000..d99821b
--- /dev/null
+++ b/fs/locks.c
@@ -0,0 +1,471 @@
+/*
+ *  linux/fs/locks.c
+ *
+ *  Provide support for fcntl()'s F_GETLK, F_SETLK, and F_SETLKW calls.
+ *  Doug Evans, 92Aug07, dje@sspiff.uucp.
+ *
+ * FIXME: two things aren't handled yet:
+ *     - deadlock detection/avoidance (of dubious merit, but since it's in
+ *       the definition, I guess it should be provided eventually)
+ *     - mandatory locks (requires lots of changes elsewhere)
+ */
+
+#include <asm/segment.h>
+
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/stat.h>
+#include <linux/fcntl.h>
+
+#define OFFSET_MAX     0x7fffffff      /* FIXME: move elsewhere? */
+
+static int copy_flock(struct file *filp, struct file_lock *fl, struct flock *l);
+static int conflict(struct file_lock *caller_fl, struct file_lock *sys_fl);
+static int overlap(struct file_lock *fl1, struct file_lock *fl2);
+static int lock_it(struct file *filp, struct file_lock *caller);
+static int unlock_it(struct file *filp, struct file_lock *caller);
+static struct file_lock *alloc_lock(struct file *filp, struct file_lock *template);
+static void free_lock(struct file *filp, struct file_lock *fl);
+
+static struct file_lock file_lock_table[NR_FILE_LOCKS];
+static struct file_lock *file_lock_free_list;
+
+/*
+ * Called at boot time: thread every slot of file_lock_table onto the
+ * free list and mark each slot as unowned.
+ */
+
+void fcntl_init_locks(void)
+{
+       int slot;
+
+       for (slot = 0; slot < NR_FILE_LOCKS; slot++) {
+               file_lock_table[slot].fl_owner = NULL;
+               if (slot < NR_FILE_LOCKS - 1)
+                       file_lock_table[slot].fl_next = &file_lock_table[slot + 1];
+               else
+                       file_lock_table[slot].fl_next = NULL;   /* end of list */
+       }
+       file_lock_free_list = &file_lock_table[0];
+}
+
+/*
+ * F_GETLK: report the first lock on the file that would block the lock
+ * described by {l}, or set l_type to F_UNLCK if nothing conflicts.
+ * Returns 0, -EBADF for a bad descriptor, or -EINVAL for a bad request.
+ */
+int fcntl_getlk(unsigned int fd, struct flock *l)
+{
+       struct file *filp;
+       struct flock request;
+       struct file_lock wanted;
+       struct file_lock *held;
+
+       if (fd >= NR_OPEN)
+               return -EBADF;
+       filp = current->filp[fd];
+       if (!filp)
+               return -EBADF;
+       verify_area(l, sizeof(*l));
+       memcpy_fromfs(&request, l, sizeof(request));
+       if (request.l_type == F_UNLCK)
+               return -EINVAL;
+       if (!copy_flock(filp, &wanted, &request))
+               return -EINVAL;
+
+       /* walk the inode's lock list until something conflicts */
+       held = filp->f_inode->i_flock;
+       while (held != NULL && !conflict(&wanted, held))
+               held = held->fl_next;
+
+       if (held == NULL) {
+               request.l_type = F_UNLCK;               /* no conflict found */
+       } else {
+               request.l_pid = held->fl_owner->pid;
+               request.l_start = held->fl_start;
+               /* a lock running to OFFSET_MAX is reported with length 0 */
+               if (held->fl_end == OFFSET_MAX)
+                       request.l_len = 0;
+               else
+                       request.l_len = held->fl_end - held->fl_start + 1;
+               request.l_whence = held->fl_whence;
+               request.l_type = held->fl_type;
+       }
+       memcpy_tofs(l, &request, sizeof(request));
+       return 0;
+}
+
+/*
+ * This function implements both F_SETLK and F_SETLKW.
+ *
+ * Returns 0 on success, -EBADF for a bad descriptor or one whose f_mode
+ * lacks the bit the lock type needs, -EINVAL for a bad flock, -EAGAIN
+ * when F_SETLK meets a conflicting lock, -EINTR when a signal arrives
+ * while F_SETLKW is waiting, or whatever lock_it()/unlock_it() return.
+ */
+
+int fcntl_setlk(unsigned int fd, unsigned int cmd, struct flock *l)
+{
+       struct file *filp;
+       struct file_lock *fl,file_lock;
+       struct flock flock;
+
+       /*
+        * Get arguments and validate them ...
+        */
+
+       if (fd >= NR_OPEN || !(filp = current->filp[fd]))
+               return -EBADF;
+       verify_area(l, sizeof(*l));
+       memcpy_fromfs(&flock, l, sizeof(flock));
+       if (!copy_flock(filp, &file_lock, &flock))
+               return -EINVAL;
+       switch (file_lock.fl_type) {
+       case F_RDLCK :
+               if (!(filp->f_mode & 1))
+                       return -EBADF;
+               break;
+       case F_WRLCK :
+               if (!(filp->f_mode & 2))
+                       return -EBADF;
+               break;
+       case F_UNLCK :
+               break;
+       }
+
+       /*
+        * F_UNLCK needs to be handled differently ...
+        */
+
+       if (file_lock.fl_type == F_UNLCK)
+               return unlock_it(filp, &file_lock);
+
+       /*
+        * Scan for a conflicting lock ...
+        */
+
+repeat:
+       for (fl = filp->f_inode->i_flock; fl != NULL; fl = fl->fl_next) {
+               if (!conflict(&file_lock, fl))
+                       continue;
+               /*
+                * File is locked by another process. If this is F_SETLKW
+                * wait for the lock to be released.
+                * FIXME: We need to check for deadlocks here.
+                */
+               if (cmd == F_SETLKW) {
+                       interruptible_sleep_on(&fl->fl_wait);
+                       /*
+                        * The sleep also ends when a signal is pending.
+                        * Give up in that case: rescanning would find the
+                        * same conflict and sleep on it again, so the
+                        * signal would never be delivered.
+                        */
+                       if (current->signal & ~current->blocked)
+                               return -EINTR;
+                       goto repeat;
+               }
+               return -EAGAIN;
+       }
+
+       /*
+        * Lock doesn't conflict with any other lock ...
+        */
+
+       return lock_it(filp, &file_lock);
+}
+
+/*
+ * Release every lock that {task} holds on the inode behind {filp}.
+ * This function is called when the file is closed.
+ */
+
+void fcntl_remove_locks(struct task_struct *task, struct file *filp)
+{
+       struct file_lock *cur = filp->f_inode->i_flock;
+       struct file_lock *following;
+
+       while (cur != NULL) {
+               /*
+                * Freeing an entry moves it to the free list and clobbers
+                * its {fl_next}, so remember the successor beforehand.
+                */
+               following = cur->fl_next;
+               if (cur->fl_owner == task)
+                       free_lock(filp, cur);
+               cur = following;
+       }
+}
+
+/*
+ * Validate the caller's "struct flock" {l} and translate it into the
+ * "struct file_lock" {fl}, converting the whence-relative start into an
+ * absolute file position.
+ * Result is 1 on success, 0 if the request is not acceptable.
+ */
+
+static int copy_flock(struct file *filp, struct file_lock *fl, struct flock *l)
+{
+       off_t base;
+
+       if (!filp->f_inode)     /* just in case */
+               return 0;
+       if (!S_ISREG(filp->f_inode->i_mode))
+               return 0;
+
+       switch (l->l_type) {
+       case F_UNLCK :
+       case F_RDLCK :
+       case F_WRLCK :
+               break;
+       default :
+               return 0;
+       }
+
+       if (l->l_whence == 0)           /*SEEK_SET*/
+               base = 0;
+       else if (l->l_whence == 1)      /*SEEK_CUR*/
+               base = filp->f_pos;
+       else if (l->l_whence == 2)      /*SEEK_END*/
+               base = filp->f_inode->i_size;
+       else
+               return 0;
+
+       base += l->l_start;
+       if (base < 0 || l->l_len < 0)
+               return 0;
+
+       fl->fl_type = l->l_type;
+       fl->fl_start = base;    /* we record the absolute position */
+       fl->fl_whence = 0;      /* FIXME: do we record {l_start} as passed? */
+       if (l->l_len == 0) {
+               /* zero length: the lock runs to the largest offset */
+               fl->fl_end = OFFSET_MAX;
+       } else {
+               fl->fl_end = base + l->l_len - 1;
+               if (fl->fl_end < 0)     /* clamp a negative end offset */
+                       fl->fl_end = OFFSET_MAX;
+       }
+       fl->fl_owner = current;
+       fl->fl_wait = NULL;             /* just for cleanliness */
+       return 1;
+}
+
+/*
+ * Decide whether the existing lock {sys_fl} blocks the requested lock
+ * {caller_fl}. Locks of the same owner, or locks that do not overlap,
+ * never block each other.
+ */
+
+static int conflict(struct file_lock *caller_fl, struct file_lock *sys_fl)
+{
+       if (caller_fl->fl_owner == sys_fl->fl_owner ||
+           !overlap(caller_fl, sys_fl))
+               return 0;
+
+       /* a write request is blocked by any overlapping foreign lock */
+       if (caller_fl->fl_type == F_WRLCK)
+               return 1;
+       /* a read request is blocked by anything that is not a read lock */
+       if (caller_fl->fl_type == F_RDLCK)
+               return sys_fl->fl_type != F_RDLCK;
+
+       return 0;       /* shouldn't get here, but just in case */
+}
+
+/*
+ * Nonzero if the byte ranges of the two locks share at least one byte:
+ * the range that starts first must reach the start of the other one.
+ */
+static int overlap(struct file_lock *fl1, struct file_lock *fl2)
+{
+       struct file_lock *lower = fl1;
+       struct file_lock *upper = fl2;
+
+       if (fl1->fl_start > fl2->fl_start) {
+               lower = fl2;
+               upper = fl1;
+       }
+       return lower->fl_end >= upper->fl_start;
+}
+
+/*
+ * Add a lock to a file ...
+ * Result is 0 for success or -ENOLCK.
+ *
+ * We try to be real clever here and always minimize the number of table
+ * entries we use. For example we merge adjacent locks whenever possible. This
+ * consumes a bit of cpu and code space, is it really worth it? Beats me.
+ *
+ * I've tried to keep the following as small and simple as possible. If you can
+ * make it smaller or simpler, please do. /dje 92Aug11
+ *
+ * WARNING: We assume the lock doesn't conflict with any other lock.
+ */
+
+static int lock_it(struct file *filp, struct file_lock *caller)
+{
+       struct file_lock *fl,*new;
+
+       /*
+        * It's easier if we allocate a slot for the lock first, and then
+        * release it later if we have to (IE: if it can be merged with
+        * another). This way the for() loop always knows that {caller} is an
+        * existing entry. This will cause the routine to fail unnecessarily
+        * in rare cases, but perfection can be pushed too far. :-)
+        */
+
+       /* from here on {caller} points at the allocated slot, not the argument */
+       if ((caller = alloc_lock(filp, caller)) == NULL)
+               return -ENOLCK;
+
+       /*
+        * First scan to see if we are changing/augmenting an existing lock ...
+        */
+
+       for (fl = filp->f_inode->i_flock; fl != NULL; fl = fl->fl_next) {
+               /* only entries of the same owner are merged or reshaped */
+               if (caller->fl_owner != fl->fl_owner)
+                       continue;
+               /* skip the entry that currently represents the new lock */
+               if (caller == fl)
+                       continue;
+               if (!overlap(caller, fl)) {
+                       /*
+                        * Detect adjacent regions (if same lock type) ...
+                        */
+                       if (caller->fl_type != fl->fl_type)
+                               continue;
+                       /*
+                        * NOTE(review): fl_end + 1 looks like it can overflow
+                        * when fl_end is OFFSET_MAX - confirm.
+                        */
+                       if (caller->fl_end + 1 == fl->fl_start) {
+                               /* {caller} ends right before {fl}: grow {fl} downwards */
+                               fl->fl_start = caller->fl_start;
+                               free_lock(filp, caller);
+                               caller = fl;
+                               /* must continue, may overlap others now */
+                       } else if (caller->fl_start - 1 == fl->fl_end) {
+                               /* {caller} starts right after {fl}: grow {fl} upwards */
+                               fl->fl_end = caller->fl_end;
+                               free_lock(filp, caller);
+                               caller = fl;
+                               /* must continue, may overlap others now */
+                       }
+                       continue;
+               }
+               /*
+                * We've found an overlapping region. Is it a change of lock
+                * type, or are we changing the size of the locked space?
+                */
+               if (caller->fl_type != fl->fl_type) {
+                       if (caller->fl_start > fl->fl_start && caller->fl_end < fl->fl_end) {
+                               /*
+                                * The new lock splits the old one in two ...
+                                * {fl} is the bottom piece, {caller} is the
+                                * new lock, and {new} is the top piece.
+                                */
+                               /* {fl} is passed as the template for the top piece */
+                               if ((new = alloc_lock(filp, fl)) == NULL) {
+                                       free_lock(filp, caller);
+                                       return -ENOLCK;
+                               }
+                               fl->fl_end = caller->fl_start - 1;
+                               new->fl_start = caller->fl_end + 1;
+                               return 0;
+                       }
+                       if (caller->fl_start <= fl->fl_start && caller->fl_end >= fl->fl_end) {
+                               /*
+                                * The new lock completely replaces old one ...
+                                */
+                               /*
+                                * NOTE(review): this returns without looking at
+                                * the rest of the list, so a further entry of
+                                * the same owner that also overlaps {caller}
+                                * appears to be left untouched - confirm.
+                                */
+                               free_lock(filp, fl);
+                               return 0;
+                       }
+                       /* partial overlap: trim {fl} so it no longer covers {caller} */
+                       if (caller->fl_end < fl->fl_end) {
+                               fl->fl_start = caller->fl_end + 1;
+                               /* must continue, may be more overlaps */
+                       } else if (caller->fl_start > fl->fl_start) {
+                               fl->fl_end = caller->fl_start - 1;
+                               /* must continue, may be more overlaps */
+                       } else {
+                               printk("lock_it: program bug: unanticipated overlap\n");
+                               free_lock(filp, caller);
+                               return -ENOLCK;
+                       }
+               } else {        /* The new lock augments an existing lock ... */
+                       int grew = 0;   /* set when {fl} had to be extended */
+
+                       if (caller->fl_start < fl->fl_start) {
+                               fl->fl_start = caller->fl_start;
+                               grew = 1;
+                       }
+                       if (caller->fl_end > fl->fl_end) {
+                               fl->fl_end = caller->fl_end;
+                               grew = 1;
+                       }
+                       /* {fl} now covers the request; drop the extra slot */
+                       free_lock(filp, caller);
+                       caller = fl;
+                       if (!grew)
+                               return 0;
+                       /* must continue, may be more overlaps */
+               }
+       }
+
+       /*
+        * New lock doesn't overlap any regions ...
+        * alloc_lock() has already been called, so we're done!
+        */
+
+       return 0;
+}
+
+/*
+ * Handle F_UNLCK: remove the caller's range from every overlapping lock it owns.
+ * Result is 0 for success, -EINVAL if no lock of that owner overlapped the range,
+ * or -ENOLCK if a lock had to be split into two and alloc_lock() had no free entry.
+ */
+
+static int unlock_it(struct file *filp, struct file_lock *caller)
+{
+       int one_unlocked = 0;           /* set once any lock overlapped */
+       struct file_lock *fl,*next;
+
+       for (fl = filp->f_inode->i_flock; fl != NULL; ) {
+               if (caller->fl_owner != fl->fl_owner || !overlap(caller, fl)) {
+                       fl = fl->fl_next;
+                       continue;
+               }
+               one_unlocked = 1;
+               if (caller->fl_start > fl->fl_start && caller->fl_end < fl->fl_end) {
+                       /*
+                        * Range lies strictly inside {fl}: lock is split in two ...
+                        * {fl} is the bottom piece, {next} is the top piece.
+                        */
+                       if ((next = alloc_lock(filp, fl)) == NULL)
+                               return -ENOLCK;
+                       fl->fl_end = caller->fl_start - 1;
+                       next->fl_start = caller->fl_end + 1;
+                       return 0;
+               }
+               /*
+                * At this point we know there is an overlap and we know the
+                * lock isn't split into two ...
+                *
+                * Unless the lock table is broken, entries will not overlap.
+                * E.g.: User X won't have an entry locking bytes 1-3 and another
+                * entry locking bytes 3-5. Therefore, if the area being
+                * unlocked is a subset of the total area, we don't need to
+                * traverse any more of the list. The code is a tad more
+                * complicated by this optimization. Perhaps it's not worth it.
+                *
+                * WARNING: We assume free_lock() does not alter
+                *      {fl_start, fl_end}: they are read again after the free.
+                *
+                * {fl_next} gets clobbered when the entry is moved to
+                * the free list, so grab it now ...
+                */
+               next = fl->fl_next;
+               if (caller->fl_start <= fl->fl_start && caller->fl_end >= fl->fl_end) {
+                       free_lock(filp, fl);    /* range covers the whole lock */
+               } else if (caller->fl_start > fl->fl_start) {
+                       fl->fl_end = caller->fl_start - 1;      /* keep the part below the range */
+               } else {
+                       /* caller->fl_end < fl->fl_end: keep the part above the range */
+                       fl->fl_start = caller->fl_end + 1;
+               }
+               if (caller->fl_start >= fl->fl_start && caller->fl_end <= fl->fl_end)
+                       return 0;               /* no more to be found */
+               fl = next;
+               /* must continue, there may be more to unlock */
+       }
+
+       return one_unlocked ? 0 : -EINVAL;
+}
+
+static struct file_lock *alloc_lock(struct file *filp, struct file_lock *template)
+{
+       struct file_lock *new;
+
+       if (file_lock_free_list == NULL)
+               return NULL;                    /* no available entry */
+       if (file_lock_free_list->fl_owner != NULL)      /* free_lock() clears fl_owner */
+               panic("alloc_lock: broken free list\n");
+
+       new = file_lock_free_list;              /* remove from free list */
+       file_lock_free_list = file_lock_free_list->fl_next;
+
+       *new = *template;                       /* start as a copy of the template */
+
+       new->fl_next = filp->f_inode->i_flock;  /* insert at head of file's list */
+       filp->f_inode->i_flock = new;
+
+       new->fl_owner = current;        /* FIXME: needed? (overrides the copied owner) */
+       new->fl_wait = NULL;            /* do not inherit the template's wait queue */
+       return new;
+}
+
+/*
+ * Unlink {fl} from the file's lock list, add it to the free list, clear its
+ * owner and wake up whatever is sleeping on its fl_wait queue.
+ * WARNING: We must not alter {fl_start, fl_end}. See unlock_it().
+ */
+
+static void free_lock(struct file *filp, struct file_lock *fl)
+{
+       struct file_lock **fl_p;        /* address of the link that points at {fl} */
+
+       if (fl->fl_owner == NULL)       /* sanity check */
+               panic("free_lock: broken lock list\n");
+
+       /*
+        * The list is singly linked to save some memory space (this is the
+        * only place we'd use a doubly linked list), so search for the link.
+        */
+
+       for (fl_p = &filp->f_inode->i_flock; *fl_p != NULL; fl_p = &(*fl_p)->fl_next) {
+               if (*fl_p == fl)
+                       break;
+       }
+       if (*fl_p == NULL) {
+               printk("free_lock: lock is not in file's lock list\n");
+       } else {
+               *fl_p = (*fl_p)->fl_next;       /* unlink from file's list */
+       }
+
+       fl->fl_next = file_lock_free_list;      /* add to free list (even if not found above) */
+       file_lock_free_list = fl;
+       fl->fl_owner = NULL;                    /* for sanity checks */
+
+       wake_up(&fl->fl_wait);                  /* wake anything sleeping on fl_wait */
+}
diff --git a/fs/minix/bitmap.c b/fs/minix/bitmap.c

index ddf74f616528b498d4f7dcf3745d8f28c34249ea..51082b2bd43574b8fb42cfcd589b20f8d06d07c7 100644 (file)
--- a/fs/minix/bitmap.c
+++ b/fs/minix/bitmap.c
@@ -230,6 +230,7 @@ struct inode * minix_new_inode(int dev)
         inode->i_ino = j + i*8192;
         inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
         inode->i_op = NULL;
+       inode->i_blocks = inode->i_blksize = 0;
         return inode;
  }
  
diff --git a/fs/minix/inode.c b/fs/minix/inode.c

index b90a4e8d7d880501485e062b14ba49aeae548833..d8ae3bce42c9308a84cc16fadc7b69b819ab83f6 100644 (file)
--- a/fs/minix/inode.c
+++ b/fs/minix/inode.c
@@ -230,6 +230,7 @@ void minix_read_inode(struct inode * inode)
         inode->i_nlink = raw_inode->i_nlinks;
         inode->i_size = raw_inode->i_size;
         inode->i_mtime = inode->i_atime = inode->i_ctime = raw_inode->i_time;
+       inode->i_blocks = inode->i_blksize = 0;
         if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode))
                 inode->i_rdev = raw_inode->i_zone[0];
         else for (block = 0; block < 9; block++)
diff --git a/fs/minix/symlink.c b/fs/minix/symlink.c

index 65263a0e8cc8c4e9ad1d2bbd9be443d6ec8a1ca1..b5683ba20dcf18b0f4f25c1559d6a7e2890a50b6 100644 (file)
--- a/fs/minix/symlink.c
+++ b/fs/minix/symlink.c
@@ -15,7 +15,7 @@
  #include <linux/stat.h>
  
  static int minix_readlink(struct inode *, char *, int);
-static struct inode * minix_follow_link(struct inode *, struct inode *);
+static int minix_follow_link(struct inode *, struct inode *, int, int, struct inode **);
  
  /*
   * symlinks can't do much...
@@ -37,8 +37,10 @@ struct inode_operations minix_symlink_inode_operations = {
         NULL                    /* truncate */
  };
  
-static struct inode * minix_follow_link(struct inode * dir, struct inode * inode)
+static int minix_follow_link(struct inode * dir, struct inode * inode,
+       int flag, int mode, struct inode ** res_inode)
  {
+       int error;
         unsigned short fs;
         struct buffer_head * bh;
  
@@ -48,27 +50,30 @@ static struct inode * minix_follow_link(struct inode * dir, struct inode * inode
         }
         if (!inode) {
                 iput(dir);
-               return NULL;
+               *res_inode = NULL;
+               return -ENOENT;
         }
         if (!S_ISLNK(inode->i_mode)) {
                 iput(dir);
-               return inode;
+               *res_inode = inode;
+               return 0;
         }
         __asm__("mov %%fs,%0":"=r" (fs));
         if ((current->link_count > 5) || !inode->i_data[0] ||
            !(bh = bread(inode->i_dev, inode->i_data[0], BLOCK_SIZE))) {
                 iput(dir);
                 iput(inode);
-               return NULL;
+               *res_inode = NULL;
+               return -ELOOP;
         }
         iput(inode);
         __asm__("mov %0,%%fs"::"r" ((unsigned short) 0x10));
         current->link_count++;
-       inode = _namei(bh->b_data,dir,1);
+       error = open_namei(bh->b_data,flag,mode,res_inode,dir);
         current->link_count--;
         __asm__("mov %0,%%fs"::"r" (fs));
         brelse(bh);
-       return inode;
+       return error;
  }
  
  static int minix_readlink(struct inode * inode, char * buffer, int buflen)
diff --git a/fs/msdos/Makefile b/fs/msdos/Makefile

index 485a62b063c7d84709ee4631d889693642dfa538..0d172a211e8b397c4abcb4e17ce95191e4aa2e4f 100644 (file)
--- a/fs/msdos/Makefile
+++ b/fs/msdos/Makefile
@@ -8,10 +8,9 @@
  # Note 2! The CFLAGS definitions are now in the main makefile...
  
  .c.s:
-       $(CC) $(CFLAGS) \
-       -S -o $*.s $<
+       $(CC) $(CFLAGS) -S $<
  .c.o:
-       $(CC) $(CFLAGS) -c -o $*.o $<
+       $(CC) $(CFLAGS) -c $<
  .s.o:
         $(AS) -o $*.o $<
  
diff --git a/fs/namei.c b/fs/namei.c

index 42eb868bd48833c06198ecd4e6f1c5958abf1eda..17b3b448a3ece2465a6fca6fb62a487fdf00e053 100644 (file)
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -19,9 +19,6 @@
  #include <linux/fcntl.h>
  #include <linux/stat.h>
  
-struct inode * _namei(const char * filename, struct inode * base,
-       int follow_links);
-
  #define ACC_MODE(x) ("\004\002\006\377"[(x)&O_ACCMODE])
  
  /*
@@ -91,18 +88,21 @@ int lookup(struct inode * dir,const char * name, int len,
         return dir->i_op->lookup(dir,name,len,result);
  }
  
-struct inode * follow_link(struct inode * dir, struct inode * inode)
+int follow_link(struct inode * dir, struct inode * inode,
+       int flag, int mode, struct inode ** res_inode)
  {
         if (!dir || !inode) {
                 iput(dir);
                 iput(inode);
-               return NULL;
+               *res_inode = NULL;
+               return -ENOENT;
         }
         if (!inode->i_op || !inode->i_op->follow_link) {
                 iput(dir);
-               return inode;
+               *res_inode = inode;
+               return 0;
         }
-       return inode->i_op->follow_link(dir,inode);
+       return inode->i_op->follow_link(dir,inode,flag,mode,res_inode);
  }
  
  /*
@@ -111,14 +111,15 @@ struct inode * follow_link(struct inode * dir, struct inode * inode)
   * dir_namei() returns the inode of the directory of the
   * specified name, and the name within that directory.
   */
-static struct inode * dir_namei(const char * pathname,
-       int * namelen, const char ** name, struct inode * base)
+static int dir_namei(const char * pathname, int * namelen, const char ** name,
+       struct inode * base, struct inode ** res_inode)
  {
         char c;
         const char * thisname;
         int len,error;
         struct inode * inode;
  
+       *res_inode = NULL;
         if (!base) {
                 base = current->pwd;
                 base->i_count++;
@@ -139,41 +140,48 @@ static struct inode * dir_namei(const char * pathname,
                 error = lookup(base,thisname,len,&inode);
                 if (error) {
                         iput(base);
-                       return NULL;
+                       return error;
                 }
-               if (!(base = follow_link(base,inode)))
-                       return NULL;
+               error = follow_link(base,inode,0,0,&base);
+               if (error)
+                       return error;
         }
         *name = thisname;
         *namelen = len;
-       return base;
+       *res_inode = base;
+       return 0;
  }
  
-struct inode * _namei(const char * pathname, struct inode * base,
-       int follow_links)
+static int _namei(const char * pathname, struct inode * base,
+       int follow_links, struct inode ** res_inode)
  {
         const char * basename;
         int namelen,error;
         struct inode * inode;
  
-       if (!(base = dir_namei(pathname,&namelen,&basename,base)))
-               return NULL;
+       *res_inode = NULL;
+       error = dir_namei(pathname,&namelen,&basename,base,&base);
+       if (error)
+               return error;
         base->i_count++;        /* lookup uses up base */
         error = lookup(base,basename,namelen,&inode);
         if (error) {
                 iput(base);
-               return NULL;
+               return error;
         }
-       if (follow_links)
-               inode = follow_link(base,inode);
-       else
+       if (follow_links) {
+               error = follow_link(base,inode,0,0,&inode);
+               if (error)
+                       return error;
+       } else
                 iput(base);
-       return inode;
+       *res_inode = inode;
+       return 0;
  }
  
-struct inode * lnamei(const char * pathname)
+int lnamei(const char * pathname, struct inode ** res_inode)
  {
-       return _namei(pathname, NULL, 0);
+       return _namei(pathname,NULL,0,res_inode);
  }
  
  /*
@@ -183,9 +191,9 @@ struct inode * lnamei(const char * pathname)
   * Open, link etc use their own routines, but this is enough for things
   * like 'chmod' etc.
   */
-struct inode * namei(const char * pathname)
+int namei(const char * pathname, struct inode ** res_inode)
  {
-       return _namei(pathname,NULL,1);
+       return _namei(pathname,NULL,1,res_inode);
  }
  
  /*
@@ -194,7 +202,7 @@ struct inode * namei(const char * pathname)
   * namei for open - this is in fact almost the whole open-routine.
   */
  int open_namei(const char * pathname, int flag, int mode,
-       struct inode ** res_inode)
+       struct inode ** res_inode, struct inode * base)
  {
         const char * basename;
         int namelen,error,i;
@@ -205,8 +213,9 @@ int open_namei(const char * pathname, int flag, int mode,
                 flag |= O_WRONLY;
         mode &= 07777 & ~current->umask;
         mode |= I_REGULAR;
-       if (!(dir = dir_namei(pathname,&namelen,&basename,NULL)))
-               return -ENOENT;
+       error = dir_namei(pathname,&namelen,&basename,base,&dir);
+       if (error)
+               return error;
         if (!namelen) {                 /* special case: '/usr/' etc */
                 if (!(flag & (O_ACCMODE|O_CREAT|O_TRUNC))) {
                         *res_inode=dir;
@@ -241,8 +250,8 @@ int open_namei(const char * pathname, int flag, int mode,
                 iput(inode);
                 return -EEXIST;
         }
-       if (!(inode = follow_link(dir,inode)))
-               return -ELOOP;
+       if (error = follow_link(dir,inode,flag,mode,&inode))
+               return error;
         if (S_ISBLK(inode->i_mode) || S_ISCHR(inode->i_mode)) {
                 if (IS_NODEV(inode)) {
                         iput(inode);
@@ -289,11 +298,12 @@ int open_namei(const char * pathname, int flag, int mode,
  int do_mknod(const char * filename, int mode, int dev)
  {
         const char * basename;
-       int namelen;
+       int namelen, error;
         struct inode * dir;
-       
-       if (!(dir = dir_namei(filename,&namelen,&basename, NULL)))
-               return -ENOENT;
+
+       error = dir_namei(filename,&namelen,&basename, NULL, &dir);
+       if (error)
+               return error;
         if (!namelen) {
                 iput(dir);
                 return -ENOENT;
@@ -323,11 +333,12 @@ int sys_mknod(const char * filename, int mode, int dev)
  int sys_mkdir(const char * pathname, int mode)
  {
         const char * basename;
-       int namelen;
+       int namelen, error;
         struct inode * dir;
  
-       if (!(dir = dir_namei(pathname,&namelen,&basename, NULL)))
-               return -ENOENT;
+       error = dir_namei(pathname,&namelen,&basename,NULL,&dir);
+       if (error)
+               return error;
         if (!namelen) {
                 iput(dir);
                 return -ENOENT;
@@ -350,11 +361,12 @@ int sys_mkdir(const char * pathname, int mode)
  int sys_rmdir(const char * name)
  {
         const char * basename;
-       int namelen;
+       int namelen, error;
         struct inode * dir;
  
-       if (!(dir = dir_namei(name,&namelen,&basename, NULL)))
-               return -ENOENT;
+       error = dir_namei(name,&namelen,&basename,NULL,&dir);
+       if (error)
+               return error;
         if (!namelen) {
                 iput(dir);
                 return -ENOENT;
@@ -377,11 +389,12 @@ int sys_rmdir(const char * name)
  int sys_unlink(const char * name)
  {
         const char * basename;
-       int namelen;
+       int namelen, error;
         struct inode * dir;
  
-       if (!(dir = dir_namei(name,&namelen,&basename, NULL)))
-               return -ENOENT;
+       error = dir_namei(name,&namelen,&basename,NULL,&dir);
+       if (error)
+               return error;
         if (!namelen) {
                 iput(dir);
                 return -EPERM;
@@ -405,11 +418,11 @@ int sys_symlink(const char * oldname, const char * newname)
  {
         struct inode * dir;
         const char * basename;
-       int namelen;
+       int namelen, error;
  
-       dir = dir_namei(newname,&namelen,&basename, NULL);
-       if (!dir)
-               return -ENOENT;
+       error = dir_namei(newname,&namelen,&basename,NULL,&dir);
+       if (error)
+               return error;
         if (!namelen) {
                 iput(dir);
                 return -ENOENT;
@@ -433,15 +446,15 @@ int sys_link(const char * oldname, const char * newname)
  {
         struct inode * oldinode, * dir;
         const char * basename;
-       int namelen;
+       int namelen, error;
  
-       oldinode = namei(oldname);
-       if (!oldinode)
-               return -ENOENT;
-       dir = dir_namei(newname,&namelen,&basename, NULL);
-       if (!dir) {
+       error = namei(oldname, &oldinode);
+       if (error)
+               return error;
+       error = dir_namei(newname,&namelen,&basename,NULL,&dir);
+       if (error) {
                 iput(oldinode);
-               return -EACCES;
+               return error;
         }
         if (!namelen) {
                 iput(oldinode);
@@ -475,11 +488,11 @@ int sys_rename(const char * oldname, const char * newname)
  {
         struct inode * old_dir, * new_dir;
         const char * old_base, * new_base;
-       int old_len, new_len;
+       int old_len, new_len, error;
  
-       old_dir = dir_namei(oldname,&old_len,&old_base, NULL);
-       if (!old_dir)
-               return -ENOENT;
+       error = dir_namei(oldname,&old_len,&old_base,NULL,&old_dir);
+       if (error)
+               return error;
         if (!permission(old_dir,MAY_WRITE)) {
                 iput(old_dir);
                 return -EACCES;
@@ -490,10 +503,10 @@ int sys_rename(const char * oldname, const char * newname)
                 iput(old_dir);
                 return -EPERM;
         }
-       new_dir = dir_namei(newname,&new_len,&new_base, NULL);
-       if (!new_dir) {
+       error = dir_namei(newname,&new_len,&new_base,NULL,&new_dir);
+       if (error) {
                 iput(old_dir);
-               return -ENOENT;
+               return error;
         }
         if (!permission(new_dir,MAY_WRITE)) {
                 iput(old_dir);
diff --git a/fs/open.c b/fs/open.c

index 183b10cbe74f122c5188e7117750e35235bb03e3..3ee6c987f757f7f466e1573a15cbb692e383992b 100644 (file)
--- a/fs/open.c
+++ b/fs/open.c
@@ -17,6 +17,8 @@
  #include <linux/tty.h>
  #include <asm/segment.h>
  
+extern void fcntl_remove_locks(struct task_struct *, struct file *);
+
  struct file_operations * chrdev_fops[MAX_CHRDEV] = {
         NULL,
  };
@@ -33,10 +35,12 @@ int sys_ustat(int dev, struct ustat * ubuf)
  int sys_statfs(const char * path, struct statfs * buf)
  {
         struct inode * inode;
+       int error;
  
         verify_area(buf, sizeof(struct statfs));
-       if (!(inode = namei(path)))
-               return -ENOENT;
+       error = namei(path,&inode);
+       if (error)
+               return error;
         if (!inode->i_sb->s_op->statfs) {
                 iput(inode);
                 return -ENOSYS;
@@ -65,9 +69,11 @@ int sys_fstatfs(unsigned int fd, struct statfs * buf)
  int sys_truncate(const char * path, unsigned int length)
  {
         struct inode * inode;
+       int error;
  
-       if (!(inode = namei(path)))
-               return -ENOENT;
+       error = namei(path,&inode);
+       if (error)
+               return error;
         if (S_ISDIR(inode->i_mode) || !permission(inode,MAY_WRITE)) {
                 iput(inode);
                 return -EACCES;
@@ -112,9 +118,11 @@ int sys_utime(char * filename, struct utimbuf * times)
  {
         struct inode * inode;
         long actime,modtime;
+       int error;
  
-       if (!(inode=namei(filename)))
-               return -ENOENT;
+       error = namei(filename,&inode);
+       if (error)
+               return error;
         if (IS_RDONLY(inode)) {
                 iput(inode);
                 return -EROFS;
@@ -151,8 +159,9 @@ int sys_access(const char * filename,int mode)
         int res, i_mode;
  
         mode &= 0007;
-       if (!(inode=namei(filename)))
-               return -EACCES;
+       res = namei(filename,&inode);
+       if (res)
+               return res;
         i_mode = res = inode->i_mode & 0777;
         iput(inode);
         if (current->uid == inode->i_uid)
@@ -176,9 +185,11 @@ int sys_access(const char * filename,int mode)
  int sys_chdir(const char * filename)
  {
         struct inode * inode;
+       int error;
  
-       if (!(inode = namei(filename)))
-               return -ENOENT;
+       error = namei(filename,&inode);
+       if (error)
+               return error;
         if (!S_ISDIR(inode->i_mode)) {
                 iput(inode);
                 return -ENOTDIR;
@@ -195,9 +206,11 @@ int sys_chdir(const char * filename)
  int sys_chroot(const char * filename)
  {
         struct inode * inode;
+       int error;
  
-       if (!(inode=namei(filename)))
-               return -ENOENT;
+       error = namei(filename,&inode);
+       if (error)
+               return error;
         if (!S_ISDIR(inode->i_mode)) {
                 iput(inode);
                 return -ENOTDIR;
@@ -232,9 +245,11 @@ int sys_fchmod(unsigned int fd, mode_t mode)
  int sys_chmod(const char * filename, mode_t mode)
  {
         struct inode * inode;
+       int error;
  
-       if (!(inode = namei(filename)))
-               return -ENOENT;
+       error = namei(filename,&inode);
+       if (error)
+               return error;
         if ((current->euid != inode->i_uid) && !suser()) {
                 iput(inode);
                 return -EPERM;
@@ -274,9 +289,11 @@ int sys_fchown(unsigned int fd, uid_t user, gid_t group)
  int sys_chown(const char * filename, uid_t user, gid_t group)
  {
         struct inode * inode;
+       int error;
  
-       if (!(inode = lnamei(filename)))
-               return -ENOENT;
+       error = lnamei(filename,&inode);
+       if (error)
+               return error;
         if (IS_RDONLY(inode)) {
                 iput(inode);
                 return -EROFS;
@@ -310,7 +327,7 @@ int sys_open(const char * filename,int flag,int mode)
         if (!f)
                 return -ENFILE;
         current->filp[fd] = f;
-       if ((i = open_namei(filename,flag,mode,&inode))<0) {
+       if ((i = open_namei(filename,flag,mode,&inode,NULL))<0) {
                 current->filp[fd]=NULL;
                 f->f_count--;
                 return i;
@@ -338,25 +355,23 @@ int sys_creat(const char * pathname, int mode)
         return sys_open(pathname, O_CREAT | O_WRONLY | O_TRUNC, mode);
  }
  
-static int
-close_fp (struct file *filp)
+static int close_fp(struct file *filp)
  {
-   struct inode *inode;
+       struct inode *inode;
  
         if (filp->f_count == 0) {
                 printk("Close: file count is 0\n");
                 return 0;
         }
-
+       inode = filp->f_inode;
+       if (S_ISREG(inode->i_mode))
+               fcntl_remove_locks(current, filp);
         if (filp->f_count > 1) {
                 filp->f_count--;
                 return 0;
         }
-     
-       inode = filp->f_inode;
         if (filp->f_op && filp->f_op->release)
                 filp->f_op->release(inode,filp);
-
         filp->f_count--;
         filp->f_inode = NULL;
         iput(inode);
@@ -376,94 +391,75 @@ int sys_close(unsigned int fd)
         return (close_fp (filp));
  }
  
-/* This routine looks through all the process's and closes any
-   references to the current processes tty.  To avoid problems with
-   process sleeping on an inode which has already been iput, anyprocess
-   which is sleeping on the tty is sent a sigkill (It's probably a rogue
-   process.)  Also no process should ever have /dev/console as it's
-   controlling tty, or have it open for reading.  So we don't have to
-   worry about messing with all the daemons abilities to write messages
-   to the console.  (Besides they should be using syslog.) */
-
-int
-sys_vhangup(void)
+/*
+ * This routine looks through all the processes and closes any
+ * references to the current process's tty.  To avoid problems with
+ * processes sleeping on an inode which has already been iput, any process
+ * which is sleeping on the tty is sent a SIGKILL (it's probably a rogue
+ * process).  Also, no process should ever have /dev/console as its
+ * controlling tty, or have it open for reading, so we don't have to
+ * worry about messing with all the daemons' ability to write messages
+ * to the console.  (Besides, they should be using syslog.)
+ */
+int sys_vhangup(void)
  {
-   int i;
-   int j;
-   struct file *filep;
-   struct tty_struct *tty;
-   extern void kill_wait (struct wait_queue **q, int signal);
-   extern int kill_pg (int pgrp, int sig, int priv);
-
-   if (!suser()) return (-EPERM);
-
-   /* send the SIGHUP signal. */
-   kill_pg (current->pgrp, SIGHUP, 0);
+       int i,j;
+       struct file *filep;
+       struct tty_struct *tty;
+       extern void kill_wait (struct wait_queue **q, int signal);
+       extern int kill_pg (int pgrp, int sig, int priv);
  
-   /* See if there is a controlling tty. */
-   if (current->tty < 0) return (0);
-
-   for (i = 0; i < NR_TASKS; i++)
-     {
-       if (task[i] == NULL) continue;
-       for (j = 0; j < NR_OPEN; j++)
-         {
-            filep = task[i]->filp[j];
-
-            if (filep == NULL) continue;
-
-            /* now we need to check to see if this file points to the
-               device we are trying to close. */
-
-            if (!S_ISCHR (filep->f_inode->i_mode)) continue;
-
-            /* This will catch both /dev/tty and the explicit terminal
-               device.  However, we must make sure that f_rdev is
-               defined and correct. */
-
-            if ((MAJOR(filep->f_inode->i_rdev) == 5 ||
-                 MAJOR(filep->f_inode->i_rdev) == 4 ) &&
-                (MAJOR(filep->f_rdev) == 4 &&
-                 MINOR(filep->f_rdev) == MINOR (current->tty)))
-              {
-                 task[i]->filp[j] = NULL;
+       if (!suser())
+               return -EPERM;
+       /* send the SIGHUP signal. */
+       kill_pg(current->pgrp, SIGHUP, 0);
+       /* See if there is a controlling tty. */
+       if (current->tty < 0)
+               return 0;
  
+       for (i = 0; i < NR_TASKS; i++) {
+               if (task[i] == NULL)
+                       continue;
+               for (j = 0; j < NR_OPEN; j++) {
+                       filep = task[i]->filp[j];
+                       if (!filep)
+                               continue;
+                       if (!S_ISCHR(filep->f_inode->i_mode))
+                               continue;
+                       if ((MAJOR(filep->f_inode->i_rdev) == 5 ||
+                            MAJOR(filep->f_inode->i_rdev) == 4 ) &&
+                           (MAJOR(filep->f_rdev) == 4 &&
+                            MINOR(filep->f_rdev) == MINOR (current->tty))) {
                   /* so now we have found something to close.  We
                      need to kill every process waiting on the
                      inode. */
-
-                 kill_wait (&filep->f_inode->i_wait, SIGKILL);
+                               task[i]->filp[j] = NULL;
+                               kill_wait (&filep->f_inode->i_wait, SIGKILL);
  
                   /* now make sure they are awake before we close the
                      file. */
  
-                 wake_up (&filep->f_inode->i_wait);
+                               wake_up (&filep->f_inode->i_wait);
  
                   /* finally close the file. */
  
-                 current->close_on_exec &= ~(1<<j);
-                 close_fp (filep);
-              }
-
-         }
-
+                               current->close_on_exec &= ~(1<<j);
+                               close_fp (filep);
+                       }
+               }
         /* can't let them keep a reference to it around.
            But we can't touch current->tty until after the
            loop is complete. */
  
-       if (task[i]->tty == current->tty && task[i] != current)
-         {
-            task[i]->tty = -1;
-         }
-     }
-   
+               if (task[i]->tty == current->tty && task[i] != current) {
+                       task[i]->tty = -1;
+               }
+       }
     /* need to do tty->session = 0 */
-   tty = TTY_TABLE(MINOR(current->tty));
-   tty->session = 0;
-   tty->pgrp = -1;
-   current->tty = -1;
-
-
-   return (0);
+       tty = TTY_TABLE(MINOR(current->tty));
+       tty->session = 0;
+       tty->pgrp = -1;
+       current->tty = -1;
+       return 0;
  }
  
diff --git a/fs/select.c b/fs/select.c

index 5cd8e0f01e924d36a8c27d49e8d0f509762e0d4e..4dc1682b50eb43861a6714b4c0021a930b846e3f 100644 (file)
--- a/fs/select.c
+++ b/fs/select.c
@@ -51,6 +51,8 @@ static int check_in(select_table * wait, struct inode * inode, struct file * fil
  {
         if (file->f_op && file->f_op->select)
                 return file->f_op->select(inode,file,SEL_IN,wait);
+       if (inode && S_ISREG(inode->i_mode))
+               return 1;
         return 0;
  }
  
@@ -58,6 +60,8 @@ static int check_out(select_table * wait, struct inode * inode, struct file * fi
  {
         if (file->f_op && file->f_op->select)
                 return file->f_op->select(inode,file,SEL_OUT,wait);
+       if (inode && S_ISREG(inode->i_mode))
+               return 1;
         return 0;
  }
  
@@ -65,6 +69,8 @@ static int check_ex(select_table * wait, struct inode * inode, struct file * fil
  {
         if (file->f_op && file->f_op->select)
                 return file->f_op->select(inode,file,SEL_EX,wait);
+       if (inode && S_ISREG(inode->i_mode))
+               return 1;
         return 0;
  }
  
@@ -85,15 +91,6 @@ int do_select(fd_set in, fd_set out, fd_set ex,
                         return -EBADF;
                 if (!current->filp[i]->f_inode)
                         return -EBADF;
-               if (current->filp[i]->f_inode->i_pipe)
-                       continue;
-               if (S_ISCHR(current->filp[i]->f_inode->i_mode))
-                       continue;
-               if (S_ISFIFO(current->filp[i]->f_inode->i_mode))
-                       continue;
-               if (S_ISSOCK(current->filp[i]->f_inode->i_mode))
-                       continue;
-               return -EBADF;
         }
  repeat:
         wait_table.nr = 0;
diff --git a/fs/stat.c b/fs/stat.c

index 225f9d3bbecf4a03f6f499ccbf09c6186195151b..459f418a6e1bd76db0c1f677a66e036843f67719 100644 (file)
--- a/fs/stat.c
+++ b/fs/stat.c
@@ -56,35 +56,41 @@ static void cp_new_stat(struct inode * inode, struct new_stat * statbuf)
         tmp.st_mtime = inode->i_mtime;
         tmp.st_ctime = inode->i_ctime;
  /*
- * Right now we fake the st_blocks numbers: we'll eventually have to
- * add st_blocks to the inode, and let the vfs routines keep track of
- * it all. This algorithm doesn't guarantee correct block numbers, but
- * at least it tries to come up with a plausible answer...
- *
- * In fact, the minix fs doesn't use these numbers (it uses 7 and 512
- * instead of 10 and 256), but who cares... It's not that exact anyway.
+ * st_blocks and st_blksize are approximated with a simple algorithm if
+ * they aren't supported directly by the filesystem. The minix and msdos
+ * filesystems don't keep track of blocks, so the blocks would either have
+ * to be counted explicitly (by delving into the file itself) or be
+ * estimated with this simple algorithm, which gives a reasonable
+ * (although not 100% accurate) value.
   */
-       blocks = (tmp.st_size + 1023) / 1024;
-       if (blocks > 10) {
-               indirect = (blocks - 11)/256+1;
-               if (blocks > 10+256) {
-                       indirect += (blocks - 267)/(256*256)+1;
-                       if (blocks > 10+256+256*256)
-                               indirect++;
+       if (!inode->i_blksize) {
+               blocks = (tmp.st_size + 511) / 512;
+               if (blocks > 10) {
+                       indirect = (blocks - 11)/256+1;
+                       if (blocks > 10+256) {
+                               indirect += (blocks - 267)/(256*256)+1;
+                               if (blocks > 10+256+256*256)
+                                       indirect++;
+                       }
+                       blocks += indirect;
                 }
-               blocks += indirect;
+               tmp.st_blksize = 512;
+               tmp.st_blocks = blocks;
+       } else {
+               tmp.st_blksize = inode->i_blksize;
+               tmp.st_blocks = inode->i_blocks;
         }
-       tmp.st_blksize = 1024;
-       tmp.st_blocks = blocks;
         memcpy_tofs(statbuf,&tmp,sizeof(tmp));
  }
  
  int sys_stat(char * filename, struct old_stat * statbuf)
  {
         struct inode * inode;
+       int error;
  
-       if (!(inode=namei(filename)))
-               return -ENOENT;
+       error = namei(filename,&inode);
+       if (error)
+               return error;
         cp_old_stat(inode,statbuf);
         iput(inode);
         return 0;
@@ -93,9 +99,11 @@ int sys_stat(char * filename, struct old_stat * statbuf)
  int sys_newstat(char * filename, struct new_stat * statbuf)
  {
         struct inode * inode;
+       int error;
  
-       if (!(inode=namei(filename)))
-               return -ENOENT;
+       error = namei(filename,&inode);
+       if (error)
+               return error;
         cp_new_stat(inode,statbuf);
         iput(inode);
         return 0;
@@ -104,9 +112,11 @@ int sys_newstat(char * filename, struct new_stat * statbuf)
  int sys_lstat(char * filename, struct old_stat * statbuf)
  {
         struct inode * inode;
+       int error;
  
-       if (!(inode = lnamei(filename)))
-               return -ENOENT;
+       error = lnamei(filename,&inode);
+       if (error)
+               return error;
         cp_old_stat(inode,statbuf);
         iput(inode);
         return 0;
@@ -115,9 +125,11 @@ int sys_lstat(char * filename, struct old_stat * statbuf)
  int sys_newlstat(char * filename, struct new_stat * statbuf)
  {
         struct inode * inode;
+       int error;
  
-       if (!(inode = lnamei(filename)))
-               return -ENOENT;
+       error = lnamei(filename,&inode);
+       if (error)
+               return error;
         cp_new_stat(inode,statbuf);
         iput(inode);
         return 0;
@@ -148,12 +160,14 @@ int sys_newfstat(unsigned int fd, struct new_stat * statbuf)
  int sys_readlink(const char * path, char * buf, int bufsiz)
  {
         struct inode * inode;
+       int error;
  
         if (bufsiz <= 0)
                 return -EINVAL;
         verify_area(buf,bufsiz);
-       if (!(inode = lnamei(path)))
-               return -ENOENT;
+       error = lnamei(path,&inode);
+       if (error)
+               return error;
         if (!inode->i_op || !inode->i_op->readlink) {
                 iput(inode);
                 return -EINVAL;
diff --git a/fs/super.c b/fs/super.c

index 4d0e0303faf5192d7ce355eef0f6ed98ddb62da6..c3c47dca41658f973585cf18f5f400c7a8dc9b29 100644 (file)
--- a/fs/super.c
+++ b/fs/super.c
@@ -21,6 +21,7 @@
  
  int sync_dev(int dev);
  void wait_for_keypress(void);
+void fcntl_init_locks(void);
  
  /* set_bit uses setb, as gas doesn't recognize setc */
  #define set_bit(bitnr,addr) ({ \
@@ -178,8 +179,9 @@ int sys_umount(char * dev_name)
  
         if (!suser())
                 return -EPERM;
-       if (!(inode = namei(dev_name)))
-               return -ENOENT;
+       retval = namei(dev_name,&inode);
+       if (retval)
+               return retval;
         dev = inode->i_rdev;
         if (!S_ISBLK(inode->i_mode)) {
                 iput(inode);
@@ -208,9 +210,11 @@ static int do_mount(int dev, const char * dir, char * type, int flags, void * da
  {
         struct inode * inode, * dir_i;
         struct super_block * sb;
+       int error;
  
-       if (!(dir_i = namei(dir)))
-               return -ENOENT;
+       error = namei(dir,&dir_i);
+       if (error)
+               return error;
         if (dir_i->i_count != 1 || dir_i->i_mount) {
                 iput(dir_i);
                 return -EBUSY;
@@ -256,7 +260,7 @@ int sys_mount(char * dev_name, char * dir_name, char * type,
  {
         struct inode * inode;
         int dev;
-       int retval = 0;
+       int retval;
         char tmp[100],*t;
         int i;
         unsigned long flags = 0;
@@ -264,8 +268,9 @@ int sys_mount(char * dev_name, char * dir_name, char * type,
  
         if (!suser())
                 return -EPERM;
-       if (!(inode = namei(dev_name)))
-               return -ENOENT;
+       retval = namei(dev_name,&inode);
+       if (retval)
+               return retval;
         dev = inode->i_rdev;
         if (!S_ISBLK(inode->i_mode))
                 retval = -EPERM;
@@ -314,6 +319,7 @@ void mount_root(void)
                 panic("bad i-node size");
         for(i=0;i<NR_FILE;i++)
                 file_table[i].f_count=0;
+       fcntl_init_locks();
         if (MAJOR(ROOT_DEV) == 2) {
                 printk("Insert root floppy and press ENTER");
                 wait_for_keypress();
diff --git a/include/asm/system.h b/include/asm/system.h

index 877ae34777e1bb1d39cc8c7fa0b243bb15e5312a..59b0ba2b4e4dff5a27f76d9bf3f267927863dc43 100644 (file)
--- a/include/asm/system.h
+++ b/include/asm/system.h
@@ -49,8 +49,8 @@ __asm__ __volatile__ ("movw %%dx,%%ax\n\t" \
         *((gate_addr)+1) = (((base) & 0x0000ffff)<<16) | \
                 ((limit) & 0x0ffff); }
  
-#define _set_tssldt_desc(n,addr,type) \
-__asm__ __volatile__ ("movw $232,%1\n\t" \
+#define _set_tssldt_desc(n,addr,limit,type) \
+__asm__ __volatile__ ("movw $" #limit ",%1\n\t" \
         "movw %%ax,%2\n\t" \
         "rorl $16,%%eax\n\t" \
         "movb %%al,%3\n\t" \
@@ -58,9 +58,9 @@ __asm__ __volatile__ ("movw $232,%1\n\t" \
         "movb $0x00,%5\n\t" \
         "movb %%ah,%6\n\t" \
         "rorl $16,%%eax" \
-       ::"a" (addr), "m" (*(n)), "m" (*(n+2)), "m" (*(n+4)), \
+       ::"a" (addr+0xc0000000), "m" (*(n)), "m" (*(n+2)), "m" (*(n+4)), \
          "m" (*(n+5)), "m" (*(n+6)), "m" (*(n+7)) \
         )
  
-#define set_tss_desc(n,addr) _set_tssldt_desc(((char *) (n)),addr,"0x89")
-#define set_ldt_desc(n,addr) _set_tssldt_desc(((char *) (n)),addr,"0x82")
+#define set_tss_desc(n,addr) _set_tssldt_desc(((char *) (n)),((int)(addr)),231,"0x89")
+#define set_ldt_desc(n,addr) _set_tssldt_desc(((char *) (n)),((int)(addr)),23,"0x82")
diff --git a/include/linux/ext_fs_i.h b/include/linux/ext_fs_i.h

new file mode 100644 (file)

index 0000000..c64bc62
--- /dev/null
+++ b/include/linux/ext_fs_i.h
@@ -0,0 +1,10 @@
+#ifndef _EXT_FS_I
+#define _EXT_FS_I
+
+/*
+ * extended file system inode data in memory
+ */
+struct ext_inode_info {
+};
+
+#endif
diff --git a/include/linux/fs.h b/include/linux/fs.h

index 2c64bd4b02ad8ce21cc00ff49254c5e16422e4b5..4ce9b6f197753691251bf0fdf911b17ec5822b63 100644 (file)
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -111,6 +111,10 @@ struct buffer_head {
         struct buffer_head * b_reqnext;         /* request queue */
  };
  
+#include <linux/minix_fs_i.h>
+#include <linux/ext_fs_i.h>
+#include <linux/msdos_fs_i.h>
+
  struct inode {
         dev_t           i_dev;
         unsigned long   i_ino;
@@ -123,11 +127,14 @@ struct inode {
         time_t          i_atime;
         time_t          i_mtime;
         time_t          i_ctime;
+       unsigned long   i_blksize;
+       unsigned long   i_blocks;
         unsigned long i_data[16];
         struct inode_operations * i_op;
         struct super_block * i_sb;
         struct wait_queue * i_wait;
         struct wait_queue * i_wait2;    /* for pipes */
+       struct file_lock *i_flock;
         unsigned short i_count;
         unsigned short i_flags;
         unsigned char i_lock;
@@ -136,6 +143,11 @@ struct inode {
         unsigned char i_mount;
         unsigned char i_seek;
         unsigned char i_update;
+       union {
+               struct minix_inode_info minix_i;
+               struct ext_inode_info ext_i;
+               struct msdos_inode_info msdos_i;
+       } u;
  };
  
  struct file {
@@ -149,6 +161,16 @@ struct file {
         off_t f_pos;
  };
  
+struct file_lock {
+       struct file_lock *fl_next;      /* singly linked list */
+       struct task_struct *fl_owner;   /* NULL if on free list, for sanity checks */
+       struct wait_queue *fl_wait;
+       char fl_type;
+       char fl_whence;
+       off_t fl_start;
+       off_t fl_end;
+};
+
  #include <linux/minix_fs_sb.h>
  #include <linux/ext_fs_sb.h>
  #include <linux/msdos_fs_sb.h>
@@ -196,7 +218,7 @@ struct inode_operations {
         int (*mknod) (struct inode *,const char *,int,int,int);
         int (*rename) (struct inode *,const char *,int,struct inode *,const char *,int);
         int (*readlink) (struct inode *,char *,int);
-       struct inode * (*follow_link) (struct inode *, struct inode *);
+       int (*follow_link) (struct inode *, struct inode *, int flag, int mode, struct inode ** res_inode);
         int (*bmap) (struct inode *,int);
         void (*truncate) (struct inode *);
  };
@@ -239,13 +261,11 @@ extern void floppy_off(unsigned int dev);
  extern void sync_inodes(void);
  extern void wait_on(struct inode * inode);
  extern int bmap(struct inode * inode,int block);
-extern struct inode * namei(const char * pathname);
-extern struct inode * lnamei(const char * pathname);
+extern int namei(const char * pathname, struct inode ** res_inode);
+extern int lnamei(const char * pathname, struct inode ** res_inode);
  extern int permission(struct inode * inode,int mask);
-extern struct inode * _namei(const char * filename, struct inode * base,
-       int follow_links);
  extern int open_namei(const char * pathname, int flag, int mode,
-       struct inode ** res_inode);
+       struct inode ** res_inode, struct inode * base);
  extern int do_mknod(const char * filename, int mode, int dev);
  extern void iput(struct inode * inode);
  extern struct inode * iget(int dev,int nr);
diff --git a/include/linux/head.h b/include/linux/head.h

index b871742accb7bfdf241d8617ac018188b71d3408..8911a68198debe13dbf036ba9a30e13844a0691f 100644 (file)
--- a/include/linux/head.h
+++ b/include/linux/head.h
@@ -5,7 +5,7 @@ typedef struct desc_struct {
         unsigned long a,b;
  } desc_table[256];
  
-extern unsigned long pg_dir[1024];
+extern unsigned long swapper_pg_dir[1024];
  extern desc_table idt,gdt;
  
  #define GDT_NUL 0
diff --git a/include/linux/limits.h b/include/linux/limits.h

index 1de038822b999b40d0458fdaab83abe6ca238c0d..f3912fa3d32a8d2c5198374806402b9748dcd1d2 100644 (file)
--- a/include/linux/limits.h
+++ b/include/linux/limits.h
@@ -8,6 +8,7 @@
  #define NR_FILE 128
  #define NR_SUPER 8
  #define NR_HASH 997
+#define NR_FILE_LOCKS 32
  #define BLOCK_SIZE 1024
  #define BLOCK_SIZE_BITS 10
  #define MAX_CHRDEV 16
diff --git a/include/linux/minix_fs_i.h b/include/linux/minix_fs_i.h

new file mode 100644 (file)

index 0000000..dabe5af
--- /dev/null
+++ b/include/linux/minix_fs_i.h
@@ -0,0 +1,10 @@
+#ifndef _MINIX_FS_I
+#define _MINIX_FS_I
+
+/*
+ * minix fs inode data in memory
+ */
+struct minix_inode_info {
+};
+
+#endif
diff --git a/include/linux/mm.h b/include/linux/mm.h

index a7b6af23b65afad40ef8a6bfc2875fe8c0a0a100..9a0ff3e478a97a3f3ea8f4991fa0ef6c004fa548 100644 (file)
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -57,10 +57,12 @@ extern void rw_swap_page(int rw, unsigned int nr, char * buf);
  /* memory.c */
         
  extern unsigned long get_free_page(int priority);
-extern unsigned long put_dirty_page(unsigned long page,unsigned long address);
+extern unsigned long put_dirty_page(struct task_struct * tsk,unsigned long page,
+       unsigned long address);
  extern void free_page(unsigned long addr);
-extern int free_page_tables(unsigned long from,unsigned long size);
-extern int copy_page_tables(unsigned long from,unsigned long to,long size);
+extern void free_page_tables(struct task_struct * tsk);
+extern void clear_page_tables(struct task_struct * tsk);
+extern int copy_page_tables(struct task_struct * new);
  extern int unmap_page_range(unsigned long from, unsigned long size);
  extern int remap_page_range(unsigned long from, unsigned long to, unsigned long size,
          int permiss);
@@ -82,7 +84,7 @@ extern void swap_free(unsigned int page_nr);
  extern void swap_in(unsigned long *table_ptr);
  
  #define invalidate() \
-__asm__("movl %%eax,%%cr3"::"a" (0))
+__asm__ __volatile__("movl %%cr3,%%eax\n\tmovl %%eax,%%cr3":::"ax")
  
  extern unsigned long low_memory;
  extern unsigned long high_memory;
diff --git a/include/linux/msdos_fs_i.h b/include/linux/msdos_fs_i.h

new file mode 100644 (file)

index 0000000..bd900c0
--- /dev/null
+++ b/include/linux/msdos_fs_i.h
@@ -0,0 +1,10 @@
+#ifndef _MSDOS_FS_I
+#define _MSDOS_FS_I
+
+/*
+ * msdos file system inode data in memory
+ */
+struct msdos_inode_info {
+};
+
+#endif
diff --git a/include/linux/sched.h b/include/linux/sched.h

index 21a3fbbe460632a3ab634aa41308c46260717a74..fab929f41df8b794f5066498cb6796103d3f5c26 100644 (file)
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -3,33 +3,22 @@
  
  #define HZ 100
  
+/*
+ * This is the maximum nr of tasks - change it if you need to
+ */
  #define NR_TASKS       64
-#define TASK_SIZE      0x04000000
-#define LIBRARY_SIZE   0x00400000
+
+/*
+ * User space process size: 3GB. This is hardcoded into a few places,
+ * so don't change it unless you know what you are doing.
+ */
+#define TASK_SIZE      0xc0000000
  
  /*
   * Size of io_bitmap in longwords: 32 is ports 0-0x3ff.
   */
  #define IO_BITMAP_SIZE 32
  
-#if (TASK_SIZE & 0x3fffff)
-#error "TASK_SIZE must be multiple of 4M"
-#endif
-
-#if (LIBRARY_SIZE & 0x3fffff)
-#error "LIBRARY_SIZE must be a multiple of 4M"
-#endif
-
-#if (LIBRARY_SIZE >= (TASK_SIZE/2))
-#error "LIBRARY_SIZE too damn big!"
-#endif
-
-#if (((TASK_SIZE>>16)*NR_TASKS) != 0x10000)
-#error "TASK_SIZE*NR_TASKS must be 4GB"
-#endif
-
-#define LIBRARY_OFFSET (TASK_SIZE - LIBRARY_SIZE)
-
  #define CT_TO_SECS(x)  ((x) / HZ)
  #define CT_TO_USECS(x) (((x) % HZ) * 1000000/HZ)
  
@@ -43,6 +32,7 @@
  #include <linux/time.h>
  #include <linux/param.h>
  #include <linux/resource.h>
+#include <linux/vm86.h>
  
  #if (NR_OPEN > 32)
  #error "Currently the close-on-exec-flags and select masks are in one long, max 32 files/proc"
@@ -115,6 +105,7 @@ struct task_struct {
         long signal;
         struct sigaction sigaction[32];
         long blocked;   /* bitmap of masked signals */
+       unsigned long saved_kernel_stack;
  /* various fields */
         int exit_code;
         int dumpable:1;
@@ -146,6 +137,7 @@ struct task_struct {
         unsigned short used_math;
         unsigned short rss;     /* number of resident pages */
         char comm[8];
+       struct vm86_struct * vm86_info;
  /* file system info */
         int link_count;
         int tty;                /* -1 if no tty, so it must be signed */
@@ -157,6 +149,7 @@ struct task_struct {
                 struct inode * library;
                 unsigned long start;
                 unsigned long length;
+               unsigned long bss;
         } libraries[MAX_SHARED_LIBS];
         int numlibraries;
         struct file * filp[NR_OPEN];
@@ -173,9 +166,6 @@ struct task_struct {
  #define PF_ALIGNWARN   0x00000001      /* Print alignment warning msgs */
                                         /* Not implemented yet, only for 486*/
  #define PF_PTRACED     0x00000010      /* set if ptrace (0) has been called. */
-#define PF_VM86                0x00000020      /* set if process can execute a vm86 */
-                                       /* task. */
-                                        /* not impelmented. */
  
  /*
   *  INIT_TASK is used to set up the first task table, touch at
@@ -183,7 +173,7 @@ struct task_struct {
   */
  #define INIT_TASK \
  /* state etc */        { 0,15,15, \
-/* signals */  0,{{},},0, \
+/* signals */  0,{{},},0,0, \
  /* ec,brk... */        0,0,0,0,0,0,0,0, \
  /* pid etc.. */        0,0,0,0, \
  /* suppl grps*/ {NOGROUP,}, \
@@ -199,15 +189,16 @@ struct task_struct {
  /* math */     0, \
  /* rss */      2, \
  /* comm */     "swapper", \
+/* vm86_info */        NULL, \
  /* fs info */  0,-1,0022,NULL,NULL,NULL, \
  /* libraries */        { { NULL, 0, 0}, }, 0, \
  /* filp */     {NULL,}, 0, \
                 { \
                         {0,0}, \
-/* ldt */              {0x9f,0xc0fa00}, \
-                       {0x9f,0xc0f200} \
+/* ldt */              {0x9f,0xc0c0fa00}, \
+                       {0x9f,0xc0c0f200} \
                 }, \
-/*tss*/        {0,PAGE_SIZE+(long)&init_task,0x10,0,0,0,0,(long)&pg_dir,\
+/*tss*/        {0,PAGE_SIZE+(long)&init_task,0x10,0,0,0,0,(long)&swapper_pg_dir,\
          0,0,0,0,0,0,0,0, \
          0,0,0x17,0x17,0x17,0x17,0x17,0x17, \
          _LDT(0),0x80000000,{0xffffffff}, \
diff --git a/include/linux/sys.h b/include/linux/sys.h

index a479faa9b05a0e2c4ef9f0048a987e1927593c76..885fe2b1e25f6872c2150dbfc22f77779fa3bc93 100644 (file)
--- a/include/linux/sys.h
+++ b/include/linux/sys.h
@@ -115,6 +115,7 @@ extern int sys_newuname();
  extern int sys_iopl();
  extern int sys_vhangup();
  extern int sys_idle();
+extern int sys_vm86();
  
  fn_ptr sys_call_table[] = { sys_setup, sys_exit, sys_fork, sys_read,
  sys_write, sys_open, sys_close, sys_waitpid, sys_creat, sys_link,
@@ -137,7 +138,7 @@ sys_truncate, sys_ftruncate, sys_fchmod, sys_fchown, sys_getpriority,
  sys_setpriority, sys_profil, sys_statfs, sys_fstatfs, sys_ioperm,
  sys_socketcall, sys_syslog, sys_setitimer, sys_getitimer, sys_newstat,
  sys_newlstat, sys_newfstat, sys_newuname, sys_iopl, sys_vhangup,
-sys_idle };
+sys_idle, sys_vm86 };
  
  /* So we don't have to do any more manual updating.... */
  int NR_syscalls = sizeof(sys_call_table)/sizeof(fn_ptr);
diff --git a/include/linux/unistd.h b/include/linux/unistd.h

index 569c552922f8b883bcabe3409fe1ea968616a048..a15853fc695a78b210139160b7d73080a23bb6c2 100644 (file)
--- a/include/linux/unistd.h
+++ b/include/linux/unistd.h
@@ -119,6 +119,7 @@
  #define __NR_iopl              110
  #define __NR_vhangup           111
  #define __NR_idle              112
+#define __NR_vm86              113
  
  extern int errno;
  
diff --git a/include/linux/vm86.h b/include/linux/vm86.h

new file mode 100644 (file)

index 0000000..96b8959
--- /dev/null
+++ b/include/linux/vm86.h
@@ -0,0 +1,55 @@
+#ifndef _LINUX_VM86_H
+#define _LINUX_VM86_H
+
+#define VM_MASK 0x00020000
+
+/*
+ * This is the stack-layout when we have done a "SAVE_ALL" from vm86
+ * mode - the main change is that the old segment registers aren't
+ * useful any more and are forced to be zero by the kernel (and the
+ * hardware when a trap occurs), and the real segment registers are
+ * at the end of the structure. Look at ptrace.h to see the "normal"
+ * setup.
+ */
+
+struct vm86_regs {
+/*
+ * normal regs, with special meaning for the segment registers..
+ */
+       long ebx;
+       long ecx;
+       long edx;
+       long esi;
+       long edi;
+       long ebp;
+       long eax;
+       long __null_ds;
+       long __null_es;
+       long __null_fs;
+       long __null_gs;
+       long orig_eax;
+       long eip;
+       long cs;
+       long eflags;
+       long esp;
+       long ss;
+/*
+ * these are specific to v86 mode:
+ */
+       long es;
+       long ds;
+       long fs;
+       long gs;
+};
+
+/*
+ * flags isn't even used yet: it's just there as an example of
+ * what kind of information we might want to give sys_vm86() (or
+ * want it to return to us).
+ */
+struct vm86_struct {
+       struct vm86_regs regs;
+       unsigned long flags;
+};
+
+#endif
diff --git a/kernel/Makefile b/kernel/Makefile

index 695e77c5a352edc8d505b4a19c88a1585308fb47..3bb280ed71d9c27a248960b4e551a47fee66a627 100644 (file)
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -29,25 +29,25 @@ kernel.o: $(OBJS)
         sync
  
  kernelsubdirs: dummy
-       @for i in $(SUBDIRS); do (cd $$i; echo $$i; $(MAKE)) || exit; done
+       @for i in $(SUBDIRS); do (cd $$i && echo $$i && $(MAKE)) || exit; done
  
  sys_call.s: sys_call.S
  
  sys_call.o: sys_call.s
  
  sched.o: sched.c
-       $(CC) $(CFLAGS) -fno-omit-frame-pointer -c $<
+       $(CC) $(CFLAGS) $(PROFILING) -fno-omit-frame-pointer -c $<
  
  clean:
         rm -f core *.o *.a tmp_make sys_call.s
         for i in *.c;do rm -f `basename $$i .c`.s;done
-       for i in $(SUBDIRS); do (cd $$i; $(MAKE) clean); done
+       for i in $(SUBDIRS); do (cd $$i && $(MAKE) clean); done
  
  dep:
         sed '/\#\#\# Dependencies/q' < Makefile > tmp_make
         for i in *.c;do $(CPP) -M $$i;done >> tmp_make
         cp tmp_make Makefile
-       for i in $(SUBDIRS); do (cd $$i; $(MAKE) dep) || exit; done
+       for i in $(SUBDIRS); do (cd $$i && $(MAKE) dep) || exit; done
  
  dummy:
  
diff --git a/kernel/blk_drv/Makefile b/kernel/blk_drv/Makefile

index 232a0c8d89249956208b4e177fde5077e12ef6e3..55d48ca645ee8e1f364c631039bb864066d6fe5a 100644 (file)
--- a/kernel/blk_drv/Makefile
+++ b/kernel/blk_drv/Makefile
@@ -28,18 +28,18 @@ blk_drv.a: $(OBJS)
         sync
  
  scsisubdirs: dummy
-       @for i in $(SUBDIRS); do (cd $$i; echo $$i; $(MAKE)) || exit; done
+       @for i in $(SUBDIRS); do (cd $$i && echo $$i && $(MAKE)) || exit; done
  
  clean:
         rm -f core *.o *.a tmp_make
         for i in *.c;do rm -f `basename $$i .c`.s;done
-       for i in $(SUBDIRS); do (cd $$i; $(MAKE) clean); done
+       for i in $(SUBDIRS); do (cd $$i && $(MAKE) clean); done
  
  dep:
         sed '/\#\#\# Dependencies/q' < Makefile > tmp_make
         for i in *.c;do $(CPP) -M $$i;done >> tmp_make
         cp tmp_make Makefile
-       for i in $(SUBDIRS); do (cd $$i; $(MAKE) dep); done
+       for i in $(SUBDIRS); do (cd $$i && $(MAKE) dep); done
  
  dummy:
  
diff --git a/kernel/blk_drv/blk.h b/kernel/blk_drv/blk.h

index 70dab1bfdfc7fde1643dac22828f911b1f561c9e..2288c57fc1e7c8c820301af3f02adfd63c5bfa11 100644 (file)
--- a/kernel/blk_drv/blk.h
+++ b/kernel/blk_drv/blk.h
@@ -69,6 +69,7 @@ extern struct wait_queue * wait_for_request;
  
  extern int * blk_size[NR_BLK_DEV];
  
+extern unsigned long hd_init(unsigned long mem_start, unsigned long mem_end);
  extern int is_read_only(int dev);
  extern void set_device_ro(int dev,int flag);
  
diff --git a/kernel/blk_drv/hd.c b/kernel/blk_drv/hd.c

index f8d7d2102d28cff7e09ba0afda7082c906753c3a..77c76fcbbf880a4d5268ca087c56686559c75429 100644 (file)
--- a/kernel/blk_drv/hd.c
+++ b/kernel/blk_drv/hd.c
@@ -53,7 +53,8 @@ static inline unsigned char CMOS_READ(unsigned char addr)
  static void recal_intr(void);
  static void bad_rw_intr(void);
  
-static int recalibrate = 0;
+static char recalibrate[ MAX_HD ] = { 0, };
+
  static int reset = 0;
  
  #if (HD_DELAY > 0)
@@ -221,6 +222,8 @@ void unexpected_hd_interrupt(void)
  
  static void bad_rw_intr(void)
  {
+       int i;
+
         if (!CURRENT)
                 return;
         if (++CURRENT->errors >= MAX_ERRORS)
@@ -228,7 +231,8 @@ static void bad_rw_intr(void)
         else if (CURRENT->errors > MAX_ERRORS/2)
                 reset = 1;
         else
-               recalibrate = 1;
+               for (i=0; i < NR_HD; i++)
+                       recalibrate[i] = 1;
  }
  
  static inline int wait_DRQ(void)
@@ -378,7 +382,7 @@ static void hd_times_out(void)
  static void do_hd_request(void)
  {
         unsigned int block,dev;
-       unsigned int sec,head,cyl;
+       unsigned int sec,head,cyl,track;
         unsigned int nsect;
  
  repeat:
@@ -399,24 +403,26 @@ repeat:
         }
         block += hd[dev].start_sect;
         dev >>= 6;
-       sec = block % hd_info[dev].sect;
-       block /= hd_info[dev].sect;
-       head = block % hd_info[dev].head;
-       cyl = block / hd_info[dev].head;
-       sec++;
+       sec = block % hd_info[dev].sect + 1;
+       track = block / hd_info[dev].sect;
+       head = track % hd_info[dev].head;
+       cyl = track / hd_info[dev].head;
  #ifdef DEBUG
         printk("hd%d : cyl = %d, head = %d, sector = %d, buffer = %08x\n",
                 dev, cyl, head, sec, CURRENT->buffer);
  #endif
         cli();
         if (reset) {
-               recalibrate = 1;
+               int i;
+
+               for (i=0; i < NR_HD; i++)
+                       recalibrate[i] = 1;
                 reset_hd();
                 sti();
                 return;
         }
-       if (recalibrate) {
-               recalibrate = 0;
+       if (recalibrate[dev]) {
+               recalibrate[dev] = 0;
                 hd_out(dev,hd_info[dev].sect,0,0,0,WIN_RESTORE,&recal_intr);
                 if (reset)
                         goto repeat;
@@ -434,13 +440,16 @@ repeat:
                 }
                 port_write(HD_DATA,CURRENT->buffer,256);
                 sti();
-       } else if (CURRENT->cmd == READ) {
+               return;
+       }
+       if (CURRENT->cmd == READ) {
                 hd_out(dev,nsect,sec,head,cyl,WIN_READ,&read_intr);
                 if (reset)
                         goto repeat;
                 sti();
-       } else
-               panic("unknown hd-command");
+               return;
+       }
+       panic("unknown hd-command");
  }
  
  static int hd_ioctl(struct inode * inode, struct file * file,
@@ -481,7 +490,6 @@ static void hd_release(struct inode * inode, struct file * file)
         sync_dev(inode->i_rdev);
  }
  
-
  static void hd_geninit();
  
  static struct gendisk hd_gendisk = {
@@ -500,11 +508,11 @@ static struct gendisk hd_gendisk = {
         
  static void hd_geninit(void)
  {
-       int drive;
+       int drive, i;
  #ifndef HD_TYPE
         extern struct drive_info drive_info;
         void *BIOS = (void *) &drive_info;
-       int cmos_disks, i;
+       int cmos_disks;
            
         for (drive=0 ; drive<2 ; drive++) {
                 hd_info[drive].cyl = *(unsigned short *) BIOS;
@@ -593,7 +601,7 @@ static struct sigaction hd_sigaction = {
         NULL
  };
  
-unsigned long hd_init(unsigned long mem_start)
+unsigned long hd_init(unsigned long mem_start, unsigned long mem_end)
  {
         blk_dev[MAJOR_NR].request_fn = DEVICE_REQUEST;
         blkdev_fops[MAJOR_NR] = &hd_fops;
diff --git a/kernel/blk_drv/ll_rw_blk.c b/kernel/blk_drv/ll_rw_blk.c

index 1a5f6404c0a92a8f1cc45a8f1201c67204977165..058900966c731418c80baeb1747e86d9005fdc05 100644 (file)
--- a/kernel/blk_drv/ll_rw_blk.c
+++ b/kernel/blk_drv/ll_rw_blk.c
@@ -102,9 +102,6 @@ void set_device_ro(int dev,int flag)
   * add-request adds a request to the linked list.
   * It disables interrupts so that it can muck with the
   * request-lists in peace.
- *
- * Note that swapping requests always go before other requests,
- * and are done in the order they appear.
   */
  static void add_request(struct blk_dev_struct * dev, struct request * req)
  {
@@ -121,11 +118,6 @@ static void add_request(struct blk_dev_struct * dev, struct request * req)
                 return;
         }
         for ( ; tmp->next ; tmp = tmp->next) {
-               if (!req->bh)
-                       if (tmp->next->bh)
-                               break;
-                       else
-                               continue;
                 if ((IN_ORDER(tmp,req) ||
                     !IN_ORDER(tmp,tmp->next)) &&
                     IN_ORDER(req,tmp->next))
@@ -208,9 +200,10 @@ repeat:
         sti();
         goto repeat;
  
-found: sti();
+found:
  /* fill up the request-info, and add it to the queue */
         req->dev = bh->b_dev;
+       sti();
         req->cmd = rw;
         req->errors = 0;
         req->sector = sector;
diff --git a/kernel/chr_drv/mem.c b/kernel/chr_drv/mem.c

index 14962393b921777a71d126a56e4ac590da87f946..6caba5b6ea83494c9e4329d40f93e2c2b51cab6b 100644 (file)
--- a/kernel/chr_drv/mem.c
+++ b/kernel/chr_drv/mem.c
@@ -38,7 +38,7 @@ static int read_mem(struct inode * inode, struct file * file,char * buf, int cou
         while (count > 0) {
                 if (current->signal & ~current->blocked)
                         break;
-               pde = (unsigned long) pg_dir + (addr >> 20 & 0xffc);
+               pde = current->tss.cr3 + (addr >> 20 & 0xffc);
                 pte = *(unsigned long *) pde;
                 if (!(pte & PAGE_PRESENT))
                         break;
@@ -75,7 +75,7 @@ static int write_mem(struct inode * inode, struct file * file,char * buf, int co
         while (count > 0) {
                 if (current->signal & ~current->blocked)
                         break;
-               pde = (unsigned long) pg_dir + (addr >> 20 & 0xffc);
+               pde = current->tss.cr3 + (addr >> 20 & 0xffc);
                 pte = *(unsigned long *) pde;
                 if (!(pte & PAGE_PRESENT))
                         break;
diff --git a/kernel/exit.c b/kernel/exit.c

index 8cc5451a52efb7f8bc7e93fba66c74ed396e2ebe..11ec282459a1cb09f89fed02354a3c325b45a03d 100644 (file)
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -321,8 +321,7 @@ volatile void do_exit(long code)
         int i;
  
  fake_volatile:
-       free_page_tables(get_base(current->ldt[1]),get_limit(0x0f));
-       free_page_tables(get_base(current->ldt[2]),get_limit(0x17));
+       free_page_tables(current);
         for (i=0 ; i<NR_OPEN ; i++)
                 if (current->filp[i])
                         sys_close(i);
diff --git a/kernel/fork.c b/kernel/fork.c

index e80abe8f7050d0e37dec384cb4b000f3e47ae030..8024f29b58a57e8630935e79dc0640f53213c821 100644 (file)
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -56,15 +56,12 @@ int copy_mem(int nr,struct task_struct * p)
         }
         if (data_limit < code_limit)
                 panic("Bad data_limit");
-       new_data_base = new_code_base = nr * TASK_SIZE;
+       new_data_base = old_data_base;
+       new_code_base = old_code_base;
         p->start_code = new_code_base;
         set_base(p->ldt[1],new_code_base);
         set_base(p->ldt[2],new_data_base);
-       if (copy_page_tables(old_data_base,new_data_base,data_limit)) {
-               free_page_tables(new_data_base,data_limit);
-               return -ENOMEM;
-       }
-       return 0;
+       return copy_page_tables(p);
  }
  
  static int find_empty_process(void)
diff --git a/kernel/math/emulate.c b/kernel/math/emulate.c

index 1df0691ba030a5460a0669ce9407651944131526..9c86a6b3b08248a161cf65d276fb4652a9a245a3 100644 (file)
--- a/kernel/math/emulate.c
+++ b/kernel/math/emulate.c
@@ -62,6 +62,9 @@ static void do_emu(struct info * info)
         else
                 I387.swd &= 0x7fff;
         ORIG_EIP = EIP;
+/* We cannot handle emulation in v86-mode */
+       if (EFLAGS & 0x00020000)
+               math_abort(info,SIGILL);
  /* 0x0007 means user code space */
         if (CS != 0x000F) {
                 printk("math_emulate: %04x:%08x\n\r",CS,EIP);
diff --git a/kernel/ptrace.c b/kernel/ptrace.c

index 07f3a8d5f48cedb14808fb6af53297ab5dfc59c7..73575abf94af98240c0571e95d2c88073a4b7544 100644 (file)
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -88,7 +88,6 @@ static unsigned long get_long(struct task_struct * tsk,
  {
         unsigned long page;
  
-       addr += tsk->start_code;
  repeat:
         page = tsk->tss.cr3 + ((addr >> 20) & 0xffc);
         page = *(unsigned long *) page;
@@ -117,7 +116,6 @@ static void put_long(struct task_struct * tsk, unsigned long addr,
  {
         unsigned long page;
  
-       addr += tsk->start_code;
  repeat:
         page = tsk->tss.cr3 + ((addr >> 20) & 0xffc);
         page = *(unsigned long *) page;
diff --git a/kernel/sched.c b/kernel/sched.c

index 1e8a710a8663524c546494026cfec2460283ba85..9a4baea2447e6cdbd72715ecd6283bc182c57702 100644 (file)
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -393,7 +393,7 @@ struct timer_struct timer_table[32];
   * irq uses this to decide if it should update the user or system
   * times.
   */
-static void do_timer(int regs)
+static void do_timer(struct pt_regs * regs)
  {
         unsigned long mask;
         struct timer_struct *tp = timer_table+0;
@@ -401,7 +401,7 @@ static void do_timer(int regs)
         static int avg_cnt = 0;
  
         jiffies++;
-       if (3 & ((struct pt_regs *) regs)->cs) {
+       if ((VM_MASK & regs->eflags) || (3 & regs->cs)) {
                 current->utime++;
                 /* Update ITIMER_VIRT for current task if not in a system call */
                 if (current->it_virt_value && !(--current->it_virt_value)) {
@@ -412,7 +412,7 @@ static void do_timer(int regs)
                 current->stime++;
  #ifdef PROFILE_SHIFT
                 if (prof_buffer && current != task[0]) {
-                       unsigned long eip = ((struct pt_regs *) regs)->eip;
+                       unsigned long eip = regs->eip;
                         eip >>= PROFILE_SHIFT;
                         if (eip < prof_len)
                                 prof_buffer[eip]++;
@@ -543,5 +543,5 @@ void sched_init(void)
         outb_p(0x36,0x43);              /* binary, mode 3, LSB/MSB, ch 0 */
         outb_p(LATCH & 0xff , 0x40);    /* LSB */
         outb(LATCH >> 8 , 0x40);        /* MSB */
-       request_irq(TIMER_IRQ,do_timer);
+       request_irq(TIMER_IRQ,(void (*)(int)) do_timer);
  }
diff --git a/kernel/signal.c b/kernel/signal.c

index ca850fa7b121c0bb1383afb642252773db3cda96..85b248b95f972e4669103717281588c259138cc5 100644 (file)
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -135,11 +135,6 @@ int do_signal(long signr,struct pt_regs * regs)
         int longs;
         unsigned long * tmp_esp;
  
-#ifdef notdef
-       printk("pid: %d, signr: %x, eax=%d, oeax = %d, int=%d\n", 
-               current->pid, signr, regs->eax, regs->orig_eax, 
-               sa->sa_flags & SA_INTERRUPT);
-#endif
         sa_handler = (unsigned long) sa->sa_handler;
         if ((regs->orig_eax != -1) &&
             ((regs->eax == -ERESTARTSYS) || (regs->eax == -ERESTARTNOINTR))) {
diff --git a/kernel/sys.c b/kernel/sys.c

index 94a8de54e76587fffa9f6d3907b5f3e7b48beb3d..7368805f1c39c4e2ac14d99424ff248b00aec271 100644 (file)
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -13,12 +13,14 @@
  #include <linux/utsname.h>
  #include <linux/param.h>
  #include <linux/resource.h>
+#include <linux/signal.h>
  #include <linux/string.h>
+#include <linux/ptrace.h>
  
  #include <asm/segment.h>
  
  /*
- * this indicates wether you can reboot with ctrl-alt-del: the deault is yes
+ * this indicates whether you can reboot with ctrl-alt-del: the default is yes
   */
  static int C_A_D = 1;
  
@@ -128,6 +130,53 @@ int sys_prof()
         return -ENOSYS;
  }
  
+unsigned long save_v86_state(int signr,struct vm86_regs * regs) /* copies *regs out through current->vm86_info and restores the stashed esp0; returns the esp0 that was active on entry. signr is not used in this body */
+{
+       unsigned long stack; /* esp0 in effect on entry; this is the return value */
+
+       if (!current->vm86_info) { /* sys_vm86() never recorded a destination struct */
+               printk("no vm86_info: BAD\n");
+               do_exit(SIGSEGV);
+       }
+       memcpy_tofs(&(current->vm86_info->regs),regs,sizeof(*regs)); /* destination is the pointer sys_vm86() stored; presumably user space (..._tofs) - NOTE(review): no access check visible here, confirm */
+       stack = current->tss.esp0;
+       current->tss.esp0 = current->saved_kernel_stack; /* put back the esp0 that sys_vm86() saved */
+       current->saved_kernel_stack = 0; /* zero lets sys_vm86() accept a new call (it returns -EPERM while this is non-zero) */
+       return stack;
+}
+
+int sys_vm86(struct vm86_struct * v86) /* loads the register image from *v86, forces VM_MASK on in its eflags and leaves through ret_from_sys_call; returns -EPERM if a previous call is still outstanding */
+{
+       struct vm86_struct info; /* private copy of the caller's struct, lives on this stack */
+       struct pt_regs * pt_regs = (struct pt_regs *) &v86; /* NOTE(review): treats the address of the first argument as the saved register frame - relies on the syscall entry stack layout, confirm */
+
+       if (current->saved_kernel_stack) /* non-zero until save_v86_state() clears it */
+               return -EPERM;
+       memcpy_fromfs(&info,v86,sizeof(info)); /* presumably a user-space read (..._fromfs); NOTE(review): v86 is not checked before use - confirm */
+/*
+ * make sure the vm86() system call doesn't try to do anything silly
+ */
+       info.regs.__null_ds = 0;
+       info.regs.__null_es = 0;
+       info.regs.__null_fs = 0;
+       info.regs.__null_gs = 0;
+/*
+ * The eflags register is also special: we cannot trust that the user
+ * has set it up safely, so this makes sure interrupt etc flags are
+ * inherited from protected mode.
+ */
+       info.regs.eflags &= 0x00000dd5; /* only these bits are taken from the caller */
+       info.regs.eflags |= 0xfffff22a & pt_regs->eflags; /* the complementary mask: every other bit comes from the saved eflags */
+       info.regs.eflags |= VM_MASK; /* always set, whatever either source had */
+       current->saved_kernel_stack = current->tss.esp0; /* stashed so save_v86_state() can restore it */
+       current->tss.esp0 = (unsigned long) pt_regs;
+       current->vm86_info = v86; /* save_v86_state() writes the registers back through this pointer */
+       __asm__ __volatile__("movl %0,%%esp\n\t"
+               "pushl $ret_from_sys_call\n\t"
+               "ret"::"g" ((long) &(info.regs)),"a" (info.regs.eax)); /* esp = &info.regs, eax = info.regs.eax, then push+ret transfers control to ret_from_sys_call */
+       return 0; /* NOTE(review): looks unreachable, the asm above does not fall through - confirm */
+}
+
  extern void hard_reset_now(void);
  
  /*
diff --git a/kernel/sys_call.S b/kernel/sys_call.S

index 44c072326c55e23381cebd23b38989f8b98fc554..916455ef628a3b1edc5d0d7238a24bf82ae70b37 100644 (file)
--- a/kernel/sys_call.S
+++ b/kernel/sys_call.S
@@ -58,6 +58,10 @@ EFLAGS               = 0x38
  OLDESP         = 0x3C
  OLDSS          = 0x40
  
+IF_MASK                = 0x00000200
+NT_MASK                = 0x00004000
+VM_MASK                = 0x00020000
+
  /*
   * these are offsets into the task-struct.
   */
@@ -67,6 +71,7 @@ priority      = 8
  signal         = 12
  sigaction      = 16            # MUST be 16 (=len of sigaction)
  blocked                = (33*16)
+saved_kernel_stack = ((33*16)+4)
  
  /*
   * offsets within sigaction
@@ -121,11 +126,17 @@ _system_call:
         movl %eax,EAX(%esp)             # save the return value
         .align 4,0x90
  ret_from_sys_call:
+       movl EFLAGS(%esp),%eax
+       testl $VM_MASK,%eax
+       jne 1f
         cmpw $0x0f,CS(%esp)             # was old code segment supervisor ?
         jne 2f
         cmpw $0x17,OLDSS(%esp)          # was stack segment = 0x17 ?
         jne 2f
-1:     cmpl $0,_need_resched
+1:     orl $IF_MASK,%eax               # these just try to make sure
+       andl $~NT_MASK,%eax             # the program doesn't do anything
+       movl %eax,EFLAGS(%esp)          # stupid
+       cmpl $0,_need_resched
         jne reschedule
         movl _current,%eax
         cmpl _task,%eax                 # task[0] cannot have signals
@@ -141,10 +152,18 @@ ret_from_sys_call:
         bsfl %ecx,%ecx
         je 2f
         btrl %ecx,%ebx
+       incl %ecx
         movl %ebx,signal(%eax)
         movl %esp,%ebx
+       testl $VM_MASK,EFLAGS(%esp)
+       je 3f
         pushl %ebx
-       incl %ecx
+       pushl %ecx
+       call _save_v86_state
+       popl %ecx
+       movl %eax,%ebx
+       movl %eax,%esp
+3:     pushl %ebx
         pushl %ecx
         call _do_signal
         popl %ecx
diff --git a/kernel/traps.c b/kernel/traps.c

index 8d7d0397f1224c76416807a514ef74e21a4e1558..9a5f086c6885dd2a071640456be2a9bab425936a 100644 (file)
--- a/kernel/traps.c
+++ b/kernel/traps.c
@@ -63,7 +63,7 @@ static void die_if_kernel(char * str,long esp_ptr,long nr)
         long * esp = (long *) esp_ptr;
         int i;
  
-       if ((0xffff & esp[1]) == 0xf)
+       if ((esp[2] & VM_MASK) || ((0xffff & esp[1]) == 0xf))
                 return;
         printk("%s: %04x\n\r",str,nr&0xffff);
         printk("EIP:    %04x:%p\nEFLAGS: %p\n", 0xffff & esp[1],esp[0],esp[2]);
diff --git a/mm/memory.c b/mm/memory.c

index 1595b4f1d949f968f6cdbd24a47c501ae244fd02..5fd804b63bdb04e9afbf1f932b8e4ccdde886dc1 100644 (file)
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -34,11 +34,9 @@
  #include <linux/sched.h>
  #include <linux/head.h>
  #include <linux/kernel.h>
+#include <linux/errno.h>
  #include <linux/string.h>
  
-#define CODE_SPACE(addr) ((((addr)+4095)&~4095) < \
-current->start_code + current->end_code)
-
  unsigned long low_memory = 0;
  unsigned long high_memory = 0;
  unsigned long free_page_list = 0;
@@ -85,45 +83,92 @@ void free_page(unsigned long addr)
         printk("trying to free free page (%08x): memory probably corrupted\n",addr);
  }
  
+static void free_one_table(unsigned long * page_dir) /* page_dir points at ONE page-directory entry; that entry is zero on return */
+{
+       int j;
+       unsigned long pg_table = *page_dir; /* raw directory entry: table address plus flag bits */
+       unsigned long * page_table;
+
+       if (!pg_table) /* empty slot, nothing to release */
+               return;
+       if (!(pg_table & 1)) { /* non-zero entry with bit 0 clear: report it and drop it */
+               printk("Bad page table: [%08x]=%08x\n",page_dir,pg_table);
+               *page_dir = 0;
+               return;
+       }
+       *page_dir = 0;
+       if (pg_table < low_memory) /* tables below low_memory are only unlinked, never freed */
+               return;
+       page_table = (unsigned long *) (pg_table & 0xfffff000); /* strip the low 12 flag bits */
+       for (j = 0 ; j < 1024 ; j++,page_table++) { /* walk all 1024 entries of the table */
+               unsigned long pg = *page_table;
+               
+               if (!pg)
+                       continue;
+               *page_table = 0;
+               if (1 & pg) /* bit 0 set: hand the page address to free_page() */
+                       free_page(0xfffff000 & pg);
+               else /* bit 0 clear on a non-zero entry: pg >> 1 goes to swap_free() */
+                       swap_free(pg >> 1);
+       }
+       free_page(0xfffff000 & pg_table); /* finally release the page that held the table */
+}
+
  /*
- * This function frees a continuos block of page tables, as needed
- * by 'exit()'. As does copy_page_tables(), this handles only 4Mb blocks.
+ * This function clears all user-level page tables of a process - this
+ * is needed by execve(), so that old pages aren't in the way. Note that
+ * unlike 'free_page_tables()', this function still leaves a valid
+ * page-table-tree in memory: it just removes the user pages. The two
+ * functions are similar, but there is a fundamental difference.
   */
-int free_page_tables(unsigned long from,unsigned long size)
+void clear_page_tables(struct task_struct * tsk) /* releases directory slots 0..767 of tsk via free_one_table(); the directory page itself is kept */
  {
-       unsigned long page;
-       unsigned long page_dir;
-       unsigned long *pg_table;
-       unsigned long * dir, nr;
+       int i;
+       unsigned long * page_dir; /* cursor over tsk's page directory */
  
-       if (from & 0x3fffff)
-               panic("free_page_tables called with wrong alignment");
-       if (!from)
+       if (!tsk)
+               return;
+       if (tsk == task[0])
+               panic("task[0] (swapper) doesn't support exec() yet\n");
+       page_dir = (unsigned long *) tsk->tss.cr3; /* tss.cr3 is used directly as the directory's address */
+       if (!page_dir) {
+               printk("Trying to clear kernel page-directory: not good\n");
+               return;
+       }
+       for (i = 0 ; i < 768 ; i++,page_dir++) /* only the first 768 of the 1024 slots; the rest stay as they are */
+               free_one_table(page_dir);
+       invalidate();
+       return;
+}
+
+/*
+ * This function frees up all page tables of a process when it exits.
+ */
+void free_page_tables(struct task_struct * tsk) /* unlike clear_page_tables(): walks all 1024 slots and frees the directory page too */
+{
+       int i;
+       unsigned long pg_dir; /* the directory being torn down, read from tss.cr3 before it is replaced */
+       unsigned long * page_dir;
+
+       if (!tsk)
+               return;
+       if (tsk == task[0]) {
+               printk("task[0] (swapper) killed: unable to recover\n");
                 panic("Trying to free up swapper memory space");
-       size = (size + 0x3fffff) >> 22;
-       dir = (unsigned long *) ((from>>20) & 0xffc); /* _pg_dir = 0 */
-       for ( ; size-->0 ; dir++) {
-               if (!(page_dir = *dir))
-                       continue;
-               *dir = 0;
-               if (!(page_dir & 1)) {
-                       printk("free_page_tables: bad page directory.");
-                       continue;
-               }
-               pg_table = (unsigned long *) (0xfffff000 & page_dir);
-               for (nr=0 ; nr<1024 ; nr++,pg_table++) {
-                       if (!(page = *pg_table))
-                               continue;
-                       *pg_table = 0;
-                       if (1 & page)
-                               free_page(0xfffff000 & page);
-                       else
-                               swap_free(page >> 1);
-               }
-               free_page(0xfffff000 & page_dir);
         }
+       pg_dir = tsk->tss.cr3;
+       if (!pg_dir) {
+               printk("Trying to free kernel page-directory: not good\n");
+               return;
+       }
+       tsk->tss.cr3 = (unsigned long) swapper_pg_dir; /* repoint tsk at swapper_pg_dir before its own directory is freed */
+       if (tsk == current) /* tsk is the running task: load the new value into %cr3 as well */
+               __asm__ __volatile__("movl %0,%%cr3"::"a" (tsk->tss.cr3));
+       page_dir = (unsigned long *) pg_dir;
+       for (i = 0 ; i < 1024 ; i++,page_dir++) /* every slot; free_one_table() itself skips tables below low_memory */
+               free_one_table(page_dir);
+       free_page(pg_dir); /* and the directory page itself */
         invalidate();
-       return 0;
  }
  
  /*
@@ -143,66 +188,80 @@ int free_page_tables(unsigned long from,unsigned long size)
   * 1 Mb-range, so the pages can be shared with the kernel. Thus the
   * special case for nr=xxxx.
   */
-int copy_page_tables(unsigned long from,unsigned long to,long size)
+int copy_page_tables(struct task_struct * tsk) /* builds a new page directory for tsk from current's; returns 0, or -ENOMEM when a page cannot be allocated */
  {
-       unsigned long * from_page_table;
-       unsigned long * to_page_table;
-       unsigned long this_page;
-       unsigned long * from_dir, * to_dir;
-       unsigned long new_page;
-       unsigned long nr;
-
-       if ((from&0x3fffff) || (to&0x3fffff))
-               panic("copy_page_tables called with wrong alignment");
-       from_dir = (unsigned long *) ((from>>20) & 0xffc); /* _pg_dir = 0 */
-       to_dir = (unsigned long *) ((to>>20) & 0xffc);
-       size = ((unsigned) (size+0x3fffff)) >> 22;
-       for( ; size-->0 ; from_dir++,to_dir++) {
-               if (*to_dir)
-                       printk("copy_page_tables: already exist, "
-                               "probable memory corruption\n");
-               if (!*from_dir)
+       int i;
+       unsigned long temp_page = 0; /* spare page held ready for reading in a swapped-out entry; 0 = none held */
+       unsigned long old_pg_dir, *old_page_dir;
+       unsigned long new_pg_dir, *new_page_dir;
+
+       old_pg_dir = current->tss.cr3; /* the source is always current's directory */
+       new_pg_dir = get_free_page(GFP_KERNEL);
+       if (!new_pg_dir)
+               return -ENOMEM;
+       tsk->tss.cr3 = new_pg_dir; /* set before the loop, so the error paths can hand tsk to free_page_tables() */
+       old_page_dir = (unsigned long *) old_pg_dir;
+       new_page_dir = (unsigned long *) new_pg_dir;
+       for (i = 0 ; i < 1024 ; i++,old_page_dir++,new_page_dir++) { /* one pass per directory slot */
+               int j;
+               unsigned long old_pg_table, *old_page_table;
+               unsigned long new_pg_table, *new_page_table;
+
+               old_pg_table = *old_page_dir;
+               if (!old_pg_table)
                         continue;
-               if (!(1 & *from_dir)) {
+               if (!(1 & old_pg_table)) {
                         printk("copy_page_tables: page table swapped out, "
                                 "probable memory corruption");
-                       *from_dir = 0;
+                       *old_page_dir = 0;
+                       continue;
+               }
+               if (old_pg_table < low_memory) { /* tables below low_memory are referenced from both directories, not duplicated */
+                       *new_page_dir = old_pg_table;
                         continue;
                 }
-               from_page_table = (unsigned long *) (0xfffff000 & *from_dir);
-               if (!(to_page_table = (unsigned long *) get_free_page(GFP_KERNEL)))
-                       return -1;      /* Out of memory, see freeing */
-               *to_dir = ((unsigned long) to_page_table) | PAGE_ACCESSED | 7;
-               nr = (from==0)?0xA0:1024;
-               for ( ; nr-- > 0 ; from_page_table++,to_page_table++) {
+               new_pg_table = get_free_page(GFP_KERNEL);
+               if (!new_pg_table) {
+                       free_page_tables(tsk); /* undo everything built for tsk so far */
+                       free_page(temp_page);
+                       return -ENOMEM;
+               }
+               *new_page_dir = new_pg_table | PAGE_ACCESSED | 7;
+               old_page_table = (unsigned long *) (0xfffff000 & old_pg_table);
+               new_page_table = (unsigned long *) (0xfffff000 & new_pg_table);
+               for (j = 0 ; j < 1024 ; j++,old_page_table++,new_page_table++) {
+                       unsigned long pg;
  repeat:
-                       this_page = *from_page_table;
-                       if (!this_page)
+                       pg = *old_page_table; /* re-read on every pass through repeat */
+                       if (!pg)
                                 continue;
-                       if (!(1 & this_page)) {
-                               if (!(new_page = get_free_page(GFP_KERNEL)))
-                                       return -1;
-                               ++current->rss;
-                               read_swap_page(this_page>>1, (char *) new_page);
-                               if (*from_page_table != this_page) {
-                                       free_page(new_page);
-                                       goto repeat;
-                               }
-                               *to_page_table = this_page;
-                               *from_page_table = new_page | (PAGE_DIRTY | PAGE_ACCESSED | 7);
+                       if (pg & 1) { /* bit 0 set: entry refers to a page, not to swap */
+                               pg &= ~2; /* clear bit 1 (the i386 read/write bit) in the value both tables will hold */
+                               *new_page_table = pg;
+                               if (pg < low_memory) /* below low_memory: source entry and mem_map are left untouched */
+                                       continue;
+                               *old_page_table = pg; /* the source entry loses bit 1 as well */
+                               mem_map[(pg-low_memory)>>12]++; /* presumably the per-page use count - one more user of this page */
                                 continue;
                         }
-                       this_page &= ~2;
-                       *to_page_table = this_page;
-                       if (this_page > low_memory) {
-                               *from_page_table = this_page;
-                               this_page -= low_memory;
-                               this_page >>= 12;
-                               if (!mem_map[this_page]++)
-                                       --nr_free_pages;
+                       if (!temp_page) {
+                               temp_page = get_free_page(GFP_KERNEL);
+                               if (!temp_page) {
+                                       free_page_tables(tsk);
+                                       return -ENOMEM;
+                               }
+                               goto repeat; /* start over with the entry re-read now that a spare page is in hand */
                         }
+                       ++current->rss; /* NOTE(review): also runs again on each retry below - confirm the accounting */
+                       read_swap_page(pg>>1, (char *) temp_page);
+                       if (*old_page_table != pg) /* entry changed while reading: retry, temp_page is kept for reuse */
+                               goto repeat;
+                       *new_page_table = pg; /* the new table gets the swap entry */
+                       *old_page_table = temp_page | (PAGE_DIRTY | PAGE_ACCESSED | 7); /* the source table gets the page just read in */
+                       temp_page = 0; /* consumed; a fresh spare is allocated when next needed */
                 }
         }
+       free_page(temp_page); /* temp_page may still be 0 here */
         invalidate();
         return 0;
  }
@@ -222,7 +281,7 @@ int unmap_page_range(unsigned long from, unsigned long size)
         if (!from)
                 panic("unmap_page_range trying to free swapper memory space");
         size = (size + 0xfff) >> 12;
-       dir = (unsigned long *) ((from >> 20) & 0xffc); /* _pg_dir = 0 */
+       dir = (unsigned long *) (current->tss.cr3 + ((from >> 20) & 0xffc));
         poff = (from >> 12) & 0x3ff;
         if ((pcnt = 1024 - poff) > size)
                 pcnt = size;
@@ -284,7 +343,7 @@ int remap_page_range(unsigned long from, unsigned long to, unsigned long size,
  
         if ((from & 0xfff) || (to & 0xfff))
                 panic("remap_page_range called with wrong alignment");
-       dir = (unsigned long *) ((from >> 20) & 0xffc); /* _pg_dir = 0 */
+       dir = (unsigned long *) (current->tss.cr3 + ((from >> 20) & 0xffc));
         size = (size + 0xfff) >> 12;
         poff = (from >> 12) & 0x3ff;
         if ((pcnt = 1024 - poff) > size)
@@ -363,7 +422,7 @@ int remap_page_range(unsigned long from, unsigned long to, unsigned long size,
   * out of memory (either when trying to access page-table or
   * page.)
   */
-static unsigned long put_page(unsigned long page,unsigned long address)
+static unsigned long put_page(struct task_struct * tsk,unsigned long page,unsigned long address)
  {
         unsigned long tmp, *page_table;
  
@@ -377,13 +436,13 @@ static unsigned long put_page(unsigned long page,unsigned long address)
                 printk("put_page: mem_map disagrees with %p at %p\n",page,address);
                 return 0;
         }
-       page_table = (unsigned long *) ((address>>20) & 0xffc);
+       page_table = (unsigned long *) (tsk->tss.cr3 + ((address>>20) & 0xffc));
         if ((*page_table)&1)
                 page_table = (unsigned long *) (0xfffff000 & *page_table);
         else {
                 tmp = get_free_page(GFP_KERNEL);
                 if (!tmp) {
-                       oom(current);
+                       oom(tsk);
                         tmp = BAD_PAGETABLE;
                 }
                 *page_table = tmp | PAGE_ACCESSED | 7;
@@ -406,7 +465,7 @@ static unsigned long put_page(unsigned long page,unsigned long address)
   * and we want the dirty-status to be correct (for VM). Thus the same
   * routine, but this time we mark it dirty too.
   */
-unsigned long put_dirty_page(unsigned long page, unsigned long address)
+unsigned long put_dirty_page(struct task_struct * tsk, unsigned long page, unsigned long address)
  {
         unsigned long tmp, *page_table;
  
@@ -416,7 +475,7 @@ unsigned long put_dirty_page(unsigned long page, unsigned long address)
                 printk("put_dirty_page: trying to put page %p at %p\n",page,address);
         if (mem_map[(page-low_memory)>>12] != 1)
                 printk("mem_map disagrees with %p at %p\n",page,address);
-       page_table = (unsigned long *) ((address>>20) & 0xffc);
+       page_table = (unsigned long *) (tsk->tss.cr3 + ((address>>20) & 0xffc));
         if ((*page_table)&1)
                 page_table = (unsigned long *) (0xfffff000 & *page_table);
         else {
@@ -491,7 +550,7 @@ void do_wp_page(unsigned long error_code, unsigned long address,
  {
         unsigned long pde, pte, page;
  
-       pde = (address>>20) & 0xffc;
+       pde = tsk->tss.cr3 + ((address>>20) & 0xffc);
         pte = *(unsigned long *) pde;
         if ((pte & 3) != 3) {
                 printk("do_wp_page: bogus page-table at address %08x (%08x)\n",address,pte);
@@ -499,12 +558,6 @@ void do_wp_page(unsigned long error_code, unsigned long address,
                 send_sig(SIGSEGV, tsk, 1);
                 return;
         }
-       if (address < TASK_SIZE) {
-               printk("do_wp_page: kernel WP error at address %08x (%08x)\n",address,pte);
-               *(unsigned long *) pde = BAD_PAGETABLE | 7;
-               send_sig(SIGSEGV, tsk, 1);
-               return;
-       }
         pte &= 0xfffff000;
         pte += (address>>10) & 0xffc;
         page = *(unsigned long *) pte;
@@ -514,7 +567,7 @@ void do_wp_page(unsigned long error_code, unsigned long address,
                 send_sig(SIGSEGV, tsk, 1);
                 return;
         }
-       ++current->min_flt;
+       tsk->min_flt++;
         un_wp_page((unsigned long *) pte, tsk);
  }
  
@@ -522,7 +575,7 @@ void write_verify(unsigned long address)
  {
         unsigned long page;
  
-       page = *(unsigned long *) ((address>>20) & 0xffc);
+       page = *(unsigned long *) (current->tss.cr3 + ((address>>20) & 0xffc));
         if (!(page & PAGE_PRESENT))
                 return;
         page &= 0xfffff000;
@@ -532,16 +585,16 @@ void write_verify(unsigned long address)
         return;
  }
  
-static void get_empty_page(unsigned long address)
+static void get_empty_page(struct task_struct * tsk, unsigned long address)
  {
         unsigned long tmp;
  
         tmp = get_free_page(GFP_KERNEL);
         if (!tmp) {
-               oom(current);
+               oom(tsk);
                 tmp = BAD_PAGE;
         }
-       if (!put_page(tmp,address))
+       if (!put_page(tsk,tmp,address))
                 free_page(tmp);
  }
  
@@ -553,7 +606,8 @@ static void get_empty_page(unsigned long address)
   * NOTE! This assumes we have checked that p != current, and that they
   * share the same executable or library.
   */
-static int try_to_share(unsigned long address, struct task_struct * p)
+static int try_to_share(unsigned long address, struct task_struct * tsk,
+       struct task_struct * p)
  {
         unsigned long from;
         unsigned long to;
@@ -561,9 +615,8 @@ static int try_to_share(unsigned long address, struct task_struct * p)
         unsigned long to_page;
         unsigned long phys_addr;
  
-       from_page = to_page = ((address>>20) & 0xffc);
-       from_page += ((p->start_code>>20) & 0xffc);
-       to_page += ((current->start_code>>20) & 0xffc);
+       from_page = p->tss.cr3 + ((address>>20) & 0xffc);
+       to_page = tsk->tss.cr3 + ((address>>20) & 0xffc);
  /* is there a page-directory at from? */
         from = *(unsigned long *) from_page;
         if (!(from & 1))
@@ -607,7 +660,7 @@ static int try_to_share(unsigned long address, struct task_struct * p)
   * We first check if it is at all feasible by checking executable->i_count.
   * It should be >1 if there are other tasks sharing this inode.
   */
-static int share_page(struct inode * inode, unsigned long address)
+static int share_page(struct task_struct * tsk, struct inode * inode, unsigned long address)
  {
         struct task_struct ** p;
         int i;
@@ -617,19 +670,16 @@ static int share_page(struct inode * inode, unsigned long address)
         for (p = &LAST_TASK ; p > &FIRST_TASK ; --p) {
                 if (!*p)
                         continue;
-               if (current == *p)
+               if (tsk == *p)
                         continue;
-               if (address < LIBRARY_OFFSET) {
-                       if (inode != (*p)->executable)
-                               continue;
-               } else {
+               if (inode != (*p)->executable) {
                         for (i=0; i < (*p)->numlibraries; i++)
                                 if (inode == (*p)->libraries[i].library)
                                         break;
                         if (i >= (*p)->numlibraries)
                                 continue;
                 }
-               if (try_to_share(address,*p))
+               if (try_to_share(address,tsk,*p))
                         return 1;
         }
         return 0;
@@ -671,15 +721,7 @@ void do_no_page(unsigned long error_code, unsigned long address,
         unsigned int block,i;
         struct inode * inode;
  
-       if (address < TASK_SIZE) {
-               printk("\n\rBAD!! KERNEL PAGE MISSING\n\r");
-               do_exit(SIGSEGV);
-       }
-       if (address - tsk->start_code >= TASK_SIZE) {
-               printk("Bad things happen: nonexistent page error in do_no_page\n\r");
-               do_exit(SIGSEGV);
-       }
-       page = get_empty_pgtable((unsigned long *) ((address >> 20) & 0xffc));
+       page = get_empty_pgtable((unsigned long *) (tsk->tss.cr3 + ((address >> 20) & 0xffc)));
         if (!page)
                 return;
         page &= 0xfffff000;
@@ -696,53 +738,56 @@ void do_no_page(unsigned long error_code, unsigned long address,
                 return;
         }
         address &= 0xfffff000;
-       tmp = address - tsk->start_code;
         inode = NULL;
         block = 0;
-       if (tmp < tsk->end_data) {
+       if (address < tsk->end_data) {
                 inode = tsk->executable;
-               block = 1 + tmp / BLOCK_SIZE;
+               block = 1 + address / BLOCK_SIZE;
         } else {
                 i = tsk->numlibraries;
                 while (i-- > 0) {
-                       if (tmp < tsk->libraries[i].start)
+                       if (address < tsk->libraries[i].start)
                                 continue;
-                       block = tmp - tsk->libraries[i].start;
-                       if (block >= tsk->libraries[i].length)
+                       block = address - tsk->libraries[i].start;
+                       if (block >= tsk->libraries[i].length + tsk->libraries[i].bss)
                                 continue;
                         inode = tsk->libraries[i].library;
-                       block = 1 + block / BLOCK_SIZE;
+                       if (block < tsk->libraries[i].length)
+                               block = 1 + block / BLOCK_SIZE;
+                       else
+                               block = 0;
                         break;
                 }
         }
         if (!inode) {
                 ++tsk->min_flt;
-               get_empty_page(address);
+               get_empty_page(tsk,address);
                 if (tsk != current)
                         return;
-               if (tmp >= LIBRARY_OFFSET || tmp < tsk->brk)
+               if (address < tsk->brk)
                         return;
-               if (tmp+8192 >= (user_esp & 0xfffff000))
+               if (address+8192 >= (user_esp & 0xfffff000))
                         return;
                 send_sig(SIGSEGV,tsk,1);
                 return;
         }
-       if (tsk == current)
-               if (share_page(inode,tmp)) {
-                       ++tsk->min_flt;
-                       return;
-               }
+       if (share_page(tsk,inode,address)) {
+               ++tsk->min_flt;
+               return;
+       }
         ++tsk->maj_flt;
         page = get_free_page(GFP_KERNEL);
         if (!page) {
                 oom(current);
-               put_page(BAD_PAGE,address);
+               put_page(tsk,BAD_PAGE,address);
                 return;
         }
-       for (i=0 ; i<4 ; block++,i++)
-               nr[i] = bmap(inode,block);
-       bread_page(page,inode->i_dev,nr);
-       i = tmp + 4096 - tsk->end_data;
+       if (block) {
+               for (i=0 ; i<4 ; block++,i++)
+                       nr[i] = bmap(inode,block);
+               bread_page(page,inode->i_dev,nr);
+       }
+       i = address + 4096 - tsk->end_data;
         if (i>4095)
                 i = 0;
         tmp = page + 4096;
@@ -750,7 +795,7 @@ void do_no_page(unsigned long error_code, unsigned long address,
                 tmp--;
                 *(char *)tmp = 0;
         }
-       if (put_page(page,address))
+       if (put_page(tsk,page,address))
                 return;
         free_page(page);
         oom(current);
@@ -758,9 +803,8 @@ void do_no_page(unsigned long error_code, unsigned long address,
  
  void show_mem(void)
  {
-       int i,j,k,free=0,total=0;
+       int i,free=0,total=0;
         int shared = 0;
-       unsigned long * pg_tbl;
  
         printk("Mem-info:\n\r");
         printk("Free pages:    %6d\n",nr_free_pages);
@@ -776,41 +820,14 @@ void show_mem(void)
         }
         printk("%d free pages of %d\n\r",free,total);
         printk("%d pages shared\n\r",shared);
-       printk("%d free pages via nr_free_pages\n\r", nr_free_pages);
-       k = 0;
-       for(i=4 ; i<1024 ;) {
-               if (1&pg_dir[i]) {
-                       if (pg_dir[i]>high_memory) {
-                               printk("page directory[%d]: %08X\n\r",
-                                       i,pg_dir[i]);
-                               i++;
-                               continue;
-                       }
-                       if (pg_dir[i]>low_memory)
-                               free++,k++;
-                       pg_tbl=(unsigned long *) (0xfffff000 & pg_dir[i]);
-                       for(j=0 ; j<1024 ; j++)
-                               if ((pg_tbl[j]&1) && pg_tbl[j]>low_memory)
-                                       if (pg_tbl[j]>high_memory)
-                                               printk("page_dir[%d][%d]: %08X\n\r",
-                                                       i,j, pg_tbl[j]);
-                                       else
-                                               k++,free++;
-               }
-               i++;
-               if (!(i&15) && k) {
-                       k++,free++;     /* one page/process for task_struct */
-                       printk("Process %d: %d pages\n\r",(i>>4)-1,k);
-                       k = 0;
-               }
-       }
-       printk("Memory found: %d (%d)\n\r",free-shared,total);
  }
  
  
-/* This routine handles page faults.  It determines the address,
-   and the problem then passes it off to one of the appropriate
-   routines. */
+/*
+ * This routine handles page faults.  It determines the address,
+ * and the problem, and then passes it off to one of the appropriate
+ * routines.
+ */
  void do_page_fault(unsigned long *esp, unsigned long error_code)
  {
         unsigned long address;
@@ -822,13 +839,10 @@ void do_page_fault(unsigned long *esp, unsigned long error_code)
                 user_esp = 0;
         /* get the address */
         __asm__("movl %%cr2,%0":"=r" (address));
-       if (!(error_code & 1)) {
+       if (!(error_code & 1))
                 do_no_page(error_code, address, current, user_esp);
-               return;
-       } else {
+       else
                 do_wp_page(error_code, address, current, user_esp);
-               return;
-       }
  }
  
  unsigned long mem_init(unsigned long start_mem, unsigned long end_mem)
diff --git a/mm/swap.c b/mm/swap.c

index cc95a72125fdb9c2a5539236476ed09a1b4f43c2..ce3d7982eba000161a571ef9b17d466ee5a36e57 100644 (file)
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -179,16 +179,9 @@ int try_to_swap_out(unsigned long * table_ptr)
         return 1;
  }
  
-/*
- * We never page the pages in task[0] - kernel memory.
- * We page all other pages.
- */
-#define FIRST_VM_PAGE (TASK_SIZE>>12)
-#define LAST_VM_PAGE (1024*1024)
-#define VM_PAGES (LAST_VM_PAGE - FIRST_VM_PAGE)
-
-static unsigned int dir_entry = 1024;
-static unsigned int page_entry = 0;
+static int swap_task = 1;
+static int swap_table = 0;
+static int swap_page = 0;
  
  /*
   * sys_idle() does nothing much: it just searches for likely candidates for
@@ -201,23 +194,32 @@ int sys_idle(void)
         unsigned long page;
  
         need_resched = 1;
-       if (dir_entry >= 1024)
-               dir_entry = FIRST_VM_PAGE>>10;
-       p = task[dir_entry >> 4];
-       page = pg_dir[dir_entry];
-       if (!(page & 1) || !p || !p->swappable) {
-               dir_entry++;
+       if (swap_task >= NR_TASKS)
+               swap_task = 1;
+       p = task[swap_task];
+       if (!p || !p->swappable) {
+               swap_task++;
+               return 0;
+       }
+       if (swap_table >= 1024) {
+               swap_task++;
+               swap_table = 0;
+               return 0;
+       }
+       page = ((unsigned long *) p->tss.cr3)[swap_table];
+       if (!(page & 1) || (page < low_memory)) {
+               swap_table++;
                 return 0;
         }
         page &= 0xfffff000;
-       if (page_entry >= 1024) {
-               page_entry = 0;
-               dir_entry++;
+       if (swap_page >= 1024) {
+               swap_page = 0;
+               swap_table++;
                 return 0;
         }
-       page = *(page_entry + (unsigned long *) page);
+       page = *(swap_page + (unsigned long *) page);
         if ((page < low_memory) || !(page & PAGE_PRESENT) || (page & PAGE_ACCESSED))
-               page_entry++;
+               swap_page++;
         return 0;
  }
  
@@ -231,48 +233,54 @@ int sys_idle(void)
   */
  int swap_out(unsigned int priority)
  {
-       int counter = VM_PAGES / 2;
+       int counter = NR_TASKS;
         int pg_table;
         struct task_struct * p;
  
+       counter <<= priority;
+check_task:
+       if (counter-- < 0)
+               return 0;
+       if (swap_task >= NR_TASKS) {
+               swap_task = 1;
+               goto check_task;
+       }
+       p = task[swap_task];
+       if (!p || !p->swappable) {
+               swap_task++;
+               goto check_task;
+       }
  check_dir:
-       if (counter < 0)
-               goto no_swap;
-       if (dir_entry >= 1024)
-               dir_entry = FIRST_VM_PAGE>>10;
-       if (!(p = task[dir_entry >> 4]) || !p->swappable) {
-               counter -= 1024;
-               dir_entry++;
+       if (swap_table >= 1024) {
+               swap_table = 0;
+               swap_task++;
+               goto check_task;
+       }
+       pg_table = ((unsigned long *) p->tss.cr3)[swap_table];
+       if (pg_table < low_memory) {
+               swap_table++;
                 goto check_dir;
         }
-       if (!(1 & (pg_table = pg_dir[dir_entry]))) {
-               if (pg_table) {
-                       printk("bad page-table at pg_dir[%d]: %08x\n\r",
-                               dir_entry,pg_table);
-                       pg_dir[dir_entry] = 0;
-               }
-               counter -= 1024;
-               dir_entry++;
+       if (!(1 & pg_table)) {
+               printk("bad page-table at pg_dir[%d]: %08x\n\r",
+                       swap_table,pg_table);
+               ((unsigned long *) p->tss.cr3)[swap_table] = 0;
+               swap_table++;
                 goto check_dir;
         }
         pg_table &= 0xfffff000;
  check_table:
-       if (counter < 0)
-               goto no_swap;
-       if (page_entry >= 1024) {
-               page_entry = 0;
-               dir_entry++;
+       if (swap_page >= 1024) {
+               swap_page = 0;
+               swap_table++;
                 goto check_dir;
         }
-       if (try_to_swap_out(page_entry + (unsigned long *) pg_table)) {
+       if (try_to_swap_out(swap_page + (unsigned long *) pg_table)) {
                 p->rss--;
                 return 1;
         }
-       page_entry++;
-       counter--;
+       swap_page++;
         goto check_table;
-no_swap:
-       return 0;
  }
  
  static int try_to_free_page(void)
@@ -335,10 +343,8 @@ repeat:
         }
         if (priority <= GFP_BUFFER)
                 return 0;
-       if (try_to_free_page()) {
-               schedule();
+       if (try_to_free_page())
                 goto repeat;
-       }
         return 0;
  }
  
@@ -355,8 +361,9 @@ int sys_swapon(const char * specialfile)
  
         if (!suser())
                 return -EPERM;
-       if (!(swap_inode  = namei(specialfile)))
-               return -ENOENT;
+       i = namei(specialfile,&swap_inode);
+       if (i)
+               return i;
         if (swap_file || swap_device || swap_bitmap || swap_lockmap) {
                 iput(swap_inode);
                 return -EBUSY;
diff --git a/net/Makefile b/net/Makefile

index 72a28e1a3dca7685e105f55f671cc4ee45233ce1..b61a843e83bd7b2179d63a50caefa307a164588d 100644 (file)
--- a/net/Makefile
+++ b/net/Makefile
@@ -27,7 +27,7 @@ net.o: $(OBJS) subdirs
  
  
  subdirs: dummy
-       for i in $(SUBDIRS); do (cd $$i; echo $$i; $(MAKE)) || exit; done
+       for i in $(SUBDIRS); do (cd $$i && echo $$i && $(MAKE)) || exit; done
  
  clean:
         rm -f core *.o *.a tmp_make
@@ -37,7 +37,7 @@ dep:
         sed '/\#\#\# Dependencies/q' < Makefile > tmp_make
         for i in *.c;do $(CPP) -M $$i;done >> tmp_make
         cp tmp_make Makefile
-       @for i in $(SUBDIRS); do (cd $$i; echo $$i; $(MAKE) dep || exit; done
+       @for i in $(SUBDIRS); do (cd $$i && echo $$i && $(MAKE) dep) || exit; done
  
  dummy:
  
diff --git a/net/unix.c b/net/unix.c

index b0a2f10a7eea8d44c4e9d4b650296111df60a164..26bc918d2bd3ef124c63b0a7cc5be95c85ebf8a4 100644 (file)
--- a/net/unix.c
+++ b/net/unix.c
@@ -351,7 +351,7 @@ unix_proto_bind(struct socket *sock, struct sockaddr *umyaddr,
         set_fs(get_ds());
         i = do_mknod(fname, S_IFSOCK | 0777, 0);
         if (i == 0)
-               i = open_namei(fname, 0, S_IFSOCK, &upd->inode);
+               i = open_namei(fname, 0, S_IFSOCK, &upd->inode, NULL);
         set_fs(old_fs);
         if (i < 0) {
                 printk("unix_proto_bind: can't open socket %s\n", fname);
author	Linus Torvalds <torvalds@linuxfoundation.org>
	Fri, 23 Nov 2007 20:09:04 +0000 (15:09 -0500)
committer	Linus Torvalds <torvalds@linuxfoundation.org>
	Fri, 23 Nov 2007 20:09:04 +0000 (15:09 -0500)
Makefile		patch \| blob \| history
boot/head.s		patch \| blob \| history
fs/Makefile		patch \| blob \| history
fs/buffer.c		patch \| blob \| history
fs/exec.c		patch \| blob \| history
fs/ext/freelists.c		patch \| blob \| history
fs/ext/inode.c		patch \| blob \| history
fs/ext/symlink.c		patch \| blob \| history
fs/fcntl.c		patch \| blob \| history
fs/locks.c	[new file with mode: 0644]	patch \| blob
fs/minix/bitmap.c		patch \| blob \| history
fs/minix/inode.c		patch \| blob \| history
fs/minix/symlink.c		patch \| blob \| history
fs/msdos/Makefile		patch \| blob \| history
fs/namei.c		patch \| blob \| history
fs/open.c		patch \| blob \| history
fs/select.c		patch \| blob \| history
fs/stat.c		patch \| blob \| history
fs/super.c		patch \| blob \| history
include/asm/system.h		patch \| blob \| history
include/linux/ext_fs_i.h	[new file with mode: 0644]	patch \| blob
include/linux/fs.h		patch \| blob \| history
include/linux/head.h		patch \| blob \| history
include/linux/limits.h		patch \| blob \| history
include/linux/minix_fs_i.h	[new file with mode: 0644]	patch \| blob
include/linux/mm.h		patch \| blob \| history
include/linux/msdos_fs_i.h	[new file with mode: 0644]	patch \| blob
include/linux/sched.h		patch \| blob \| history
include/linux/sys.h		patch \| blob \| history
include/linux/unistd.h		patch \| blob \| history
include/linux/vm86.h	[new file with mode: 0644]	patch \| blob
kernel/Makefile		patch \| blob \| history
kernel/blk_drv/Makefile		patch \| blob \| history
kernel/blk_drv/blk.h		patch \| blob \| history
kernel/blk_drv/hd.c		patch \| blob \| history
kernel/blk_drv/ll_rw_blk.c		patch \| blob \| history
kernel/chr_drv/mem.c		patch \| blob \| history
kernel/exit.c		patch \| blob \| history
kernel/fork.c		patch \| blob \| history
kernel/math/emulate.c		patch \| blob \| history
kernel/ptrace.c		patch \| blob \| history
kernel/sched.c		patch \| blob \| history
kernel/signal.c		patch \| blob \| history
kernel/sys.c		patch \| blob \| history
kernel/sys_call.S		patch \| blob \| history
kernel/traps.c		patch \| blob \| history
mm/memory.c		patch \| blob \| history
mm/swap.c		patch \| blob \| history
net/Makefile		patch \| blob \| history
net/unix.c		patch \| blob \| history